diff --git a/NOTICES b/NOTICES
index 0c5a91ce98..345174731e 100644
--- a/NOTICES
+++ b/NOTICES
@@ -616,7 +616,9 @@ test-log (https://github.com/d-e-s-o/test-log)
 - Apache 2 License
 
+tracing-subscriber (https://github.com/tokio-rs/tracing)
+- MIT License
+
 The build pipeline uses the following GitHub Actions from the marketplace.
diff --git a/java/system-test/system-test-suite/src/main/java/sleeper/systemtest/suite/fixtures/SystemTestInstance.java b/java/system-test/system-test-suite/src/main/java/sleeper/systemtest/suite/fixtures/SystemTestInstance.java
index 7319657880..343105e904 100644
--- a/java/system-test/system-test-suite/src/main/java/sleeper/systemtest/suite/fixtures/SystemTestInstance.java
+++ b/java/system-test/system-test-suite/src/main/java/sleeper/systemtest/suite/fixtures/SystemTestInstance.java
@@ -60,6 +60,7 @@
 import static sleeper.core.properties.instance.IngestProperty.INGEST_TRACKER_ENABLED;
 import static sleeper.core.properties.instance.IngestProperty.MAXIMUM_CONCURRENT_INGEST_TASKS;
 import static sleeper.core.properties.instance.LoggingLevelsProperty.LOGGING_LEVEL;
+import static sleeper.core.properties.instance.LoggingLevelsProperty.RUST_BACKTRACE;
 import static sleeper.core.properties.instance.MetricsProperty.METRICS_TABLE_BATCH_SIZE;
 import static sleeper.core.properties.instance.NonPersistentEMRProperty.DEFAULT_BULK_IMPORT_EMR_EXECUTOR_X86_INSTANCE_TYPES;
 import static sleeper.core.properties.instance.NonPersistentEMRProperty.DEFAULT_BULK_IMPORT_EMR_INSTANCE_ARCHITECTURE;
@@ -205,6 +206,7 @@ private static DeployInstanceConfiguration createCompactionPerformanceOnDataFusi
         properties.set(COMPACTION_TASK_ARM_MEMORY, "8192");
         properties.set(MAXIMUM_CONCURRENT_COMPACTION_TASKS, "10");
         properties.set(DEFAULT_COMPACTION_FILES_BATCH_SIZE, "11");
+        properties.set(RUST_BACKTRACE, "1");
         setSystemTestTags(properties, "compactionOnDataFusion", "Sleeper Maven system test compaction performance on DataFusion");
         return createInstanceConfiguration(properties);
     }
diff --git a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/CompactionVeryLargeST.java b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/CompactionVeryLargeST.java
index 0ff9dbd08a..e1ba767a11 100644
--- a/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/CompactionVeryLargeST.java
+++ b/java/system-test/system-test-suite/src/test/java/sleeper/systemtest/suite/CompactionVeryLargeST.java
@@ -71,9 +71,7 @@ void shouldRunVeryLargeCompaction(SleeperSystemTest sleeper) {
                 TABLE_ONLINE, "false",
                 DATA_ENGINE, DataEngine.DATAFUSION.toString(),
                 COMPACTION_FILES_BATCH_SIZE, "40",
-                // We've disabled readahead temporarily until the following bug is resolved:
-                // https://github.com/gchq/sleeper/issues/5777
-                DATAFUSION_S3_READAHEAD_ENABLED, "false"));
+                DATAFUSION_S3_READAHEAD_ENABLED, "true"));
 
         // And 40 input files
         sleeper.systemTestCluster().runDataGenerationJobs(40, builder -> builder.ingestMode(DIRECT).rowsPerIngest(50_000_000),
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index 76faff2111..ddb99956c4 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -4,9 +4,9 @@ version = 4
 
 [[package]]
 name = "addr2line"
-version = "0.24.2"
+version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
+checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
 dependencies = [
  "gimli",
 ]
@@ -36,7 +36,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
 dependencies = [
  "cfg-if",
  "const-random",
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "once_cell",
  "version_check",
  "zerocopy",
 ]
@@ -44,9 +44,9 @@
 [[package]]
 name = "aho-corasick"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
 dependencies = [
  "memchr",
 ]
@@ -83,9 +83,9 @@
 [[package]]
 name = "anstream"
-version = "0.6.20"
+version = "0.6.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192"
+checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -98,9 +98,9 @@
 [[package]]
 name = "anstyle"
-version = "1.0.11"
+version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
+checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
 
 [[package]]
 name = "anstyle-parse"
@@ -148,9 +148,19 @@ dependencies = [
  "predicates",
  "sleeper_core",
  "tokio",
+ "tracing-subscriber",
  "url",
 ]
 
+[[package]]
+name = "ar_archive_writer"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a"
+dependencies = [
+ "object 0.32.2",
+]
+
 [[package]]
 name = "arrayref"
 version = "0.3.9"
@@ -510,9 +520,9 @@ dependencies = [
 
 [[package]]
 name = "aws-lc-rs"
-version = "1.13.3"
+version = "1.14.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba"
+checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d"
 dependencies = [
  "aws-lc-sys",
  "zeroize",
 ]
@@ -520,9 +530,9 @@ dependencies = [
 
 [[package]]
 name = "aws-lc-sys"
-version = "0.30.0"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff"
+checksum = "107a4e9d9cab9963e04e84bb8dee0e25f2a987f9a8bad5ed054abd439caa8f8c"
 dependencies = [
  "bindgen",
  "cc",
@@ -817,17 +827,17 @@ dependencies = [
 
 [[package]]
 name = "backtrace"
-version = "0.3.75"
+version = "0.3.76"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
+checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
 dependencies = [
  "addr2line",
  "cfg-if",
  "libc",
  "miniz_oxide",
- "object",
+ "object 0.37.3",
  "rustc-demangle",
- "windows-targets 0.52.6",
+ "windows-link",
 ]
@@ -848,9 +858,9 @@ dependencies = [
 
 [[package]]
 name = "bigdecimal"
-version = "0.4.8"
+version = "0.4.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013"
+checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934"
 dependencies = [
  "autocfg",
  "libm",
@@ -861,32 +871,29 @@ dependencies = [
 
 [[package]]
 name = "bindgen"
-version = "0.69.5"
+version = "0.72.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
+checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
 dependencies = [
  "bitflags",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
- "lazy_static",
- "lazycell",
+ "itertools 0.13.0",
  "log",
  "prettyplease",
  "proc-macro2",
  "quote",
  "regex",
- "rustc-hash 1.1.0",
+ "rustc-hash",
  "shlex",
  "syn",
- "which",
 ]
 
 [[package]]
 name = "bitflags"
-version = "2.9.4"
+version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
+checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
 
 [[package]]
 name = "blake2"
@@ -942,9 +949,9 @@ dependencies = [
 
 [[package]]
 name = "bstr"
-version = "1.12.0"
+version = "1.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
+checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
 dependencies = [
  "memchr",
  "regex-automata",
@@ -990,9 +997,9 @@ dependencies = [
 
 [[package]]
 name = "bzip2"
-version = "0.6.0"
+version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff"
+checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c"
 dependencies = [
  "libbz2-rs-sys",
 ]
@@ -1009,18 +1016,18 @@ dependencies = [
 
 [[package]]
 name = "camino"
-version = "1.1.12"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd0b03af37dad7a14518b7691d81acb0f8222604ad3d1b02f6b4bed5188c0cd5"
+checksum = "276a59bf2b2c967788139340c9f0c5b12d7fd6630315c15c217e559de85d2609"
 dependencies = [
- "serde",
+ "serde_core",
 ]
 
 [[package]]
 name = "cargo-platform"
-version = "0.3.0"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8abf5d501fd757c2d2ee78d0cc40f606e92e3a63544420316565556ed28485e2"
+checksum = "122ec45a44b270afd1402f351b782c676b173e3c3fb28d86ff7ebfb4d86a4ee4"
 dependencies = [
  "serde",
 ]
@@ -1041,9 +1048,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.2.35"
+version = "1.2.43"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "590f9024a68a8c40351881787f1934dc11afd69090f5edb6831464694d836ea3"
+checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2"
 dependencies = [
  "find-msvc-tools",
  "jobserver",
@@ -1062,9 +1069,9 @@ dependencies = [
 
 [[package]]
 name = "cfg-if"
-version = "1.0.3"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
 
 [[package]]
 name = "cfg_aliases"
@@ -1083,7 +1090,7 @@ dependencies = [
  "num-traits",
  "serde",
  "wasm-bindgen",
- "windows-link 0.2.0",
+ "windows-link",
 ]
@@ -1143,9 +1150,9 @@ dependencies = [
 
 [[package]]
 name = "clap_lex"
-version = "0.7.5"
+version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
+checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
 
 [[package]]
 name = "cmake"
@@ -1158,9 +1165,9 @@ dependencies = [
 
 [[package]]
 name = "codespan-reporting"
-version = "0.13.0"
+version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba7a06c0b31fff5ff2e1e7d37dbf940864e2a974b336e1a2938d10af6e8fb283"
+checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681"
 dependencies = [
  "serde",
  "termcolor",
@@ -1295,21 +1302,21 @@ dependencies = [
 
 [[package]]
 name = "csv"
-version = "1.3.1"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
+checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938"
 dependencies = [
  "csv-core",
  "itoa",
  "ryu",
- "serde",
+ "serde_core",
 ]
 
 [[package]]
 name = "csv-core"
-version = "0.1.12"
+version = "0.1.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d"
+checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782"
 dependencies = [
  "memchr",
 ]
@@ -1392,16 +1399,16 @@ dependencies = [
 
 [[package]]
 name = "datafusion"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc6759cf9ef57c5c469e4027ac4b4cfa746e06a0f5472c2b922b6a403c2a64c4"
+checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a"
 dependencies = [
  "arrow",
  "arrow-ipc",
  "arrow-schema",
  "async-trait",
  "bytes",
- "bzip2 0.6.0",
+ "bzip2 0.6.1",
  "chrono",
  "datafusion-catalog",
  "datafusion-catalog-listing",
@@ -1447,9 +1454,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-catalog"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a1c48fc7e6d62590d45f7be7c531980b8ff091d1ab113a9ddf465bef41e4093"
+checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1473,9 +1480,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-catalog-listing"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3db1266da115de3ab0b2669fc027d96cf0ff777deb3216d52c74b528446ccdd6"
+checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1496,9 +1503,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-common"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ad4eb2a48ca10fa1e1a487a28a5bf080e31efac2d4bf12bb7e92c2d9ea4f35e5"
+checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1"
 dependencies = [
  "ahash",
  "arrow",
@@ -1521,9 +1528,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-common-runtime"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0422ee64d5791599c46b786063e695f7699fadd3a12ad25038cb3094d05886a"
+checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e"
 dependencies = [
  "futures",
  "log",
 ]
@@ -1532,15 +1539,15 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "904c2e1089b3ccf10786f2dae12bc560fda278e4194a8917c5844d2e8c212818"
+checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86"
 dependencies = [
  "arrow",
  "async-compression",
  "async-trait",
  "bytes",
- "bzip2 0.6.0",
+ "bzip2 0.6.1",
  "chrono",
  "datafusion-common",
  "datafusion-common-runtime",
@@ -1569,9 +1576,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-csv"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8336a805c42ef4e359daaad142ddc53649f23c7e934c117d8516816afe6b7a3d"
+checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1594,9 +1601,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-json"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c691b1565e245ea369bc8418b472a75ea84c2ad2deb61b1521cfa38319a9cd47"
+checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1619,9 +1626,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-datasource-parquet"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9f7576ceb5974c5f6874d7f2a5ebfeb58960a920da64017def849e0352fe2d8"
+checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1652,15 +1659,15 @@ dependencies = [
 
 [[package]]
 name = "datafusion-doc"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9dde7c10244f3657fc01eef8247c0b2b20eae4cf6439a0ebb27322f32026d6b8"
+checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429"
 
 [[package]]
 name = "datafusion-execution"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5143fc795cef959b6d5271b2e8f1120382fe929fc4bd027c7d7b993f5352ef7e"
+checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1678,9 +1685,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-expr"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63e826296bc5f5d0af3e39c1af473d4091ac6a152a5be2f80c256f0182938428"
+checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1700,9 +1707,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-expr-common"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9096732d0d8862d1950ca70324fe91f9dee3799eeb0db53ef452bdb573484db6"
+checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1713,9 +1720,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f362c78ac283e64fd3976e060c1a8a57d5f4dcf844a6b6bd2eb320640a1572e"
+checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -1742,9 +1749,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-aggregate"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22e2a80a80145a796ae3f02eb724ac516178556aec864fe89f6ab3741a4cd249"
+checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf"
 dependencies = [
  "ahash",
  "arrow",
@@ -1763,9 +1770,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-aggregate-common"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7dcca2fe7c33409e9ab3f950366aa4cba5db6175a09599fdb658ad9f2cc4296"
+checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17"
 dependencies = [
  "ahash",
  "arrow",
@@ -1776,9 +1783,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-nested"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1b298733377f3ec8c2868c75b5555b15396d9c13e36c5fda28e80feee34e3ed"
+checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3"
 dependencies = [
  "arrow",
  "arrow-ord",
@@ -1798,9 +1805,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-table"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fa4a380ca362eb0fbd33093e8ca6b7a31057616c7e6ee999b87a4ad3c7c0b3f"
+checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1814,9 +1821,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-window"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9068fc85b8e187c706427794d79bb7ee91132b6b192cb7b18e650a5f7c5c1340"
+checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1832,9 +1839,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-functions-window-common"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2f80ec56e177d166269556649be817a382a374642872df4ca48cf9be3d09b3a"
+checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f"
 dependencies = [
  "datafusion-common",
  "datafusion-physical-expr-common",
 ]
@@ -1842,9 +1849,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-macros"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4868fe261ba01e462033eff141e90453b7630722cad6420fddd81ebb786f6e2"
+checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730"
 dependencies = [
  "datafusion-expr",
  "quote",
@@ -1853,9 +1860,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-optimizer"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40ed8c51b5c37c057e5c7d5945ed807f1cecfba003bdb1a4c3036595dda287c7"
+checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a"
 dependencies = [
  "arrow",
  "chrono",
@@ -1873,9 +1880,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f678f5734147446e1adbee63be4b244c8f0e9cbd5c41525004ace3730190d03e"
+checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6"
 dependencies = [
  "ahash",
  "arrow",
@@ -1896,9 +1903,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr-adapter"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "086877d4eca538e9cd1f28b917db0036efe0ad8b4fb7c702f520510672032c8d"
+checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1911,9 +1918,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr-common"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f5c5d17f6a4f28f9849ee3449bb9b83406a718e4275c218bf37ca247ee123779"
+checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3"
 dependencies = [
  "ahash",
  "arrow",
@@ -1925,9 +1932,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-optimizer"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab9fb8b3fba2634d444e0177862797dc1231e0e20bc4db291a15d39c0d4136c3"
+checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1945,9 +1952,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-plan"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d5086cb2e579270173ff0eb38d60ba2a081f1d422a743fa673f6096920950eb5"
+checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c"
 dependencies = [
  "ahash",
  "arrow",
@@ -1976,9 +1983,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-pruning"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f84b866d906118c320459f30385048aeedbe36ac06973d3e4fa0cc5d60d722c"
+checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65"
 dependencies = [
  "arrow",
  "arrow-schema",
@@ -1994,9 +2001,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-session"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3820062b9dd2846954eeb844ff9fe3662977b7d2d74947647c779fabfa502508"
+checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d"
 dependencies = [
  "arrow",
  "async-trait",
@@ -2018,9 +2025,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-sql"
-version = "50.2.0"
+version = "50.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "375232baa851b2e9d09fcbe8906141a0ec6e0e058addc5565e0d3d790bb9d51d"
+checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b"
 dependencies = [
  "arrow",
  "bigdecimal",
@@ -2035,9 +2042,9 @@ dependencies = [
 
 [[package]]
 name = "deranged"
-version = "0.5.3"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc"
+checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
 dependencies = [
  "powerfmt",
 ]
@@ -2090,9 +2097,9 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
 
 [[package]]
 name = "env_filter"
-version = "0.1.3"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
+checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2"
 dependencies = [
  "log",
  "regex",
@@ -2119,12 +2126,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 
 [[package]]
 name = "errno"
-version = "0.3.13"
+version = "0.3.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.60.2",
+ "windows-sys 0.61.2",
 ]
@@ -2153,9 +2160,9 @@ dependencies = [
 
 [[package]]
 name = "find-msvc-tools"
-version = "0.1.0"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e178e4fba8a2726903f6ba98a6d221e76f9c12c650d5dc0e6afdc50677b49650"
+checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
 
 [[package]]
 name = "fixedbitset"
@@ -2165,9 +2172,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
 
 [[package]]
 name = "flatbuffers"
-version = "25.2.10"
+version = "25.9.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1"
+checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5"
 dependencies = [
  "bitflags",
  "rustc_version",
 ]
@@ -2175,9 +2182,9 @@ dependencies = [
 
 [[package]]
 name = "flate2"
-version = "1.1.2"
+version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
+checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
 dependencies = [
  "crc32fast",
  "libz-rs-sys",
@@ -2323,9 +2330,9 @@ dependencies = [
 
 [[package]]
 name = "generic-array"
-version = "0.14.7"
+version = "0.14.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2"
 dependencies = [
  "typenum",
  "version_check",
 ]
@@ -2340,29 +2347,29 @@ dependencies = [
  "cfg-if",
  "js-sys",
  "libc",
- "wasi 0.11.1+wasi-snapshot-preview1",
+ "wasi",
  "wasm-bindgen",
 ]
 
 [[package]]
 name = "getrandom"
-version = "0.3.3"
+version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
 dependencies = [
  "cfg-if",
  "js-sys",
  "libc",
  "r-efi",
- "wasi 0.14.3+wasi-0.2.4",
+ "wasip2",
  "wasm-bindgen",
 ]
 
 [[package]]
 name = "gimli"
-version = "0.31.1"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
+checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
 
 [[package]]
 name = "git2"
@@ -2406,13 +2413,14 @@ dependencies = [
 
 [[package]]
 name = "half"
-version = "2.6.0"
+version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
 dependencies = [
  "cfg-if",
  "crunchy",
  "num-traits",
+ "zerocopy",
 ]
 
 [[package]]
@@ -2431,8 +2439,6 @@ version = "0.15.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 dependencies = [
- "allocator-api2",
- "equivalent",
 "foldhash 0.1.5",
 ]
@@ -2463,15 +2469,6 @@ dependencies = [
  "digest",
 ]
 
-[[package]]
-name = "home"
-version = "0.5.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf"
-dependencies = [
- "windows-sys 0.59.0",
-]
-
 [[package]]
 name = "http"
 version = "0.2.12"
@@ -2552,9 +2549,9 @@ dependencies = [
 
 [[package]]
 name = "humantime"
-version = "2.2.0"
+version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
+checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
 
 [[package]]
 name = "hyper"
@@ -2597,9 +2594,9 @@ dependencies = [
 
 [[package]]
 name = "hyper-util"
-version = "0.1.16"
+version = "0.1.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
+checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8"
 dependencies = [
  "base64",
  "bytes",
@@ -2621,9 +2618,9 @@ dependencies = [
 
 [[package]]
 name = "iana-time-zone"
-version = "0.1.63"
+version = "0.1.64"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
+checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
 dependencies = [
  "android_system_properties",
  "core-foundation-sys",
@@ -2645,9 +2642,9 @@ dependencies = [
 
 [[package]]
 name = "icu_collections"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47"
+checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
 dependencies = [
  "displaydoc",
  "potential_utf",
@@ -2658,9 +2655,9 @@ dependencies = [
 
 [[package]]
 name = "icu_locale_core"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a"
+checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
 dependencies = [
  "displaydoc",
  "litemap",
@@ -2671,11 +2668,10 @@ dependencies = [
 
 [[package]]
 name = "icu_normalizer"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979"
+checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
 dependencies = [
- "displaydoc",
 "icu_collections",
 "icu_normalizer_data",
 "icu_properties",
@@ -2686,42 +2682,38 @@ dependencies = [
 
 [[package]]
 name = "icu_normalizer_data"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3"
+checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
 
 [[package]]
 name = "icu_properties"
-version = "2.0.1"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b"
+checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99"
 dependencies = [
- "displaydoc",
 "icu_collections",
 "icu_locale_core",
 "icu_properties_data",
 "icu_provider",
- "potential_utf",
 "zerotrie",
 "zerovec",
 ]
 
 [[package]]
 name = "icu_properties_data"
-version = "2.0.1"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632"
+checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899"
 
 [[package]]
 name = "icu_provider"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af"
+checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
 dependencies = [
 "displaydoc",
 "icu_locale_core",
- "stable_deref_trait",
- "tinystr",
 "writeable",
 "yoke",
 "zerofrom",
@@ -2758,12 +2750,12 @@ checksum = "964de6e86d545b246d84badc0fef527924ace5134f30641c203ef52ba83f58d5"
 
 [[package]]
 name = "indexmap"
-version = "2.11.0"
+version = "2.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9"
+checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f"
 dependencies = [
  "equivalent",
- "hashbrown 0.15.5",
+ "hashbrown 0.16.0",
 ]
@@ -2790,15 +2782,15 @@ dependencies = [
 
 [[package]]
 name = "is_terminal_polyfill"
-version = "1.70.1"
+version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
 
 [[package]]
 name = "itertools"
-version = "0.12.1"
+version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
 dependencies = [
  "either",
 ]
@@ -2848,15 +2840,15 @@ version = "0.1.34"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
 dependencies = [
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
 "libc",
 ]
 
 [[package]]
 name = "js-sys"
-version = "0.3.77"
+version = "0.3.82"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
+checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65"
 dependencies = [
  "once_cell",
  "wasm-bindgen",
@@ -2868,17 +2860,11 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 
-[[package]]
-name = "lazycell"
-version = "1.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
-
 [[package]]
 name = "lexical-core"
-version = "1.0.5"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958"
+checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594"
 dependencies = [
  "lexical-parse-float",
  "lexical-parse-integer",
@@ -2889,53 +2875,46 @@ dependencies = [
 
 [[package]]
 name = "lexical-parse-float"
-version = "1.0.5"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2"
+checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56"
 dependencies = [
  "lexical-parse-integer",
  "lexical-util",
- "static_assertions",
 ]
 
 [[package]]
 name = "lexical-parse-integer"
-version = "1.0.5"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e"
+checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34"
 dependencies = [
  "lexical-util",
- "static_assertions",
 ]
 
 [[package]]
 name = "lexical-util"
-version = "1.0.6"
+version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3"
-dependencies = [
- "static_assertions",
-]
+checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17"
 
 [[package]]
 name = "lexical-write-float"
-version = "1.0.5"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd"
+checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361"
 dependencies = [
  "lexical-util",
  "lexical-write-integer",
- "static_assertions",
 ]
 
 [[package]]
 name = "lexical-write-integer"
-version = "1.0.5"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978"
+checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df"
 dependencies = [
  "lexical-util",
- "static_assertions",
 ]
@@ -2966,12 +2945,12 @@ dependencies = [
 
 [[package]]
 name = "libloading"
-version = "0.8.8"
+version = "0.8.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
+checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
 dependencies = [
  "cfg-if",
- "windows-targets 0.53.3",
+ "windows-link",
 ]
@@ -3017,38 +2996,31 @@ dependencies = [
 
 [[package]]
 name = "link-cplusplus"
-version = "1.0.11"
+version = "1.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c349c75e1ab4a03bd6b33fe6cbd3c479c5dd443e44ad732664d72cb0e755475"
+checksum = "7f78c730aaa7d0b9336a299029ea49f9ee53b0ed06e9202e8cb7db9bae7b8c82"
 dependencies = [
  "cc",
 ]
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.4.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
-
-[[package]]
-name = "linux-raw-sys"
-version = "0.9.4"
+version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
+checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
 
 [[package]]
 name = "litemap"
-version = "0.8.0"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
+checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
 
 [[package]]
 name = "lock_api"
-version = "0.4.13"
+version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
 dependencies = [
- "autocfg",
 "scopeguard",
 ]
@@ -3105,9 +3077,9 @@ dependencies = [
 
 [[package]]
 name = "memchr"
-version = "2.7.5"
+version = "2.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
+checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
 
 [[package]]
 name = "minimal-lexical"
@@ -3122,17 +3094,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
 dependencies = [
  "adler2",
+ "simd-adler32",
 ]
 
 [[package]]
 name = "mio"
-version = "1.0.4"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
+checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873"
 dependencies = [
  "libc",
- "wasi 0.11.1+wasi-snapshot-preview1",
- "windows-sys 0.59.0",
+ "wasi",
+ "windows-sys 0.61.2",
 ]
@@ -3185,11 +3158,11 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
 
 [[package]]
 name = "nu-ansi-term"
-version = "0.50.1"
+version = "0.50.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399"
+checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
 dependencies = [
- "windows-sys 0.52.0",
+ "windows-sys 0.61.2",
 ]
@@ -3284,9 +3257,18 @@ dependencies = [
 
 [[package]]
 name = "object"
-version = "0.36.7"
+version = "0.32.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "object"
+version = "0.37.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
+checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe"
 dependencies = [
  "memchr",
 ]
@@ -3294,10 +3276,9 @@ dependencies = [
 [[package]]
 name = "object_store"
 version = "0.12.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740"
 dependencies = [
  "async-trait",
+ "backtrace",
  "base64",
  "bytes",
  "chrono",
@@ -3358,9 +3339,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
 
 [[package]]
 name = "once_cell_polyfill"
-version = "1.70.1"
+version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
+checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
 
 [[package]]
 name = "openssl-probe"
@@ -3370,9 +3351,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
 
 [[package]]
 name = "openssl-sys"
-version = "0.9.109"
+version = "0.9.110"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571"
+checksum = "0a9f0075ba3c21b09f8e8b2026584b1d18d49388648f2fbbf3c97ea8deced8e2"
 dependencies = [
  "cc",
  "libc",
@@ -3415,9 +3396,9 @@ checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52"
 
 [[package]]
 name = "parking_lot"
-version = "0.12.4"
+version = "0.12.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
 dependencies = [
  "lock_api",
  "parking_lot_core",
 ]
@@ -3425,22 +3406,22 @@ dependencies = [
 
 [[package]]
 name = "parking_lot_core"
-version = "0.9.11"
+version = "0.9.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
 dependencies = [
  "cfg-if",
  "libc",
  "redox_syscall",
  "smallvec",
- "windows-targets 0.52.6",
+ "windows-link",
 ]
 
 [[package]]
 name = "parquet"
-version = "56.1.0"
+version = "56.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89b56b41d1bd36aae415e42f91cae70ee75cf6cba74416b14dce3e958d5990ec"
+checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -3457,7 +3438,7 @@ dependencies = [
  "flate2",
  "futures",
  "half",
- "hashbrown 0.15.5",
+ "hashbrown 0.16.0",
  "lz4_flex",
  "num",
  "num-bigint",
@@ -3487,9 +3468,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
 
 [[package]]
 name = "petgraph"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca"
+checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
 dependencies = [
  "fixedbitset",
  "hashbrown 0.15.5",
@@ -3535,9 +3516,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
 
 [[package]]
 name = "plist"
-version = "1.7.4"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
+checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
 dependencies = [
  "base64",
  "indexmap",
@@ -3563,9 +3544,9 @@ dependencies = [
 
 [[package]]
 name = "potential_utf"
-version = "0.1.3"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a"
+checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77"
 dependencies = [
  "zerovec",
 ]
@@ -3627,19 +3608,20 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.101"
+version = "1.0.103"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
+checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
 dependencies = [
  "unicode-ident",
 ]
 
 [[package]]
 name = "psm"
-version = "0.1.26"
+version = "0.1.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f"
+checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01"
 dependencies = [
+ "ar_archive_writer",
  "cc",
 ]
@@ -3664,7 +3646,7 @@ dependencies = [
  "pin-project-lite",
  "quinn-proto",
  "quinn-udp",
- "rustc-hash 2.1.1",
+ "rustc-hash",
  "rustls",
  "socket2",
  "thiserror",
@@ -3680,11 +3662,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
 dependencies = [
  "bytes",
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "lru-slab",
  "rand",
  "ring",
- "rustc-hash 2.1.1",
+ "rustc-hash",
  "rustls",
  "rustls-pki-types",
  "slab",
@@ -3710,9 +3692,9 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.40"
+version = "1.0.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
+checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
 dependencies = [
  "proc-macro2",
 ]
@@ -3749,7 +3731,7 @@ version = "0.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
 dependencies = [
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
 ]
@@ -3774,9 +3756,9 @@ dependencies = [
 
 [[package]]
 name = "redox_syscall"
-version = "0.5.17"
+version = "0.5.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
 dependencies = [
  "bitflags",
 ]
@@ -3795,9 +3777,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.4.12"
+version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "722166aa0d7438abbaa4d5cc2c649dac844e8c56d82fb3d33e9c34b5cd268fc6"
+checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -3806,21 +3788,19 @@ dependencies = [
 
 [[package]]
 name = "regex-lite"
-version = "0.1.7"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30"
+checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da"
 
 [[package]]
 name = "regex-syntax"
-version = "0.8.6"
+version = "0.8.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
+checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
 
 [[package]]
 name = "reqwest"
-version = "0.12.23"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb"
+version = "0.12.24"
 dependencies = [
  "base64",
  "bytes",
@@ -3851,6 +3831,7 @@ dependencies = [
  "tower",
  "tower-http",
  "tower-service",
+ "tracing",
  "url",
  "wasm-bindgen",
  "wasm-bindgen-futures",
@@ -3890,12 +3871,6 @@ version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
 
-[[package]]
-name = "rustc-hash"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
-
 [[package]]
 name = "rustc-hash"
 version = "2.1.1"
@@ -3913,35 +3888,22 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.44"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
-dependencies = [
- "bitflags",
- "errno",
- "libc",
- "linux-raw-sys 0.4.15",
- "windows-sys 0.59.0",
-]
-
-[[package]]
-name = "rustix"
-version = "1.0.8"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
+checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
 dependencies = [
  "bitflags",
  "errno",
  "libc",
- "linux-raw-sys 0.9.4",
- "windows-sys 0.60.2",
+ "linux-raw-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
 name = "rustls"
-version = "0.23.31"
+version = "0.23.34"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc"
+checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7"
 dependencies = [
  "aws-lc-rs",
  "once_cell",
@@ -3954,9 +3916,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-native-certs"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3"
+checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923"
 dependencies = [
  "openssl-probe",
  "rustls-pki-types",
@@ -3966,9 +3928,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-pki-types"
-version = "1.12.0"
+version = "1.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
+checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a"
 dependencies = [
  "web-time",
  "zeroize",
 ]
@@ -3976,9 +3938,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-webpki"
-version = "0.103.4"
+version = "0.103.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc"
+checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52"
 dependencies = [
  "aws-lc-rs",
  "ring",
@@ -4009,11 +3971,11 @@ dependencies = [
 
 [[package]]
 name = "schannel"
-version = "0.1.27"
+version = "0.1.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d"
+checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]
@@ -4030,9 +3992,9 @@ checksum = "d68f2ec51b097e4c1a75b681a8bec621909b5e91f15bb7b840c4f2f7b01148b2"
 
 [[package]]
 name = "security-framework"
-version = "3.3.0"
+version = "3.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c"
+checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef"
 dependencies = [
  "bitflags",
  "core-foundation",
@@ -4043,9 +4005,9 @@ dependencies = [
 
 [[package]]
 name = "security-framework-sys"
-version = "2.14.0"
+version = "2.15.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32"
+checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0"
 dependencies = [
  "core-foundation-sys",
  "libc",
 ]
@@ -4053,11 +4015,12 @@ dependencies = [
 
 [[package]]
 name = "semver"
-version = "1.0.26"
+version = "1.0.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
+checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
 dependencies = [
  "serde",
+ "serde_core",
 ]
@@ -4068,9 +4031,9 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
 
 [[package]]
 name = "serde"
-version = "1.0.223"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a505d71960adde88e293da5cb5eda57093379f64e61cf77bf0e6a63af07a7bac"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
 dependencies = [
  "serde_core",
  "serde_derive",
 ]
@@ -4078,18 +4041,18 @@ dependencies = [
 
 [[package]]
 name = "serde_core"
-version = "1.0.223"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20f57cbd357666aa7b3ac84a90b4ea328f1d4ddb6772b430caa5d9e1309bb9e9"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.223"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d428d07faf17e306e699ec1e91996e5a165ba5d6bce5b5155173e91a8a01a56"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4111,11 +4074,11 @@ dependencies = [
 
 [[package]]
 name = "serde_spanned"
-version = "1.0.0"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83"
+checksum = "e24345aa0fe688594e73770a5f6d1b216508b4f93484c0026d521acd30134392"
 dependencies = [
- "serde",
+ "serde_core",
 ]
@@ -4165,6 +4128,12 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "simd-adler32"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+
 [[package]]
 name = "simdutf8"
 version = "0.1.5"
@@ -4228,6 +4197,7 @@ dependencies = [
  "log",
  "sleeper_core",
  "tokio",
+ "tracing-subscriber",
  "url",
 ]
@@ -4245,12 +4215,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
 
 [[package]]
 name = "socket2"
-version = "0.6.0"
+version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807"
+checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881"
 dependencies = [
  "libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.60.2",
 ]
@@ -4277,15 +4247,15 @@ dependencies = [
 
 [[package]]
 name = "stable_deref_trait"
-version = "1.2.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
 
 [[package]]
 name = "stacker"
-version = "0.1.21"
+version = "0.1.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b"
+checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59"
 dependencies = [
  "cc",
  "cfg-if",
@@ -4294,12 +4264,6 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
-[[package]]
-name = "static_assertions"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
-
 [[package]]
 name = "strsim"
 version = "0.11.1"
@@ -4333,9 +4297,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
 
 [[package]]
 name = "syn"
-version = "2.0.106"
+version = "2.0.108"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
+checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4369,10 +4333,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
 dependencies = [
  "fastrand",
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "once_cell",
- "rustix 1.0.8",
- "windows-sys 0.61.1",
+ "rustix",
+ "windows-sys 0.61.2",
 ]
@@ -4476,11 +4440,12 @@ dependencies = [
 
 [[package]]
 name = "time"
-version = "0.3.43"
+version = "0.3.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83bde6f1ec10e72d583d91623c939f623002284ef622b87de38cfd546cbf2031"
+checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
 dependencies = [
  "deranged",
+ "itoa",
  "num-conv",
  "powerfmt",
  "serde",
@@ -4515,9 +4480,9 @@ dependencies = [
 
 [[package]]
 name = "tinystr"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b"
+checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869"
 dependencies = [
  "displaydoc",
  "zerovec",
@@ -4552,7 +4517,7 @@ dependencies = [
  "signal-hook-registry",
  "socket2",
  "tokio-macros",
- "windows-sys 0.61.1",
+ "windows-sys 0.61.2",
 ]
@@ -4568,9 +4533,9 @@ dependencies = [
 
 [[package]]
 name = "tokio-rustls"
-version = "0.26.2"
+version = "0.26.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
+checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
 dependencies = [
  "rustls",
  "tokio",
@@ -4615,11 +4580,11 @@ dependencies = [
 
 [[package]]
 name = "toml"
-version = "0.9.5"
+version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75129e1dc5000bfbaa9fee9d1b21f974f9fbad9daec557a521ee6e080825f6e8"
+checksum = "f0dc8b1fb61449e27716ec0e1bdf0f6b8f3e8f6b05391e8497b8b6d7804ea6d8"
 dependencies = [
- "serde",
+ "serde_core",
  "serde_spanned",
  "toml_datetime",
  "toml_writer",
 ]
@@ -4627,18 +4592,18 @@ dependencies = [
 
 [[package]]
 name = "toml_datetime"
-version = "0.7.0"
+version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3"
+checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533"
 dependencies = [
- "serde",
+ "serde_core",
 ]
 
 [[package]]
 name = "toml_writer"
-version = "1.0.2"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fcc842091f2def52017664b53082ecbbeb5c7731092bad69d2c63050401dfd64"
+checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2"
 
 [[package]]
 name = "tower"
@@ -4749,6 +4714,7 @@ dependencies = [
  "once_cell",
  "regex-automata",
  "sharded-slab",
+ "smallvec",
  "thread_local",
  "tracing",
  "tracing-core",
@@ -4763,21 +4729,21 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
 
 [[package]]
 name = "twox-hash"
-version = "2.1.1"
+version = "2.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
+checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
 
 [[package]]
 name = "typenum"
-version = "1.18.0"
+version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
+checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.18"
+version = "1.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
+checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
 
 [[package]]
 name = "unicode-segmentation"
@@ -4787,9 +4753,9 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
 
 [[package]]
 name = "unicode-width"
-version = "0.2.1"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
 
 [[package]]
 name = "untrusted"
@@ -4833,7 +4799,7 @@ version = "1.18.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
 dependencies = [
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
 "js-sys",
 "wasm-bindgen",
 ]
@@ -4897,45 +4863,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
 
 [[package]]
-name = "wasi"
-version = "0.14.3+wasi-0.2.4"
+name = "wasip2"
+version = "1.0.1+wasi-0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a51ae83037bdd272a9e28ce236db8c07016dd0d50c27038b3f407533c030c95"
+checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
 dependencies = [
  "wit-bindgen",
 ]
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.100"
+version = "0.2.105"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
+checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60"
 dependencies = [
  "cfg-if",
  "once_cell",
  "rustversion",
  "wasm-bindgen-macro",
-]
-
-[[package]]
-name = "wasm-bindgen-backend"
-version = "0.2.100"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
-dependencies = [
- "bumpalo",
- "log",
- "proc-macro2",
- "quote",
- "syn",
 "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.50"
+version = "0.4.55"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
+checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0"
 dependencies = [
  "cfg-if",
  "js-sys",
@@ -4946,9 +4899,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.100"
+version = "0.2.105"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
+checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
 ]
@@ -4956,22 +4909,22 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.100"
+version = "0.2.105"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
+checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc"
 dependencies = [
+ "bumpalo",
  "proc-macro2",
  "quote",
  "syn",
- "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.100"
+version = "0.2.105"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
+checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76"
 dependencies = [
  "unicode-ident",
 ]
@@ -4991,9 +4944,9 @@ dependencies = [
 
 [[package]]
 name = "web-sys"
-version = "0.3.77"
+version = "0.3.82"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
+checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -5009,45 +4962,33 @@ dependencies =
[ "wasm-bindgen", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix 0.38.44", -] - [[package]] name = "winapi-util" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "windows-core" -version = "0.61.2" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.1.3", + "windows-link", "windows-result", "windows-strings", ] [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", @@ -5056,9 +4997,9 @@ dependencies = [ [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", @@ -5067,32 +5008,26 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - -[[package]] -name = "windows-link" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-result" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] @@ -5119,16 +5054,16 @@ version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.53.3", + "windows-targets 0.53.5", ] [[package]] name = "windows-sys" -version = "0.61.1" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.0", + "windows-link", ] [[package]] @@ -5149,19 +5084,19 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.53.3" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link 0.1.3", - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -5172,9 +5107,9 @@ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -5184,9 +5119,9 @@ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_aarch64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -5196,9 +5131,9 @@ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" [[package]] name = "windows_i686_gnullvm" @@ -5208,9 +5143,9 @@ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -5220,9 +5155,9 @@ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_i686_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -5232,9 +5167,9 @@ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnu" -version = "0.53.0" +version = "0.53.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -5244,9 +5179,9 @@ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -5256,21 +5191,21 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "windows_x86_64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "wit-bindgen" -version = "0.45.0" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052283831dbae3d879dc7f51f3d92703a316ca49f91540417d38591826127814" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "xmlparser" @@ -5289,11 +5224,10 @@ dependencies = [ [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -5301,9 +5235,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", @@ -5313,18 +5247,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", @@ -5354,15 +5288,15 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +checksum = 
"b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", "yoke", @@ -5371,9 +5305,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -5382,9 +5316,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", @@ -5417,9 +5351,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.15+zstd.1.5.7" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ "cc", "pkg-config", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 5e60ae047d..d0cd568684 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -62,6 +62,11 @@ tokio = { version = "1.48.0", features = ["full"] } # Async runtime tokio-test = { version = "0.4.4" } # Doc tests url = { version = "2.5.7" } # URL processing for S3 mockall = { version = "0.13.1" } # Mock trait library +tracing-subscriber = { version = "0.3.20" } + +[patch.crates-io] +object_store = { path = "./object_store" } +reqwest = { path = "./reqwest" } [workspace.metadata.datasketches] git_repository_tag = "5.2.0" @@ -70,5 +75,5 @@ git_repository_tag = "5.2.0" incremental = true lto = false overflow-checks = true -debug = false -strip = true +debug = 1 +strip = false diff --git a/rust/apps/Cargo.toml b/rust/apps/Cargo.toml index 2ea405dcdf..1b4f858043 100644 --- a/rust/apps/Cargo.toml +++ b/rust/apps/Cargo.toml @@ -38,6 +38,7 @@ log = { workspace = true, features = ["release_max_level_debug"] } num-format = { workspace = true } tokio = { workspace = true, features = ["full"] } url = { workspace = true } +tracing-subscriber = { workspace = true, features = [ "env-filter" ] } [dev-dependencies] assert_cmd = { workspace = true } diff --git a/rust/apps/src/bin/compact.rs b/rust/apps/src/bin/compact.rs index f0f216cdf6..6aca92eea3 100644 --- a/rust/apps/src/bin/compact.rs +++ b/rust/apps/src/bin/compact.rs @@ -14,7 +14,6 @@ * limitations under the License. */ use apps::path_absolute; -use chrono::Local; use clap::Parser; use color_eyre::eyre::bail; use human_panic::setup_panic; @@ -26,7 +25,8 @@ use sleeper_core::{ filter_aggregation_config::{aggregate::Aggregate, filter::Filter}, run_compaction, }; -use std::{collections::HashMap, io::Write}; +use std::collections::HashMap; +use tracing_subscriber::{EnvFilter, filter::LevelFilter}; use url::Url; /// Runs a Sleeper compaction algorithm. @@ -56,10 +56,10 @@ struct CmdLineArgs { #[arg(short = 's', long)] sort_keys: Vec, /// Partition region minimum keys (inclusive). 
Must be one per row key specified. - #[arg(short='m',long,required=true,num_args=1..)] + #[arg(short='m',long,required=false,num_args=1..)] region_mins: Vec<String>, /// Partition region maximum keys (exclusive). Must be one per row key specified. - #[arg(short='n',long,required=true,num_args=1..)] + #[arg(short='n',long,required=false,num_args=1..)] region_maxs: Vec<String>, /// Sleeper aggregation configuration #[arg(short = 'a', long, required = false, num_args = 1)] @@ -71,30 +71,17 @@ struct CmdLineArgs { #[tokio::main(flavor = "multi_thread")] async fn main() -> color_eyre::Result<()> { + tracing_subscriber::fmt() + .with_env_filter(EnvFilter::from_default_env().add_directive(LevelFilter::INFO.into())) + .with_ansi(false) + .with_line_number(true) + .init(); // Install coloured errors color_eyre::install().unwrap(); // Install human readable panics setup_panic!(); - // Install and configure environment logger - env_logger::builder() - .format(|buf, record| { - writeln!( - buf, - "{} [{}] {}:{} - {}", - Local::now().format("%Y-%m-%dT%H:%M:%S"), - record.level(), - record.file().unwrap_or("??"), - record.line().unwrap_or(0), - record.args() - ) - }) - .format_timestamp(Some(env_logger::TimestampPrecision::Millis)) - .filter_level(log::LevelFilter::Info) - .format_target(false) - .init(); - let args = CmdLineArgs::parse(); // Check URL conversion @@ -110,24 +97,28 @@ async fn main() -> color_eyre::Result<()> { let output_file = Url::parse(&args.output) .or_else(|_e| Url::parse(&("file://".to_owned() + &path_absolute(&args.output))))?; - if args.row_keys.len() != args.region_maxs.len() { + if !args.region_maxs.is_empty() && args.row_keys.len() != args.region_maxs.len() { bail!("quantity of region maximums != quantity of row key fields"); } - if args.row_keys.len() != args.region_mins.len() { + if !args.region_mins.is_empty() && args.row_keys.len() != args.region_mins.len() { bail!("quantity of region minimums != quantity of row key fields"); } let mut map = HashMap::new(); - for (key, bounds) in args - .row_keys - .iter() - .zip(args.region_mins.iter().zip(args.region_maxs.iter())) - { + for (index, key) in args.row_keys.iter().enumerate() { map.insert( key.into(), ColRange { - lower: PartitionBound::String(bounds.0), + lower: if args.region_mins.is_empty() { + PartitionBound::String("") + } else { + PartitionBound::String(&args.region_mins[index]) + }, lower_inclusive: true, - upper: PartitionBound::String(bounds.1), + upper: if args.region_maxs.is_empty() { + PartitionBound::Unbounded + } else { + PartitionBound::String(&args.region_maxs[index]) + }, upper_inclusive: false, }, ); diff --git a/rust/object_store/.asf.yaml b/rust/object_store/.asf.yaml new file mode 100644 index 0000000000..5281db697a --- /dev/null +++ b/rust/object_store/.asf.yaml @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +# Documentation can be found here: +# https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=127405038 + +notifications: + commits: commits@arrow.apache.org + issues: github@arrow.apache.org + pullrequests: github@arrow.apache.org + discussions: github@arrow.apache.org + jira_options: link label worklog +github: + description: "Rust object_store crate" + homepage: https://crates.io/crates/object_store + labels: + - object-store + enabled_merge_buttons: + squash: true + merge: false + rebase: false + features: + issues: true + discussions: true + protected_branches: + main: + required_status_checks: + # require branches to be up-to-date before merging + strict: true + # don't require any jobs to pass + contexts: [] diff --git a/rust/object_store/.cargo/config.toml b/rust/object_store/.cargo/config.toml new file mode 100644 index 0000000000..ab36bcafc7 --- /dev/null +++ b/rust/object_store/.cargo/config.toml @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[target.wasm32-unknown-unknown] +rustflags = ['--cfg', 'getrandom_backend="wasm_js"'] \ No newline at end of file diff --git a/rust/object_store/.github/ISSUE_TEMPLATE/bug_report.md b/rust/object_store/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000000..c2b7d626d1 --- /dev/null +++ b/rust/object_store/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** + + +**To Reproduce** + + +**Expected behavior** + + +**Additional context** + \ No newline at end of file diff --git a/rust/object_store/.github/ISSUE_TEMPLATE/feature_request.md b/rust/object_store/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000000..d7aad5e776 --- /dev/null +++ b/rust/object_store/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,29 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem or challenge? 
Please describe what you are trying to do.** + + +**Describe the solution you'd like** + + +**Describe alternatives you've considered** + + +**Additional context** + diff --git a/rust/object_store/.github/ISSUE_TEMPLATE/question.md b/rust/object_store/.github/ISSUE_TEMPLATE/question.md new file mode 100644 index 0000000000..aafac7cb86 --- /dev/null +++ b/rust/object_store/.github/ISSUE_TEMPLATE/question.md @@ -0,0 +1,23 @@ +--- +name: Question +about: Ask question about this project +title: '' +labels: question +assignees: '' + +--- + +**Which part is this question about** + + +**Describe your question** + + +**Additional context** + diff --git a/rust/object_store/.github/dependabot.yml b/rust/object_store/.github/dependabot.yml new file mode 100644 index 0000000000..22cd57caf8 --- /dev/null +++ b/rust/object_store/.github/dependabot.yml @@ -0,0 +1,15 @@ +version: 2 +updates: + - package-ecosystem: cargo + directory: "/" + schedule: + interval: daily + open-pull-requests-limit: 10 + target-branch: main + labels: [ auto-dependencies ] + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + open-pull-requests-limit: 10 + labels: [ auto-dependencies ] diff --git a/rust/object_store/.github/pull_request_template.md b/rust/object_store/.github/pull_request_template.md new file mode 100644 index 0000000000..b275cf64af --- /dev/null +++ b/rust/object_store/.github/pull_request_template.md @@ -0,0 +1,31 @@ +# Which issue does this PR close? + + + +Closes #. + +# Rationale for this change + + + +# What changes are included in this PR? + + + +# Are there any user-facing changes? + + + + + diff --git a/rust/object_store/.github/workflows/audit.yml b/rust/object_store/.github/workflows/audit.yml new file mode 100644 index 0000000000..a5646ea508 --- /dev/null +++ b/rust/object_store/.github/workflows/audit.yml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: audit + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +# trigger for all PRs that touch certain files and changes to main +on: + push: + branches: + - main + pull_request: + paths: + - '**/Cargo.toml' + - '**/Cargo.lock' + +jobs: + cargo-audit: + name: Audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - name: Install cargo-audit + run: cargo install cargo-audit + - name: Run audit check + run: cargo audit diff --git a/rust/object_store/.github/workflows/ci.yml b/rust/object_store/.github/workflows/ci.yml new file mode 100644 index 0000000000..ab59e6f71f --- /dev/null +++ b/rust/object_store/.github/workflows/ci.yml @@ -0,0 +1,214 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +--- + +name: CI + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +# trigger for all PRs that touch certain files and changes to main +on: + push: + branches: + - main + pull_request: + +jobs: + clippy: + name: Clippy + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v5 + - name: Setup Clippy + run: rustup component add clippy + # Run different tests for the library on its own as well as + # all targets to ensure that it still works in the absence of + # features that might be enabled by dev-dependencies of other + # targets. + - name: Run clippy with default features + run: cargo clippy -- -D warnings + - name: Run clippy without default features + run: cargo clippy --no-default-features -- -D warnings + - name: Run clippy with fs features + run: cargo clippy --no-default-features --features fs -- -D warnings + - name: Run clippy with aws feature + run: cargo clippy --features aws -- -D warnings + - name: Run clippy with gcp feature + run: cargo clippy --features gcp -- -D warnings + - name: Run clippy with azure feature + run: cargo clippy --features azure -- -D warnings + - name: Run clippy with http feature + run: cargo clippy --features http -- -D warnings + - name: Run clippy with integration feature + run: cargo clippy --no-default-features --features integration -- -D warnings + - name: Run clippy with all features + run: cargo clippy --all-features -- -D warnings + - name: Run clippy with all features and all targets + run: cargo clippy --all-features --all-targets -- -D warnings + + # test doc links still work + docs: + name: Rustdocs + runs-on: ubuntu-latest + env: + RUSTDOCFLAGS: "-Dwarnings" + steps: + - uses: actions/checkout@v5 + - name: Run cargo doc + run: cargo doc --document-private-items --no-deps --all-features + + # test the crate + # This runs outside a container to workaround lack of support for passing arguments + # to service containers - https://github.com/orgs/community/discussions/26688 + linux-test: + name: Emulator Tests + runs-on: ubuntu-latest + env: + # Disable full debug symbol generation to speed up CI build and keep memory down + # "1" means line tables only, which is useful for panic tracebacks. 
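+ # Explanatory note: RUST_BACKTRACE=1 (set just below) makes any panic print a backtrace, and the line-table debug info above keeps that backtrace symbolicated with file/line information without the cost of full debug symbols.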
+ RUSTFLAGS: "-C debuginfo=1" + RUST_BACKTRACE: "1" + # Run integration tests + TEST_INTEGRATION: 1 + EC2_METADATA_ENDPOINT: http://localhost:1338 + AZURE_CONTAINER_NAME: test-bucket + AZURE_STORAGE_USE_EMULATOR: "1" + AZURITE_BLOB_STORAGE_URL: "http://localhost:10000" + AZURITE_QUEUE_STORAGE_URL: "http://localhost:10001" + AWS_BUCKET: test-bucket + AWS_DEFAULT_REGION: "us-east-1" + AWS_ACCESS_KEY_ID: test + AWS_SECRET_ACCESS_KEY: test + AWS_ENDPOINT: http://localhost:4566 + AWS_ALLOW_HTTP: true + AWS_COPY_IF_NOT_EXISTS: dynamo:test-table:2000 + AWS_CONDITIONAL_PUT: dynamo:test-table:2000 + AWS_SERVER_SIDE_ENCRYPTION: aws:kms + HTTP_URL: "http://localhost:8080" + GOOGLE_BUCKET: test-bucket + GOOGLE_SERVICE_ACCOUNT: "/tmp/gcs.json" + + steps: + - uses: actions/checkout@v5 + + # We are forced to use docker commands instead of service containers as we need to override the entrypoints + # which is currently not supported - https://github.com/actions/runner/discussions/1872 + - name: Configure Fake GCS Server (GCP emulation) + # Custom image - see fsouza/fake-gcs-server#1164 + run: | + echo "GCS_CONTAINER=$(docker run -d -p 4443:4443 tustvold/fake-gcs-server -scheme http -backend memory -public-host localhost:4443)" >> $GITHUB_ENV + # Give the container a moment to start up prior to configuring it + sleep 1 + curl -v -X POST --data-binary '{"name":"test-bucket"}' -H "Content-Type: application/json" "http://localhost:4443/storage/v1/b" + echo '{"gcs_base_url": "http://localhost:4443", "disable_oauth": true, "client_email": "", "private_key": "", "private_key_id": ""}' > "$GOOGLE_SERVICE_ACCOUNT" + + - name: Setup WebDav + run: docker run -d -p 8080:80 rclone/rclone serve webdav /data --addr :80 + + - name: Setup LocalStack (AWS emulation) + run: | + echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566 localstack/localstack:4.0.3)" >> $GITHUB_ENV + echo "EC2_METADATA_CONTAINER=$(docker run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2)" >> $GITHUB_ENV + aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket + aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket-for-spawn + aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket-for-checksum + aws --endpoint-url=http://localhost:4566 s3api create-bucket --bucket test-object-lock --object-lock-enabled-for-bucket + aws --endpoint-url=http://localhost:4566 dynamodb create-table --table-name test-table --key-schema AttributeName=path,KeyType=HASH AttributeName=etag,KeyType=RANGE --attribute-definitions AttributeName=path,AttributeType=S AttributeName=etag,AttributeType=S --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5 + + KMS_KEY=$(aws --endpoint-url=http://localhost:4566 kms create-key --description "test key") + echo "AWS_SSE_KMS_KEY_ID=$(echo $KMS_KEY | jq -r .KeyMetadata.KeyId)" >> $GITHUB_ENV + + - name: Configure Azurite (Azure emulation) + # the magical connection string is from + # https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=visual-studio#http-connection-strings + run: | + echo "AZURITE_CONTAINER=$(docker run -d -p 10000:10000 -p 10001:10001 -p 10002:10002 mcr.microsoft.com/azure-storage/azurite)" >> $GITHUB_ENV + az storage container create -n test-bucket --connection-string 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:10000/devstoreaccount1;QueueEndpoint=http://localhost:10001/devstoreaccount1;' + + 
- name: Setup Rust toolchain + run: | + rustup toolchain install stable + rustup default stable + + - name: Run object_store tests + run: cargo test --features=aws,azure,gcp,http + + # Don't rerun doc tests (some of them rely on features other than aws) + - name: Run object_store tests (AWS native conditional put) + run: cargo test --lib --tests --features=aws + env: + AWS_CONDITIONAL_PUT: etag + AWS_COPY_IF_NOT_EXISTS: multipart + + - name: GCS Output + if: ${{ !cancelled() }} + run: docker logs $GCS_CONTAINER + + - name: LocalStack Output + if: ${{ !cancelled() }} + run: docker logs $LOCALSTACK_CONTAINER + + - name: EC2 Metadata Output + if: ${{ !cancelled() }} + run: docker logs $EC2_METADATA_CONTAINER + + - name: Azurite Output + if: ${{ !cancelled() }} + run: docker logs $AZURITE_CONTAINER + + # test the object_store crate builds against wasm32 in stable rust + wasm32-build: + name: Build wasm32 + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v5 + with: + submodules: true + - name: Install clang (needed for ring) + run: apt-get update && apt-get install -y clang + - name: Install wasm32-unknown-unknown + run: rustup target add wasm32-unknown-unknown + - name: Build wasm32-unknown-unknown + run: cargo build --target wasm32-unknown-unknown + - name: Install wasm32-wasip1 + run: rustup target add wasm32-wasip1 + - name: Build wasm32-wasip1 + run: cargo build --all-features --target wasm32-wasip1 + - name: Install wasm-pack + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + - uses: actions/setup-node@v5 + with: + node-version: 20 + - name: Run wasm32-unknown-unknown tests (via Node) + run: wasm-pack test --node --features http --no-default-features + + windows: + name: cargo test LocalFileSystem (win64) + runs-on: windows-latest + steps: + - uses: actions/checkout@v5 + with: + submodules: true + - name: Run LocalFileSystem tests + run: cargo test local::tests diff --git a/rust/object_store/.github/workflows/dev.yml b/rust/object_store/.github/workflows/dev.yml new file mode 100644 index 0000000000..8acbb7a63c --- /dev/null +++ b/rust/object_store/.github/workflows/dev.yml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: dev + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +# trigger for all PRs and changes to main +on: + push: + branches: + - main + pull_request: + +jobs: + + rat: + name: Release Audit Tool (RAT) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: 3.8 + - name: Audit licenses + run: ./dev/release/run-rat.sh . 
\ No newline at end of file diff --git a/rust/object_store/.github/workflows/rust.yml b/rust/object_store/.github/workflows/rust.yml new file mode 100644 index 0000000000..462fe79b00 --- /dev/null +++ b/rust/object_store/.github/workflows/rust.yml @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# workspace wide tests +name: rust + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +# trigger for all PRs and changes to main +on: + push: + branches: + - main + pull_request: + +jobs: + # Run cargo fmt for all crates + lint: + name: Lint (cargo fmt) + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v5 + - name: Setup rustfmt + run: rustup component add rustfmt + - name: Format object_store + run: cargo fmt --all -- --check + + msrv: + name: Verify MSRV (Minimum Supported Rust Version) + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v5 + - name: Install cargo-msrv + run: cargo install cargo-msrv + - name: Downgrade object_store dependencies + # Necessary because tokio 1.30.0 updates MSRV to 1.63 + # and url 2.5.1, updates to 1.67 + run: | + cargo update -p tokio --precise 1.29.1 + cargo update -p url --precise 2.5.0 + cargo update -p once_cell --precise 1.20.3 + cargo update -p tracing-core --precise 0.1.33 + cargo update -p tracing-attributes --precise 0.1.28 + - name: Check + run: | + # run `cargo msrv verify` to see problems + cargo msrv verify --output-format=json || exit 1 diff --git a/rust/object_store/.github/workflows/take.yml b/rust/object_store/.github/workflows/take.yml new file mode 100644 index 0000000000..94a95f6e31 --- /dev/null +++ b/rust/object_store/.github/workflows/take.yml @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
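+# Lets a contributor claim an issue by commenting "take": the github-script step below adds the commenter as an assignee, and the job condition skips pull requests.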
+ +name: Assign the issue via a `take` comment +on: + issue_comment: + types: created + +permissions: + issues: write + +jobs: + issue_assign: + if: (!github.event.issue.pull_request) && github.event.comment.body == 'take' + runs-on: ubuntu-latest + steps: + - uses: actions/github-script@v8 + with: + script: | + github.rest.issues.addAssignees({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + assignees: [context.payload.comment.user.login], + }) diff --git a/rust/object_store/.github_changelog_generator b/rust/object_store/.github_changelog_generator new file mode 100644 index 0000000000..d84669766c --- /dev/null +++ b/rust/object_store/.github_changelog_generator @@ -0,0 +1,27 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Add special sections for documentation, security and performance +add-sections={"documentation":{"prefix":"**Documentation updates:**","labels":["documentation"]},"security":{"prefix":"**Security updates:**","labels":["security"]},"performance":{"prefix":"**Performance improvements:**","labels":["performance"]}} +# so that the component is shown associated with the issue +issue-line-labels=object-store +# skip non object_store issues +exclude-labels=development-process,invalid +breaking_labels=api-change diff --git a/rust/object_store/.gitignore b/rust/object_store/.gitignore new file mode 100644 index 0000000000..176220206f --- /dev/null +++ b/rust/object_store/.gitignore @@ -0,0 +1,100 @@ +Cargo.lock +target +rusty-tags.vi +.history +.flatbuffers/ +.idea/ +.vscode +.zed +.devcontainer +venv/* +# created by doctests +parquet/data.parquet +# release notes cache +.githubchangeloggenerator.cache +.githubchangeloggenerator.cache.log +justfile +.prettierignore +.env +.editorconfig +# local azurite file +__azurite* +__blobstorage__ + +# .bak files +*.bak +*.bak2 +# OS-specific .gitignores + +# Mac .gitignore +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# Linux .gitignore +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +# Windows .gitignore +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable 
+ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# Python virtual env in parquet crate +parquet/pytest/venv/ +__pycache__/ diff --git a/rust/object_store/CHANGELOG-old.md b/rust/object_store/CHANGELOG-old.md new file mode 100644 index 0000000000..23bc7cc7cb --- /dev/null +++ b/rust/object_store/CHANGELOG-old.md @@ -0,0 +1,1020 @@ + + +# Historical Changelog + + +## [v0.12.3](https://github.com/apache/arrow-rs-object-store/tree/v0.12.3) (2025-07-11) + +[Full Changelog](https://github.com/apache/arrow-rs-object-store/compare/v0.12.2...v0.12.3) + +**Implemented enhancements:** + +- S3 store fails without retrying [\#425](https://github.com/apache/arrow-rs-object-store/issues/425) +- Deprecate and Remove DynamoCommit [\#373](https://github.com/apache/arrow-rs-object-store/issues/373) +- Move payload helpers from `GetResult` to `GetResultPayload` [\#352](https://github.com/apache/arrow-rs-object-store/issues/352) +- Retry on 429s and equivalents [\#309](https://github.com/apache/arrow-rs-object-store/issues/309) +- object\_store: Support `container@account.dfs.core.windows.net/path` URL style for `az` protocol [\#285](https://github.com/apache/arrow-rs-object-store/issues/285) +- Rename `PutMultiPartOpts` to `PutMultiPartOptions`, the old name is deprecated and will be removed in the next major release [\#406](https://github.com/apache/arrow-rs-object-store/pull/406) + +**Fixed bugs:** + +- Builder panics on malformed GCS private key instead of returning error [\#419](https://github.com/apache/arrow-rs-object-store/issues/419) +- `cargo check --no-default-features --features=aws,azure,gcp,http` fails [\#411](https://github.com/apache/arrow-rs-object-store/issues/411) +- Incorrect prefix in `ObjectStoreScheme::parse` for Azure HTTP urls [\#398](https://github.com/apache/arrow-rs-object-store/issues/398) + +**Closed issues:** + +- `PutMode::Update` support for `LocalFileSystem`? 
[\#423](https://github.com/apache/arrow-rs-object-store/issues/423) + +**Merged pull requests:** + +- feat: retry on 408 [\#426](https://github.com/apache/arrow-rs-object-store/pull/426) ([criccomini](https://github.com/criccomini)) +- fix: expose source of `RetryError` [\#422](https://github.com/apache/arrow-rs-object-store/pull/422) ([crepererum](https://github.com/crepererum)) +- fix\(gcp\): throw error instead of panicking if read pem fails [\#421](https://github.com/apache/arrow-rs-object-store/pull/421) ([HugoCasa](https://github.com/HugoCasa)) +- chore: fix clippy 1.88 warnings [\#418](https://github.com/apache/arrow-rs-object-store/pull/418) ([mbrobbel](https://github.com/mbrobbel)) +- Bump quick-xml to version 0.38.0 [\#417](https://github.com/apache/arrow-rs-object-store/pull/417) ([raimannma](https://github.com/raimannma)) +- Prevent compilation error with all cloud features but fs turned on [\#412](https://github.com/apache/arrow-rs-object-store/pull/412) ([jder](https://github.com/jder)) +- Retry requests when status code is 429 [\#410](https://github.com/apache/arrow-rs-object-store/pull/410) ([paraseba](https://github.com/paraseba)) +- minor: Pin `tracing-attributes`, `tracing-core` to fix CI [\#404](https://github.com/apache/arrow-rs-object-store/pull/404) ([kylebarron](https://github.com/kylebarron)) +- feat \(azure\): support for account in `az://` URLs [\#403](https://github.com/apache/arrow-rs-object-store/pull/403) ([ByteBaker](https://github.com/ByteBaker)) +- Fix azure path parsing [\#399](https://github.com/apache/arrow-rs-object-store/pull/399) ([kylebarron](https://github.com/kylebarron)) + + +## [v0.12.2](https://github.com/apache/arrow-rs-object-store/tree/v0.12.2) (2025-06-06) + +[Full Changelog](https://github.com/apache/arrow-rs-object-store/compare/v0.12.1...v0.12.2) + +**Implemented enhancements:** + +- Add `ObjectStoreUrl` to resolve URLs to `ObjectStore` instances [\#356](https://github.com/apache/arrow-rs-object-store/issues/356) +- Retry / recover after partially reading a streaming response \( fix timeout errors / `error decoding response body` \) [\#15](https://github.com/apache/arrow-rs-object-store/issues/15) +- Expose `list_paginated` in object\_store [\#291](https://github.com/apache/arrow-rs-object-store/issues/291) + +**Fixed bugs:** + +- Emulator tests are broken on main [\#395](https://github.com/apache/arrow-rs-object-store/issues/395) +- Retry does not cover connection errors [\#368](https://github.com/apache/arrow-rs-object-store/issues/368) +- Error handling of HTTP storage backend not utilizing retry::RetryError::error when possible [\#365](https://github.com/apache/arrow-rs-object-store/issues/365) +- Error running `cargo publish`: wildcard \(`*`\) dependency constraints are not allowed on crates.io. 
[\#357](https://github.com/apache/arrow-rs-object-store/issues/357) +- No retries when connection closes abruptly \(i.e TCP-RST\) [\#350](https://github.com/apache/arrow-rs-object-store/issues/350) +- Compilation error in tests with Rust 1.87: integer out of range for `u16` in format string [\#343](https://github.com/apache/arrow-rs-object-store/issues/343) + +**Documentation updates:** + +- Improve `parse_url_opts` documentation [\#377](https://github.com/apache/arrow-rs-object-store/pull/377) ([alamb](https://github.com/alamb)) + +**Closed issues:** + +- object\_store pulls default reqwest features which always active native-tls [\#400](https://github.com/apache/arrow-rs-object-store/issues/400) +- Introduce retry to other methods than get after \#383 [\#387](https://github.com/apache/arrow-rs-object-store/issues/387) +- Security: AwsCredential prints plaintext may cause security issue. [\#363](https://github.com/apache/arrow-rs-object-store/issues/363) +- Docs build fails for object\_store 0.12.1 [\#360](https://github.com/apache/arrow-rs-object-store/issues/360) +- Is there a way to go from `ObjectStore` to `(URL, opts)`? [\#347](https://github.com/apache/arrow-rs-object-store/issues/347) + +**Merged pull requests:** + +- Chore: fix emulator tests due to changes in reqwest [\#401](https://github.com/apache/arrow-rs-object-store/pull/401) ([alamb](https://github.com/alamb)) +- Retry streaming get requests \(\#15\) [\#383](https://github.com/apache/arrow-rs-object-store/pull/383) ([tustvold](https://github.com/tustvold)) +- azure: do not set empty container name from parse\_url [\#379](https://github.com/apache/arrow-rs-object-store/pull/379) ([james-rms](https://github.com/james-rms)) +- Add ObjectStoreRegistry \(\#347\) [\#375](https://github.com/apache/arrow-rs-object-store/pull/375) ([tustvold](https://github.com/tustvold)) +- Deprecate DynamoCommit \(\#373\) [\#374](https://github.com/apache/arrow-rs-object-store/pull/374) ([tustvold](https://github.com/tustvold)) +- Add PaginatedListStore [\#371](https://github.com/apache/arrow-rs-object-store/pull/371) ([tustvold](https://github.com/tustvold)) +- Fix 1.87 Clippy Lints [\#370](https://github.com/apache/arrow-rs-object-store/pull/370) ([tustvold](https://github.com/tustvold)) +- Return Non-Generic Errors from HttpStore [\#366](https://github.com/apache/arrow-rs-object-store/pull/366) ([Rynoxx](https://github.com/Rynoxx)) +- fix: mask the aws credential info [\#364](https://github.com/apache/arrow-rs-object-store/pull/364) ([yanghua](https://github.com/yanghua)) +- Update integration test to avoid long format strings [\#359](https://github.com/apache/arrow-rs-object-store/pull/359) ([alamb](https://github.com/alamb)) +- fix: treat TCP reset as a retryable error [\#351](https://github.com/apache/arrow-rs-object-store/pull/351) ([OmriSteiner](https://github.com/OmriSteiner)) + + +## [v0.12.1](https://github.com/apache/arrow-rs-object-store/tree/v0.12.1) (2025-05-08) + +[Full Changelog](https://github.com/apache/arrow-rs-object-store/compare/v0.12.0...v0.12.1) + +**Implemented enhancements:** + +- Support Alibaba OSS Object Storage [\#323](https://github.com/apache/arrow-rs-object-store/issues/323) +- Enable anonymous access to GCS buckets [\#302](https://github.com/apache/arrow-rs-object-store/issues/302) +- \[object\_store\] Run requests on a different tokio runtime [\#13](https://github.com/apache/arrow-rs-object-store/issues/13) +- \[object\_store\] consider migrating `humantime` to `jiff` 
[\#292](https://github.com/apache/arrow-rs-object-store/issues/292) +- Support EKS Pod Identity \(alternative to IRSA\) [\#282](https://github.com/apache/arrow-rs-object-store/issues/282) +- Object\_store: Create an upload method that handles concurrency [\#279](https://github.com/apache/arrow-rs-object-store/issues/279) +- object\_store: Retry on connection duration timeouts \(retry / recover after partially reading a streaming response\) [\#53](https://github.com/apache/arrow-rs-object-store/issues/53) +- \[object-store\] re-export `hyper` [\#293](https://github.com/apache/arrow-rs-object-store/issues/293) +- object\_store: abort\_multipart\(\) should return NotFound error if not found [\#146](https://github.com/apache/arrow-rs-object-store/issues/146) +- Make `GetOptionsExt` publicly usable [\#261](https://github.com/apache/arrow-rs-object-store/issues/261) + +**Fixed bugs:** + +- Incorrect token sent as part of url signing function. [\#337](https://github.com/apache/arrow-rs-object-store/issues/337) +- Azure Gen2 broken on latest [\#320](https://github.com/apache/arrow-rs-object-store/issues/320) +- object\_store: Azure brokenness on 0.12.0 [\#326](https://github.com/apache/arrow-rs-object-store/issues/326) +- Generic S3 error: Client error with status 411 Length Required [\#278](https://github.com/apache/arrow-rs-object-store/issues/278) + +**Closed issues:** + +- CI doesn't run on PRs [\#335](https://github.com/apache/arrow-rs-object-store/issues/335) +- Some Inconsistencies in the Path and List [\#327](https://github.com/apache/arrow-rs-object-store/issues/327) +- Add allow-list to restrict access to local files with LocalFileSystem [\#312](https://github.com/apache/arrow-rs-object-store/issues/312) +- Query on usage of experimental package ring [\#310](https://github.com/apache/arrow-rs-object-store/issues/310) +- \[Object Store\] Make the service account used when interacting with the metadata url more flexible [\#265](https://github.com/apache/arrow-rs-object-store/issues/265) + +**Merged pull requests:** + +- chore: Add anda\_object\_store to README [\#346](https://github.com/apache/arrow-rs-object-store/pull/346) ([zensh](https://github.com/zensh)) +- Update nix requirement from 0.29.0 to 0.30.0 [\#344](https://github.com/apache/arrow-rs-object-store/pull/344) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix GCP signing token [\#338](https://github.com/apache/arrow-rs-object-store/pull/338) ([jackm-mimica](https://github.com/jackm-mimica)) +- Fix query parameter signing in Azure [\#334](https://github.com/apache/arrow-rs-object-store/pull/334) ([AdamGS](https://github.com/AdamGS)) +- feat: add EKS Pod Identity support \(\#282\) [\#333](https://github.com/apache/arrow-rs-object-store/pull/333) ([andreasbros](https://github.com/andreasbros)) +- feat: Add `SpawnService` and `SpawnedReqwestConnector` for running requests on a different runtime [\#332](https://github.com/apache/arrow-rs-object-store/pull/332) ([ion-elgreco](https://github.com/ion-elgreco)) +- Support `object_store` with wasm: Default wasm32-unknown-unknown HttpConnector [\#329](https://github.com/apache/arrow-rs-object-store/pull/329) ([H-Plus-Time](https://github.com/H-Plus-Time)) +- Enable anonymous access to GCS buckets [\#322](https://github.com/apache/arrow-rs-object-store/pull/322) ([kylebarron](https://github.com/kylebarron)) +- Fix semantic versioning link in README.md [\#317](https://github.com/apache/arrow-rs-object-store/pull/317) ([lewiszlw](https://github.com/lewiszlw)) +- feat: make some 
helpers/utils public [\#316](https://github.com/apache/arrow-rs-object-store/pull/316) ([crepererum](https://github.com/crepererum)) +- chore: fix `integration` feature [\#314](https://github.com/apache/arrow-rs-object-store/pull/314) ([crepererum](https://github.com/crepererum)) +- Bump `rand` to 0.9 [\#303](https://github.com/apache/arrow-rs-object-store/pull/303) ([mbrobbel](https://github.com/mbrobbel)) +- Add content length to PUT GCP multipart complete [\#257](https://github.com/apache/arrow-rs-object-store/pull/257) ([jkosh44](https://github.com/jkosh44)) +- Update README.md and Contributing guidelines [\#8](https://github.com/apache/arrow-rs-object-store/pull/8) ([alamb](https://github.com/alamb)) +- Tweaks: homepage and fix RAT [\#7](https://github.com/apache/arrow-rs-object-store/pull/7) ([alamb](https://github.com/alamb)) +- Import `object_store`, with history, from arrow-rs [\#3](https://github.com/apache/arrow-rs-object-store/pull/3) ([alamb](https://github.com/alamb)) + + + +## [object_store_0.12.0](https://github.com/apache/arrow-rs/tree/object_store_0.12.0) (2025-03-05) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.11.2...object_store_0.12.0) + +**Breaking changes:** + +- feat: add `Extensions` to object store `PutMultipartOpts` [\#7214](https://github.com/apache/arrow-rs/pull/7214) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum)) +- feat: add `Extensions` to object store `PutOptions` [\#7213](https://github.com/apache/arrow-rs/pull/7213) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum)) +- chore: enable conditional put by default for S3 [\#7181](https://github.com/apache/arrow-rs/pull/7181) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([meteorgan](https://github.com/meteorgan)) +- feat: add `Extensions` to object store `GetOptions` [\#7170](https://github.com/apache/arrow-rs/pull/7170) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum)) +- feat\(object\_store\): Override DNS Resolution to Randomize IP Selection [\#7123](https://github.com/apache/arrow-rs/pull/7123) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum)) +- Use `u64` range instead of `usize`, for better wasm32 support [\#6961](https://github.com/apache/arrow-rs/pull/6961) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([XiangpengHao](https://github.com/XiangpengHao)) +- object\_store: Add enabled-by-default "fs" feature [\#6636](https://github.com/apache/arrow-rs/pull/6636) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Turbo87](https://github.com/Turbo87)) +- Return `BoxStream` with `'static` lifetime from `ObjectStore::list` [\#6619](https://github.com/apache/arrow-rs/pull/6619) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([kylebarron](https://github.com/kylebarron)) +- object\_store: Migrate from snafu to thiserror [\#6266](https://github.com/apache/arrow-rs/pull/6266) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Turbo87](https://github.com/Turbo87)) + +**Implemented enhancements:** + +- Object Store: S3 IP address selection is biased [\#7117](https://github.com/apache/arrow-rs/issues/7117) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- 
+- Remove all RCs after release [\#7059](https://github.com/apache/arrow-rs/issues/7059) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- LocalFileSystem::list\_with\_offset is very slow over network file system [\#7018](https://github.com/apache/arrow-rs/issues/7018) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Release object store `0.11.2` \(non API breaking\) Around Dec 15 2024 [\#6902](https://github.com/apache/arrow-rs/issues/6902) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Fixed bugs:**
+
+- LocalFileSystem errors with satisfiable range request [\#6749](https://github.com/apache/arrow-rs/issues/6749) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Merged pull requests:**
+
+- ObjectStore WASM32 Support [\#7226](https://github.com/apache/arrow-rs/pull/7226) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- \[main\] Bump arrow version to 54.2.1 \(\#7207\) [\#7212](https://github.com/apache/arrow-rs/pull/7212) ([alamb](https://github.com/alamb))
+- Decouple ObjectStore from Reqwest [\#7183](https://github.com/apache/arrow-rs/pull/7183) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- object\_store: Disable all compression formats in HTTP reqwest client [\#7143](https://github.com/apache/arrow-rs/pull/7143) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([kylewlacy](https://github.com/kylewlacy))
+- refactor: remove unused `async` from `InMemory::entry` [\#7133](https://github.com/apache/arrow-rs/pull/7133) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum))
+- object\_store/gcp: derive Clone for GoogleCloudStorage [\#7112](https://github.com/apache/arrow-rs/pull/7112) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([james-rms](https://github.com/james-rms))
+- Update version to 54.2.0 and add CHANGELOG [\#7110](https://github.com/apache/arrow-rs/pull/7110) ([alamb](https://github.com/alamb))
+- Remove all RCs after release [\#7060](https://github.com/apache/arrow-rs/pull/7060) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([kou](https://github.com/kou))
+- Update release schedule README.md [\#7053](https://github.com/apache/arrow-rs/pull/7053) ([alamb](https://github.com/alamb))
+- Create GitHub releases automatically on tagging [\#7042](https://github.com/apache/arrow-rs/pull/7042) ([kou](https://github.com/kou))
+- Change Log On Successful S3 Copy / Multipart Upload to Debug [\#7033](https://github.com/apache/arrow-rs/pull/7033) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([diptanu](https://github.com/diptanu))
+- Prepare for `54.1.0` release [\#7031](https://github.com/apache/arrow-rs/pull/7031) ([alamb](https://github.com/alamb))
+- Add a custom implementation `LocalFileSystem::list_with_offset` [\#7019](https://github.com/apache/arrow-rs/pull/7019) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([corwinjoy](https://github.com/corwinjoy))
+- Improve docs for `AmazonS3Builder::from_env` [\#6977](https://github.com/apache/arrow-rs/pull/6977) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([kylebarron](https://github.com/kylebarron))
+- Fix WASM CI for Rust 1.84 release [\#6963](https://github.com/apache/arrow-rs/pull/6963) ([alamb](https://github.com/alamb))
+- Update itertools requirement from 0.13.0 to 0.14.0 in /object\_store [\#6925](https://github.com/apache/arrow-rs/pull/6925) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Fix LocalFileSystem with range request that ends beyond end of file [\#6751](https://github.com/apache/arrow-rs/pull/6751) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([kylebarron](https://github.com/kylebarron))
+
+
+
+## [object_store_0.11.2](https://github.com/apache/arrow-rs/tree/object_store_0.11.2) (2024-12-20)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.11.1...object_store_0.11.2)
+
+**Implemented enhancements:**
+
+- object-store's AzureClient should protect against multiple streams performing put\_block in parallel for the same BLOB path [\#6868](https://github.com/apache/arrow-rs/issues/6868) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support S3 Put IfMatch [\#6799](https://github.com/apache/arrow-rs/issues/6799) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store Azure Government using OAuth [\#6759](https://github.com/apache/arrow-rs/issues/6759) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support for AWS Requester Pays buckets [\#6716](https://github.com/apache/arrow-rs/issues/6716) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[object-store\]: Implement credential\_process support for S3 [\#6422](https://github.com/apache/arrow-rs/issues/6422) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store: Conditional put and rename\_if\_not\_exist on S3 [\#6285](https://github.com/apache/arrow-rs/issues/6285) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Fixed bugs:**
+
+- `object_store` errors when `reqwest` `gzip` feature is enabled [\#6842](https://github.com/apache/arrow-rs/issues/6842) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Multi-part s3 uploads fail when using checksum [\#6793](https://github.com/apache/arrow-rs/issues/6793) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- `with_unsigned_payload` shouldn't generate payload hash [\#6697](https://github.com/apache/arrow-rs/issues/6697) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[Object\_store\] min\_ttl is too high for GKE tokens [\#6625](https://github.com/apache/arrow-rs/issues/6625) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store `test_private_bucket` fails - store: "S3", source: BucketNotFound { bucket: "bloxbender" } [\#6600](https://github.com/apache/arrow-rs/issues/6600) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- S3 endpoint and trailing slash result in weird/invalid requests [\#6580](https://github.com/apache/arrow-rs/issues/6580) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Merged pull requests:**
+
+- Use randomized content ID for Azure multipart uploads [\#6869](https://github.com/apache/arrow-rs/pull/6869) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([avarnon](https://github.com/avarnon))
+- Always explicitly disable `gzip` automatic decompression on reqwest client used by object\_store [\#6843](https://github.com/apache/arrow-rs/pull/6843) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([phillipleblanc](https://github.com/phillipleblanc))
+- object-store: remove S3ConditionalPut::ETagPutIfNotExists [\#6802](https://github.com/apache/arrow-rs/pull/6802) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([benesch](https://github.com/benesch))
+- Fix multipart uploads with checksums on object locked buckets [\#6794](https://github.com/apache/arrow-rs/pull/6794) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([avantgardnerio](https://github.com/avantgardnerio))
+- Add AuthorityHost to AzureConfigKey [\#6773](https://github.com/apache/arrow-rs/pull/6773) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([zadeluca](https://github.com/zadeluca))
+- object\_store: Add support for requester pays buckets [\#6768](https://github.com/apache/arrow-rs/pull/6768) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([kylebarron](https://github.com/kylebarron))
+- check sign\_payload instead of skip\_signature before computing checksum [\#6698](https://github.com/apache/arrow-rs/pull/6698) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([mherrerarendon](https://github.com/mherrerarendon))
+- Update quick-xml requirement from 0.36.0 to 0.37.0 in /object\_store [\#6687](https://github.com/apache/arrow-rs/pull/6687) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum))
+- Support native S3 conditional writes [\#6682](https://github.com/apache/arrow-rs/pull/6682) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([benesch](https://github.com/benesch))
+- \[object\_store\] fix S3 endpoint and trailing slash result in invalid requests [\#6641](https://github.com/apache/arrow-rs/pull/6641) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([adbmal](https://github.com/adbmal))
+- Lower GCP token min\_ttl to 4 minutes and add backoff to token refresh logic [\#6638](https://github.com/apache/arrow-rs/pull/6638) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([mwylde](https://github.com/mwylde))
+- Remove `test_private_bucket` object\_store test [\#6601](https://github.com/apache/arrow-rs/pull/6601) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))
+
+## [object_store_0.11.1](https://github.com/apache/arrow-rs/tree/object_store_0.11.1) (2024-10-15)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.11.0...object_store_0.11.1)
+
+**Implemented enhancements:**
+
+- There is no way to pass object store client options as environment variables [\#6333](https://github.com/apache/arrow-rs/issues/6333) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Better Document Backoff Algorithm [\#6324](https://github.com/apache/arrow-rs/issues/6324) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Add direction to `list_with_offset` [\#6274](https://github.com/apache/arrow-rs/issues/6274) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support server-side encryption with customer-provided keys \(SSE-C\) [\#6229](https://github.com/apache/arrow-rs/issues/6229) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Fixed bugs:**
+
+- \[object-store\] Requested tokio version is too old - does not compile [\#6458](https://github.com/apache/arrow-rs/issues/6458) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Azure SAS tokens are visible when retry errors are logged via object\_store [\#6322](https://github.com/apache/arrow-rs/issues/6322) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Merged pull requests:**
+
+- object\_store: fix typo in with\_connect\_timeout\_disabled that actually disabled non-connect timeouts [\#6563](https://github.com/apache/arrow-rs/pull/6563) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([adriangb](https://github.com/adriangb))
+- object\_store: Clarify what is a prefix in list\(\) documentation [\#6520](https://github.com/apache/arrow-rs/pull/6520) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([progval](https://github.com/progval))
+- object\_store: enable lint `unreachable_pub` [\#6512](https://github.com/apache/arrow-rs/pull/6512) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ByteBaker](https://github.com/ByteBaker))
+- \[object\_store\] Retry S3 requests with 200 response with "Error" in body [\#6508](https://github.com/apache/arrow-rs/pull/6508) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([PeterKeDer](https://github.com/PeterKeDer))
+- \[object-store\] Require tokio 1.29.0. [\#6459](https://github.com/apache/arrow-rs/pull/6459) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ashtuchkin](https://github.com/ashtuchkin))
+- feat: expose HTTP/2 max frame size in `object_store` [\#6442](https://github.com/apache/arrow-rs/pull/6442) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum))
+- Derive `Clone` for `object_store::aws::AmazonS3` [\#6414](https://github.com/apache/arrow-rs/pull/6414) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ethe](https://github.com/ethe))
+- object\_store: Support Azure Fabric OAuth Provider [\#6382](https://github.com/apache/arrow-rs/pull/6382) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([RobinLin666](https://github.com/RobinLin666))
+- `object_store::GetOptions` derive `Clone` [\#6361](https://github.com/apache/arrow-rs/pull/6361) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([samuelcolvin](https://github.com/samuelcolvin))
+- \[object\_store\] Propagate env vars as object store client options [\#6334](https://github.com/apache/arrow-rs/pull/6334) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ccciudatu](https://github.com/ccciudatu))
+- docs\[object\_store\]: clarify the backoff strategy that is actually implemented [\#6325](https://github.com/apache/arrow-rs/pull/6325) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([westonpace](https://github.com/westonpace))
+- fix: azure sas token visible in logs [\#6323](https://github.com/apache/arrow-rs/pull/6323) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- object\_store/delimited: Fix `TrailingEscape` condition [\#6265](https://github.com/apache/arrow-rs/pull/6265) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Turbo87](https://github.com/Turbo87))
+- fix\(object\_store\): only add encryption headers for SSE-C in get request [\#6260](https://github.com/apache/arrow-rs/pull/6260) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([jiachengdb](https://github.com/jiachengdb))
+- docs: Add parquet\_opendal in related projects [\#6236](https://github.com/apache/arrow-rs/pull/6236) ([Xuanwo](https://github.com/Xuanwo))
+- feat\(object\_store\): add support for server-side encryption with customer-provided keys \(SSE-C\) [\#6230](https://github.com/apache/arrow-rs/pull/6230) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([jiachengdb](https://github.com/jiachengdb))
+- feat: further TLS options on ClientOptions: \#5034 [\#6148](https://github.com/apache/arrow-rs/pull/6148) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ByteBaker](https://github.com/ByteBaker))
+
+
+
+## [object_store_0.11.0](https://github.com/apache/arrow-rs/tree/object_store_0.11.0) (2024-08-12)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.10.2...object_store_0.11.0)
+
+**Breaking changes:**
+
+- Make object\_store errors non-exhaustive [\#6165](https://github.com/apache/arrow-rs/pull/6165) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Update snafu to `0.8.0` in object\_store \(\#5930\) [\#6070](https://github.com/apache/arrow-rs/pull/6070) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))
+
+
+**Merged pull requests:**
+
+- Add LICENSE and NOTICE files to object\_store [\#6234](https://github.com/apache/arrow-rs/pull/6234) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))
+- feat\(object\_store\): add `PermissionDenied` variant to top-level error [\#6194](https://github.com/apache/arrow-rs/pull/6194) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([kyle-mccarthy](https://github.com/kyle-mccarthy))
+- Update object store MSRV to `1.64` [\#6123](https://github.com/apache/arrow-rs/pull/6123) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))
+- Fix clippy in object\_store crate [\#6120](https://github.com/apache/arrow-rs/pull/6120) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))
+
+## [object_store_0.10.2](https://github.com/apache/arrow-rs/tree/object_store_0.10.2) (2024-07-17)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.10.1...object_store_0.10.2)
+
+**Implemented enhancements:**
+
+- Relax `WriteMultipart` API to support aborting after completion [\#5977](https://github.com/apache/arrow-rs/issues/5977) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Make ObjectStoreScheme in the object\_store crate public [\#5911](https://github.com/apache/arrow-rs/issues/5911) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Add BufUploader to implement same feature upon `WriteMultipart` like `BufWriter` [\#5834](https://github.com/apache/arrow-rs/issues/5834) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Fixed bugs:**
+
+- Investigate why `InstanceCredentialProvider::cache` is flagged as dead code [\#5884](https://github.com/apache/arrow-rs/issues/5884) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[object\_store\] Potential race condition in `list_with_delimiter` on `Local` [\#5800](https://github.com/apache/arrow-rs/issues/5800) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Documentation updates:**
+
+- Correct timeout in comment from 5s to 30s [\#6073](https://github.com/apache/arrow-rs/pull/6073) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([trungda](https://github.com/trungda))
+- docs: Fix broken links of object\_store\_opendal README [\#5929](https://github.com/apache/arrow-rs/pull/5929) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Xuanwo](https://github.com/Xuanwo))
+- docs: Add object\_store\_opendal as related projects [\#5926](https://github.com/apache/arrow-rs/pull/5926) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Xuanwo](https://github.com/Xuanwo))
+- chore: update docs to delineate which ObjectStore lists are recursive [\#5794](https://github.com/apache/arrow-rs/pull/5794) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([wiedld](https://github.com/wiedld))
+- Document object store release cadence [\#5750](https://github.com/apache/arrow-rs/pull/5750) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))
+
+**Merged pull requests:**
+
+- Sanitize error message for sensitive requests [\#6074](https://github.com/apache/arrow-rs/pull/6074) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Update quick-xml requirement from 0.35.0 to 0.36.0 in /object\_store [\#6032](https://github.com/apache/arrow-rs/pull/6032) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- use GCE metadata server env var overrides [\#6015](https://github.com/apache/arrow-rs/pull/6015) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([barronw](https://github.com/barronw))
+- Update quick-xml requirement from 0.34.0 to 0.35.0 in /object\_store [\#5983](https://github.com/apache/arrow-rs/pull/5983) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Automatically cleanup empty dirs in LocalFileSystem [\#5978](https://github.com/apache/arrow-rs/pull/5978) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([fsdvh](https://github.com/fsdvh))
+- WriteMultipart Abort on MultipartUpload::complete Error [\#5974](https://github.com/apache/arrow-rs/pull/5974) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([fsdvh](https://github.com/fsdvh))
+- Update quick-xml requirement from 0.33.0 to 0.34.0 in /object\_store [\#5954](https://github.com/apache/arrow-rs/pull/5954) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Update quick-xml requirement from 0.32.0 to 0.33.0 in /object\_store [\#5946](https://github.com/apache/arrow-rs/pull/5946) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Add `MultipartUpload` blanket implementation for `Box` [\#5919](https://github.com/apache/arrow-rs/pull/5919) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([fsdvh](https://github.com/fsdvh))
+- Add user defined metadata [\#5915](https://github.com/apache/arrow-rs/pull/5915) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([criccomini](https://github.com/criccomini))
+- Make ObjectStoreScheme public [\#5912](https://github.com/apache/arrow-rs/pull/5912) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([orf](https://github.com/orf))
+- chore: Remove not used cache in InstanceCredentialProvider [\#5888](https://github.com/apache/arrow-rs/pull/5888) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Xuanwo](https://github.com/Xuanwo))
+- Fix clippy for object\_store [\#5883](https://github.com/apache/arrow-rs/pull/5883) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))
+- Update quick-xml requirement from 0.31.0 to 0.32.0 in /object\_store [\#5870](https://github.com/apache/arrow-rs/pull/5870) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- feat\(object\_store\): Add `put` API for buffered::BufWriter [\#5835](https://github.com/apache/arrow-rs/pull/5835) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Xuanwo](https://github.com/Xuanwo))
+- Fix 5592: Colon \(:\) in object\_store::path::{Path} is not handled on Windows [\#5830](https://github.com/apache/arrow-rs/pull/5830) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([hesampakdaman](https://github.com/hesampakdaman))
+- Fix issue \#5800: Handle missing files in list\_with\_delimiter [\#5803](https://github.com/apache/arrow-rs/pull/5803) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([hesampakdaman](https://github.com/hesampakdaman))
+- Update nix requirement from 0.28.0 to 0.29.0 in /object\_store [\#5799](https://github.com/apache/arrow-rs/pull/5799) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Update itertools requirement from 0.12.0 to 0.13.0 in /object\_store [\#5780](https://github.com/apache/arrow-rs/pull/5780) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Add additional WriteMultipart tests \(\#5743\) [\#5746](https://github.com/apache/arrow-rs/pull/5746) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+
+
+
+\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)*
+
+## [object_store_0.10.1](https://github.com/apache/arrow-rs/tree/object_store_0.10.1) (2024-05-10)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.10.0...object_store_0.10.1)
+
+**Implemented enhancements:**
+
+- Allow specifying PUT options when using `BufWriter` [\#5692](https://github.com/apache/arrow-rs/issues/5692) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Add more attributes to `object_store::Attribute` [\#5689](https://github.com/apache/arrow-rs/issues/5689) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- feat object\_store: moving tests from src/ to a tests/ folder and enabling access to test functions for enabling a shared integration test suite [\#5685](https://github.com/apache/arrow-rs/issues/5685) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Release Object Store 0.10.0 [\#5647](https://github.com/apache/arrow-rs/issues/5647) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Fixed bugs:**
+
+- Using WriteMultipart::put results in 0 bytes being written [\#5743](https://github.com/apache/arrow-rs/issues/5743) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Merged pull requests:**
+
+- Fix PutPayloadMut::push not updating content\_length \(\#5743\) [\#5744](https://github.com/apache/arrow-rs/pull/5744) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Export object\_store integration tests [\#5709](https://github.com/apache/arrow-rs/pull/5709) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Add `BufWriter::with_attributes` and `::with_tags` in `object_store` [\#5693](https://github.com/apache/arrow-rs/pull/5693) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([netthier](https://github.com/netthier))
+- Add more attributes to `object_store::Attribute` [\#5690](https://github.com/apache/arrow-rs/pull/5690) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([netthier](https://github.com/netthier))
+
+
+## [object_store_0.10.0](https://github.com/apache/arrow-rs/tree/object_store_0.10.0) (2024-04-17)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.9.1...object_store_0.10.0)
+
+**Breaking changes:**
+
+- Add put\_multipart\_opts \(\#5435\) [\#5652](https://github.com/apache/arrow-rs/pull/5652) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Add Attributes API \(\#5329\) [\#5650](https://github.com/apache/arrow-rs/pull/5650) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Support non-contiguous put payloads / vectored writes \(\#5514\) [\#5538](https://github.com/apache/arrow-rs/pull/5538) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Replace AsyncWrite with Upload trait and rename MultiPartStore to MultipartStore \(\#5458\) [\#5500](https://github.com/apache/arrow-rs/pull/5500) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+
+**Implemented enhancements:**
+
+- Improve Retry Coverage [\#5608](https://github.com/apache/arrow-rs/issues/5608) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Zero Copy Support [\#5593](https://github.com/apache/arrow-rs/issues/5593)
+- ObjectStore bulk delete [\#5591](https://github.com/apache/arrow-rs/issues/5591)
+- Retry on Broken Connection [\#5589](https://github.com/apache/arrow-rs/issues/5589)
+- Inconsistent Multipart Nomenclature [\#5526](https://github.com/apache/arrow-rs/issues/5526) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[ObjectStore\] Non-Contiguous Write Payloads [\#5514](https://github.com/apache/arrow-rs/issues/5514) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- In Object Store, return version & etag on multipart put. [\#5443](https://github.com/apache/arrow-rs/issues/5443) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Release Object Store 0.9.1 [\#5436](https://github.com/apache/arrow-rs/issues/5436) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store: allow setting content-type per request [\#5329](https://github.com/apache/arrow-rs/issues/5329) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- GCS Signed URL Support [\#5233](https://github.com/apache/arrow-rs/issues/5233)
+
+**Fixed bugs:**
+
+- \[object\_store\] minor bug: typos present in local variable [\#5628](https://github.com/apache/arrow-rs/issues/5628) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[arrow-csv\] Schema inference requires csv on disk [\#5551](https://github.com/apache/arrow-rs/issues/5551)
+- Local object store copy/rename with nonexistent `from` file loops forever instead of erroring [\#5503](https://github.com/apache/arrow-rs/issues/5503) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object store ApplicationDefaultCredentials auth is not working on windows [\#5466](https://github.com/apache/arrow-rs/issues/5466) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- MicrosoftAzure store list result omits empty objects [\#5451](https://github.com/apache/arrow-rs/issues/5451) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Documentation updates:**
+
+- Minor: add additional documentation about `BufWriter` [\#5519](https://github.com/apache/arrow-rs/pull/5519) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))
+
+**Merged pull requests:**
+
+- minor-fix: removed typos in object\_store sub crate [\#5629](https://github.com/apache/arrow-rs/pull/5629) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Silemo](https://github.com/Silemo))
+- Retry on More Error Classes [\#5609](https://github.com/apache/arrow-rs/pull/5609) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([andrebsguedes](https://github.com/andrebsguedes))
+- Fix handling of empty multipart uploads for GCS [\#5590](https://github.com/apache/arrow-rs/pull/5590) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Upgrade object\_store dependency to use chrono `0.4.34` [\#5578](https://github.com/apache/arrow-rs/pull/5578) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([l1nxy](https://github.com/l1nxy))
+- Fix Latest Clippy Lints for object\_store [\#5546](https://github.com/apache/arrow-rs/pull/5546) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Update reqwest 0.12 and http 1.0 [\#5536](https://github.com/apache/arrow-rs/pull/5536) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Implement MultipartStore for ThrottledStore [\#5533](https://github.com/apache/arrow-rs/pull/5533) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- fix: copy/rename return error if source is nonexistent [\#5528](https://github.com/apache/arrow-rs/pull/5528) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dimbtp](https://github.com/dimbtp))
+- Prepare arrow 51.0.0 [\#5516](https://github.com/apache/arrow-rs/pull/5516) ([tustvold](https://github.com/tustvold))
+- Implement MultiPartStore for InMemory [\#5495](https://github.com/apache/arrow-rs/pull/5495) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Add more comprehensive documentation on testing and benchmarking to CONTRIBUTING.md [\#5478](https://github.com/apache/arrow-rs/pull/5478) ([monkwire](https://github.com/monkwire))
+- add support for gcp application default auth on windows in object store [\#5473](https://github.com/apache/arrow-rs/pull/5473) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Itayazolay](https://github.com/Itayazolay))
+- Update base64 requirement from 0.21 to 0.22 in /object\_store [\#5465](https://github.com/apache/arrow-rs/pull/5465) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Uses ResourceType for filtering list directories instead of workaround [\#5452](https://github.com/apache/arrow-rs/pull/5452) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([andrebsguedes](https://github.com/andrebsguedes))
+- Add GCS signed URL support [\#5300](https://github.com/apache/arrow-rs/pull/5300) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([l1nxy](https://github.com/l1nxy))
+
+## [object_store_0.9.1](https://github.com/apache/arrow-rs/tree/object_store_0.9.1) (2024-03-01)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.9.0...object_store_0.9.1)
+
+**Implemented enhancements:**
+
+- \[object\_store\] Enable anonymous read access for Azure [\#5424](https://github.com/apache/arrow-rs/issues/5424) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support for additional URL formats in object\_store for Azure blob [\#5370](https://github.com/apache/arrow-rs/issues/5370) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Mention "Http" support in README [\#5320](https://github.com/apache/arrow-rs/issues/5320) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Pass Options to HttpBuilder in parse\_url\_opts [\#5310](https://github.com/apache/arrow-rs/issues/5310) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Remove Localstack DynamoDB Workaround Once Fixed Upstream [\#5267](https://github.com/apache/arrow-rs/issues/5267) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Can I use S3 server side encryption [\#5087](https://github.com/apache/arrow-rs/issues/5087) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Fixed bugs:**
+
+- delete\_stream fails in MinIO [\#5414](https://github.com/apache/arrow-rs/issues/5414) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[object\_store\] Completing an empty Multipart Upload fails for AWS S3 [\#5404](https://github.com/apache/arrow-rs/issues/5404) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Multipart upload can leave futures unpolled, leading to timeout [\#5366](https://github.com/apache/arrow-rs/issues/5366) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Broken Link in README \(Rust Object Store\) Content [\#5309](https://github.com/apache/arrow-rs/issues/5309) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Merged pull requests:**
+
+- Expose path\_to\_filesystem public [\#5441](https://github.com/apache/arrow-rs/pull/5441) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([metesynnada](https://github.com/metesynnada))
+- Update nix requirement from 0.27.1 to 0.28.0 in /object\_store [\#5432](https://github.com/apache/arrow-rs/pull/5432) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Add BufWriter for Adaptive Put / Multipart Upload [\#5431](https://github.com/apache/arrow-rs/pull/5431) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Enable anonymous access for MicrosoftAzure [\#5425](https://github.com/apache/arrow-rs/pull/5425) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([andrebsguedes](https://github.com/andrebsguedes))
+- fix\(object\_store\): Include Content-MD5 header for S3 DeleteObjects [\#5415](https://github.com/apache/arrow-rs/pull/5415) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([paraseba](https://github.com/paraseba))
+- docs\(object\_store\): Mention HTTP/WebDAV in README [\#5409](https://github.com/apache/arrow-rs/pull/5409) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Xuanwo](https://github.com/Xuanwo))
+- \[object\_store\] Fix empty Multipart Upload for AWS S3 [\#5405](https://github.com/apache/arrow-rs/pull/5405) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([andrebsguedes](https://github.com/andrebsguedes))
+- feat: S3 server-side encryption [\#5402](https://github.com/apache/arrow-rs/pull/5402) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([wjones127](https://github.com/wjones127))
+- Pull container name from URL for Azure blob [\#5371](https://github.com/apache/arrow-rs/pull/5371) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([bradvoth](https://github.com/bradvoth))
+- docs\(object-store\): add warning to flush [\#5369](https://github.com/apache/arrow-rs/pull/5369) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([wjones127](https://github.com/wjones127))
+- Minor\(docs\): update master to main for DataFusion/Ballista [\#5363](https://github.com/apache/arrow-rs/pull/5363) ([caicancai](https://github.com/caicancai))
+- Test parse\_url\_opts for HTTP \(\#5310\) [\#5316](https://github.com/apache/arrow-rs/pull/5316) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Update IOx links [\#5312](https://github.com/apache/arrow-rs/pull/5312) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Pass options to HTTPBuilder in parse\_url\_opts \(\#5310\) [\#5311](https://github.com/apache/arrow-rs/pull/5311) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Bump actions/cache from 3 to 4 [\#5308](https://github.com/apache/arrow-rs/pull/5308) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Remove localstack DynamoDB workaround \(\#5267\) [\#5307](https://github.com/apache/arrow-rs/pull/5307) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- refactor: log server error during object store retries [\#5294](https://github.com/apache/arrow-rs/pull/5294) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum))
+- Prepare arrow 50.0.0 [\#5291](https://github.com/apache/arrow-rs/pull/5291) ([tustvold](https://github.com/tustvold))
+- Enable JS tests again [\#5287](https://github.com/apache/arrow-rs/pull/5287) ([domoritz](https://github.com/domoritz))
+
+## [object_store_0.9.0](https://github.com/apache/arrow-rs/tree/object_store_0.9.0) (2024-01-05)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.8.0...object_store_0.9.0)
+
+**Breaking changes:**
+
+- Remove deprecated try\_with\_option methods [\#5237](https://github.com/apache/arrow-rs/pull/5237) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- object\_store: full HTTP range support [\#5222](https://github.com/apache/arrow-rs/pull/5222) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([clbarnes](https://github.com/clbarnes))
+- feat\(object\_store\): use http1 by default [\#5204](https://github.com/apache/arrow-rs/pull/5204) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([wjones127](https://github.com/wjones127))
+- refactor: change `object_store` CA handling [\#5056](https://github.com/apache/arrow-rs/pull/5056) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum))
+
+**Implemented enhancements:**
+
+- Azure Signed URL Support [\#5232](https://github.com/apache/arrow-rs/issues/5232) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[object-store\] Make aws region optional. [\#5211](https://github.com/apache/arrow-rs/issues/5211) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[object\_store,gcp\] Document GoogleCloudStorage Default Credentials [\#5187](https://github.com/apache/arrow-rs/issues/5187) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support S3 Express One Zone [\#5140](https://github.com/apache/arrow-rs/issues/5140) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- `object_store`: Allow 403 Forbidden for `copy_if_not_exists` S3 status code [\#5132](https://github.com/apache/arrow-rs/issues/5132) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Add `copy_if_not_exists` support for AmazonS3 via DynamoDB Lock Support [\#4880](https://github.com/apache/arrow-rs/issues/4880) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store: native certs, w/o webpki-roots [\#4870](https://github.com/apache/arrow-rs/issues/4870)
+- object\_store: range request with suffix [\#4611](https://github.com/apache/arrow-rs/issues/4611)
+
+**Fixed bugs:**
+
+- ObjectStore::get\_opts Incorrectly Returns Response Size not Object Size [\#5272](https://github.com/apache/arrow-rs/issues/5272) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Single object store has limited throughput on GCS [\#5194](https://github.com/apache/arrow-rs/issues/5194) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- local::tests::invalid\_path fails during object store release verification [\#5035](https://github.com/apache/arrow-rs/issues/5035) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Object Store Doctest Failure with Default Features [\#5025](https://github.com/apache/arrow-rs/issues/5025) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Documentation updates:**
+
+- Document default value of InstanceCredentialProvider [\#5188](https://github.com/apache/arrow-rs/pull/5188) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([justinabrahms](https://github.com/justinabrahms))
+
+**Merged pull requests:**
+
+- Retry Safe/Read-Only Requests on Timeout [\#5278](https://github.com/apache/arrow-rs/pull/5278) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Fix ObjectMeta::size for range requests \(\#5272\) [\#5276](https://github.com/apache/arrow-rs/pull/5276) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- docs\(object\_store\): Mention `with_allow_http` in docs of `with_endpoint` [\#5275](https://github.com/apache/arrow-rs/pull/5275) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Xuanwo](https://github.com/Xuanwo))
+- Support S3 Express One Zone [\#5268](https://github.com/apache/arrow-rs/pull/5268) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- feat\(object\_store\): Azure url signing [\#5259](https://github.com/apache/arrow-rs/pull/5259) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([roeap](https://github.com/roeap))
+- DynamoDB ConditionalPut [\#5247](https://github.com/apache/arrow-rs/pull/5247) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Default AWS region to us-east-1 \(\#5211\) [\#5244](https://github.com/apache/arrow-rs/pull/5244) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- ci: Fail Miri CI on first failure [\#5243](https://github.com/apache/arrow-rs/pull/5243) ([Jefffrey](https://github.com/Jefffrey))
+- Bump actions/upload-pages-artifact from 2 to 3 [\#5229](https://github.com/apache/arrow-rs/pull/5229) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/setup-python from 4 to 5 [\#5175](https://github.com/apache/arrow-rs/pull/5175) ([dependabot[bot]](https://github.com/apps/dependabot))
+- fix: ensure take\_fixed\_size\_list can handle null indices [\#5170](https://github.com/apache/arrow-rs/pull/5170) ([westonpace](https://github.com/westonpace))
+- Bump actions/labeler from 4.3.0 to 5.0.0 [\#5167](https://github.com/apache/arrow-rs/pull/5167) ([dependabot[bot]](https://github.com/apps/dependabot))
+- object\_store: fix failing doctest with default features [\#5161](https://github.com/apache/arrow-rs/pull/5161) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Jefffrey](https://github.com/Jefffrey))
+- Update rustls-pemfile requirement from 1.0 to 2.0 in /object\_store [\#5155](https://github.com/apache/arrow-rs/pull/5155) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Allow 403 for overwrite prevention [\#5134](https://github.com/apache/arrow-rs/pull/5134) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([emcake](https://github.com/emcake))
+- Fix ObjectStore.LocalFileSystem.put\_opts for blobfuse [\#5094](https://github.com/apache/arrow-rs/pull/5094) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([RobinLin666](https://github.com/RobinLin666))
+- Update itertools requirement from 0.11.0 to 0.12.0 in /object\_store [\#5077](https://github.com/apache/arrow-rs/pull/5077) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Add a PR under "Breaking changes" in the object\_store 0.8.0 changelog [\#5063](https://github.com/apache/arrow-rs/pull/5063) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([carols10cents](https://github.com/carols10cents))
+- Prepare arrow 49.0.0 [\#5054](https://github.com/apache/arrow-rs/pull/5054) ([tustvold](https://github.com/tustvold))
+- Fix invalid\_path test [\#5026](https://github.com/apache/arrow-rs/pull/5026) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Implement `copy_if_not_exist` for `AmazonS3` using DynamoDB \(\#4880\) [\#4918](https://github.com/apache/arrow-rs/pull/4918) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+
+## [object_store_0.8.0](https://github.com/apache/arrow-rs/tree/object_store_0.8.0) (2023-11-02)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.7.1...object_store_0.8.0)
+
+**Breaking changes:**
+
+- Remove ObjectStore::append [\#5016](https://github.com/apache/arrow-rs/pull/5016) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Don't panic on invalid Azure access key \(\#4972\) [\#4974](https://github.com/apache/arrow-rs/pull/4974) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Return `PutResult` with an ETag from ObjectStore::put \(\#4934\) [\#4944](https://github.com/apache/arrow-rs/pull/4944) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Add ObjectMeta::version and GetOptions::version \(\#4925\) [\#4935](https://github.com/apache/arrow-rs/pull/4935) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Add GetOptions::head [\#4931](https://github.com/apache/arrow-rs/pull/4931) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Remove Nested async and Fallibility from ObjectStore::list [\#4930](https://github.com/apache/arrow-rs/pull/4930) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Add ObjectStore::put\_opts / Conditional Put [\#4984](https://github.com/apache/arrow-rs/pull/4984) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+
+**Implemented enhancements:**
+
+- Relax Path Safety on Parse [\#5019](https://github.com/apache/arrow-rs/issues/5019) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- ObjectStore: hard to determine the cause of the error thrown from retry [\#5013](https://github.com/apache/arrow-rs/issues/5013) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- continue existing multi-part upload [\#4961](https://github.com/apache/arrow-rs/issues/4961) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Simplify ObjectStore::List [\#4946](https://github.com/apache/arrow-rs/issues/4946) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Return ETag and Version on Put [\#4934](https://github.com/apache/arrow-rs/issues/4934) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support Not Signing Requests in AmazonS3 [\#4927](https://github.com/apache/arrow-rs/issues/4927) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Get Object By Version [\#4925](https://github.com/apache/arrow-rs/issues/4925) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Plans for supporting Extension Array to support Fixed shape tensor Array [\#4890](https://github.com/apache/arrow-rs/issues/4890)
+- Conditional Put Support [\#4879](https://github.com/apache/arrow-rs/issues/4879) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- creates\_dir\_if\_not\_present\_append Test is Flaky [\#4872](https://github.com/apache/arrow-rs/issues/4872) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Release object\_store `0.7.1` [\#4858](https://github.com/apache/arrow-rs/issues/4858) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support User-Defined Object Metadata [\#4754](https://github.com/apache/arrow-rs/issues/4754) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- APIs for directly managing multi-part uploads and saving potential parquet footers [\#4608](https://github.com/apache/arrow-rs/issues/4608)
+
+**Fixed bugs:**
+
+- ObjectStore parse\_url Incorrectly Handles URLs with Spaces [\#5017](https://github.com/apache/arrow-rs/issues/5017) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[object-store\]: periods/dots error in GCP bucket [\#4991](https://github.com/apache/arrow-rs/issues/4991) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Azure ImdsManagedIdentityProvider does not work in Azure functions [\#4976](https://github.com/apache/arrow-rs/issues/4976) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Panic when using an azure object store with an invalid access key [\#4972](https://github.com/apache/arrow-rs/issues/4972) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Handle Body Errors in AWS CompleteMultipartUpload [\#4965](https://github.com/apache/arrow-rs/issues/4965) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- ObjectStore multiple\_append Test is Flaky [\#4868](https://github.com/apache/arrow-rs/issues/4868) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[objectstore\] Problem with special characters in file path [\#4454](https://github.com/apache/arrow-rs/issues/4454)
+
+**Closed issues:**
+
+- Include onelake fabric path for https [\#5000](https://github.com/apache/arrow-rs/issues/5000) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[object\_store\] Support generating and using signed upload URLs [\#4763](https://github.com/apache/arrow-rs/issues/4763) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Merged pull requests:**
+
+- Relax path safety \(\#5019\) [\#5020](https://github.com/apache/arrow-rs/pull/5020) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Decode URL paths \(\#5017\) [\#5018](https://github.com/apache/arrow-rs/pull/5018) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- ObjectStore: make error msg thrown from retry more detailed [\#5012](https://github.com/apache/arrow-rs/pull/5012) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Rachelint](https://github.com/Rachelint))
+- Support onelake fabric paths in parse\_url \(\#5000\) [\#5002](https://github.com/apache/arrow-rs/pull/5002) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Object tagging \(\#4754\) [\#4999](https://github.com/apache/arrow-rs/pull/4999) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- \[MINOR\] No need to jump to web pages [\#4994](https://github.com/apache/arrow-rs/pull/4994) ([smallzhongfeng](https://github.com/smallzhongfeng))
+- Pushdown list\_with\_offset for GCS [\#4993](https://github.com/apache/arrow-rs/pull/4993) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Support bucket name with `.` when parsing GCS URL \(\#4991\) [\#4992](https://github.com/apache/arrow-rs/pull/4992) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Increase default timeout to 30 seconds [\#4989](https://github.com/apache/arrow-rs/pull/4989) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Conditional Put \(\#4879\) [\#4984](https://github.com/apache/arrow-rs/pull/4984) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Update quick-xml requirement from 0.30.0 to 0.31.0 in /object\_store [\#4983](https://github.com/apache/arrow-rs/pull/4983) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/setup-node from 3 to 4 [\#4982](https://github.com/apache/arrow-rs/pull/4982) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Support ImdsManagedIdentityProvider in Azure Functions \(\#4976\) [\#4977](https://github.com/apache/arrow-rs/pull/4977) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Add MultiPartStore \(\#4961\) \(\#4608\) [\#4971](https://github.com/apache/arrow-rs/pull/4971) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Split gcp Module [\#4956](https://github.com/apache/arrow-rs/pull/4956) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Add module links in docs root [\#4955](https://github.com/apache/arrow-rs/pull/4955) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Prepare arrow 48.0.0 [\#4948](https://github.com/apache/arrow-rs/pull/4948) ([tustvold](https://github.com/tustvold))
+- Allow opting out of request signing \(\#4927\) [\#4929](https://github.com/apache/arrow-rs/pull/4929) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Default connection and request timeouts of 5 seconds [\#4928](https://github.com/apache/arrow-rs/pull/4928) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Support service\_account in ApplicationDefaultCredentials and Use SelfSignedJwt [\#4926](https://github.com/apache/arrow-rs/pull/4926) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Generate `ETag`s for `InMemory` and `LocalFileSystem` \(\#4879\) [\#4922](https://github.com/apache/arrow-rs/pull/4922) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Cleanup `object_store::retry` client error handling [\#4915](https://github.com/apache/arrow-rs/pull/4915) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Fix integration tests [\#4889](https://github.com/apache/arrow-rs/pull/4889) ([tustvold](https://github.com/tustvold))
+- Support Parsing Avro File Headers [\#4888](https://github.com/apache/arrow-rs/pull/4888) ([tustvold](https://github.com/tustvold))
+- Update ring requirement from 0.16 to 0.17 in /object\_store [\#4887](https://github.com/apache/arrow-rs/pull/4887) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Add AWS presigned URL support [\#4876](https://github.com/apache/arrow-rs/pull/4876) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([carols10cents](https://github.com/carols10cents))
+- Flush in creates\_dir\_if\_not\_present\_append \(\#4872\) [\#4874](https://github.com/apache/arrow-rs/pull/4874) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Flush in multiple\_append test \(\#4868\) [\#4869](https://github.com/apache/arrow-rs/pull/4869) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Enable new integration tests \(\#4828\) [\#4862](https://github.com/apache/arrow-rs/pull/4862) ([tustvold](https://github.com/tustvold))
+
+## [object_store_0.7.1](https://github.com/apache/arrow-rs/tree/object_store_0.7.1) (2023-09-26)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.7.0...object_store_0.7.1)
+
+**Implemented enhancements:**
+
+- Automatically Cleanup LocalFileSystem Temporary Files [\#4778](https://github.com/apache/arrow-rs/issues/4778) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object-store: Expose an async reader API for object store [\#4762](https://github.com/apache/arrow-rs/issues/4762)
+- Improve proxy support by using reqwest::Proxy as configuration [\#4713](https://github.com/apache/arrow-rs/issues/4713) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Fixed bugs:**
+
+- object-store: http shouldn't perform range requests unless `accept-ranges: bytes` header is present [\#4839](https://github.com/apache/arrow-rs/issues/4839)
+- object-store: http-store fails when url doesn't have last-modified header on 0.7.0 [\#4831](https://github.com/apache/arrow-rs/issues/4831)
+- object-store fails to compile for `wasm32-unknown-unknown` with `http` feature [\#4776](https://github.com/apache/arrow-rs/issues/4776) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object-store: could not find `header` in `client` for `http` feature [\#4775](https://github.com/apache/arrow-rs/issues/4775) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- LocalFileSystem Copy and Rename Don't Create Intermediate Directories [\#4760](https://github.com/apache/arrow-rs/issues/4760) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- LocalFileSystem Copy is not Atomic [\#4758](https://github.com/apache/arrow-rs/issues/4758) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Closed issues:**
+
+- object\_store Azure Government Cloud functionality? [\#4853](https://github.com/apache/arrow-rs/issues/4853)
+
+**Merged pull requests:**
+
+- Add ObjectStore BufReader \(\#4762\) [\#4857](https://github.com/apache/arrow-rs/pull/4857) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Allow overriding azure endpoint [\#4854](https://github.com/apache/arrow-rs/pull/4854) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Minor: Improve object\_store docs.rs landing page [\#4849](https://github.com/apache/arrow-rs/pull/4849) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))
+- Error if Remote Ignores HTTP Range Header [\#4841](https://github.com/apache/arrow-rs/pull/4841) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([universalmind303](https://github.com/universalmind303))
+- Perform HEAD request for HttpStore::head [\#4837](https://github.com/apache/arrow-rs/pull/4837) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- fix: object store http header last modified [\#4834](https://github.com/apache/arrow-rs/pull/4834) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([universalmind303](https://github.com/universalmind303))
+- Prepare arrow 47.0.0 [\#4827](https://github.com/apache/arrow-rs/pull/4827) ([tustvold](https://github.com/tustvold))
+- ObjectStore Wasm32 Fixes \(\#4775\) \(\#4776\) [\#4796](https://github.com/apache/arrow-rs/pull/4796) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Best effort cleanup of staged upload files \(\#4778\) [\#4792](https://github.com/apache/arrow-rs/pull/4792) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Relaxing type bounds on coalesce\_ranges and collect\_bytes [\#4787](https://github.com/apache/arrow-rs/pull/4787) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([sumerman](https://github.com/sumerman))
+- Update object\_store chrono deprecations [\#4786](https://github.com/apache/arrow-rs/pull/4786) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Make coalesce\_ranges and collect\_bytes available for crate users [\#4784](https://github.com/apache/arrow-rs/pull/4784) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([sumerman](https://github.com/sumerman))
+- Bump actions/checkout from 3 to 4 [\#4767](https://github.com/apache/arrow-rs/pull/4767) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Make ObjectStore::copy Atomic and Automatically Create Parent Directories \(\#4758\) \(\#4760\) [\#4759](https://github.com/apache/arrow-rs/pull/4759) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+- Update nix requirement from 0.26.1 to 0.27.1 in /object\_store [\#4744](https://github.com/apache/arrow-rs/pull/4744) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([viirya](https://github.com/viirya))
+- Add `with_proxy_ca_certificate` and `with_proxy_excludes` [\#4714](https://github.com/apache/arrow-rs/pull/4714) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([gordonwang0](https://github.com/gordonwang0))
+- Update object\_store Dependencies and Configure Dependabot [\#4700](https://github.com/apache/arrow-rs/pull/4700) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+
+## [object_store_0.7.0](https://github.com/apache/arrow-rs/tree/object_store_0.7.0) (2023-08-15)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.6.1...object_store_0.7.0)
+
+**Breaking changes:**
+
+- Add range and ObjectMeta to GetResult \(\#4352\) \(\#4495\) [\#4677](https://github.com/apache/arrow-rs/pull/4677) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold))
+
+**Implemented enhancements:**
+
+- Add AzureConfigKey::ContainerName [\#4629](https://github.com/apache/arrow-rs/issues/4629) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store: multipart ranges for HTTP [\#4612](https://github.com/apache/arrow-rs/issues/4612)
+- Make object\_store::multipart public [\#4569](https://github.com/apache/arrow-rs/issues/4569) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store: Export `ClientConfigKey` and make the `HttpBuilder` more consistent with other builders [\#4515](https://github.com/apache/arrow-rs/issues/4515) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store/InMemory: Make `clone()` non-async [\#4496](https://github.com/apache/arrow-rs/issues/4496) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Add Range to GetResult::File [\#4352](https://github.com/apache/arrow-rs/issues/4352) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support copy\_if\_not\_exists for Cloudflare R2 \(S3 API\) [\#4190](https://github.com/apache/arrow-rs/issues/4190)
+
+**Fixed bugs:**
+
+- object\_store documentation is broken [\#4683](https://github.com/apache/arrow-rs/issues/4683) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Exports are not sufficient for configuring some object stores, for example minio running locally [\#4530](https://github.com/apache/arrow-rs/issues/4530) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store: Uploading empty file to S3 results in "411 Length Required" [\#4514](https://github.com/apache/arrow-rs/issues/4514) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- GCP doesn't fetch public objects [\#4417](https://github.com/apache/arrow-rs/issues/4417) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+
+**Closed issues:**
+
+- \[object\_store\] Creating an AmazonS3 instance to work with MinIO without setting an endpoint gives a MissingRegion error [\#4617](https://github.com/apache/arrow-rs/issues/4617)
+- AWS Profile credentials no longer working in object\_store 0.6.1 [\#4556](https://github.com/apache/arrow-rs/issues/4556)
[[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Merged pull requests:** + +- Add AzureConfigKey::ContainerName \(\#4629\) [\#4686](https://github.com/apache/arrow-rs/pull/4686) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Fix MSRV CI [\#4671](https://github.com/apache/arrow-rs/pull/4671) ([tustvold](https://github.com/tustvold)) +- Use Config System for Object Store Integration Tests [\#4628](https://github.com/apache/arrow-rs/pull/4628) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Prepare arrow 45 [\#4590](https://github.com/apache/arrow-rs/pull/4590) ([tustvold](https://github.com/tustvold)) +- Add Support for Microsoft Fabric / OneLake [\#4573](https://github.com/apache/arrow-rs/pull/4573) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([vmuddassir-msft](https://github.com/vmuddassir-msft)) +- Cleanup multipart upload trait [\#4572](https://github.com/apache/arrow-rs/pull/4572) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Make object\_store::multipart public [\#4570](https://github.com/apache/arrow-rs/pull/4570) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([yjshen](https://github.com/yjshen)) +- Handle empty S3 payloads \(\#4514\) [\#4518](https://github.com/apache/arrow-rs/pull/4518) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- object\_store: Export `ClientConfigKey` and add `HttpBuilder::with_config` [\#4516](https://github.com/apache/arrow-rs/pull/4516) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([thehabbos007](https://github.com/thehabbos007)) +- object\_store: Implement `ObjectStore` for `Arc` [\#4502](https://github.com/apache/arrow-rs/pull/4502) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Turbo87](https://github.com/Turbo87)) +- object\_store/InMemory: Add `fork()` fn and deprecate `clone()` fn [\#4499](https://github.com/apache/arrow-rs/pull/4499) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Turbo87](https://github.com/Turbo87)) +- Bump actions/deploy-pages from 1 to 2 [\#4449](https://github.com/apache/arrow-rs/pull/4449) ([dependabot[bot]](https://github.com/apps/dependabot)) +- gcp: Exclude authorization header when bearer empty [\#4418](https://github.com/apache/arrow-rs/pull/4418) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([vrongmeal](https://github.com/vrongmeal)) +- Support copy\_if\_not\_exists for Cloudflare R2 \(\#4190\) [\#4239](https://github.com/apache/arrow-rs/pull/4239) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) + +## [object_store_0.6.0](https://github.com/apache/arrow-rs/tree/object_store_0.6.0) (2023-05-18) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.5.6...object_store_0.6.0) + +**Breaking changes:** + +- Add ObjectStore::get\_opts \(\#2241\) [\#4212](https://github.com/apache/arrow-rs/pull/4212) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Simplify ObjectStore configuration pattern [\#4189](https://github.com/apache/arrow-rs/pull/4189) 
[[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- object\_store: fix: Incorrect parsing of https Path Style S3 url [\#4082](https://github.com/apache/arrow-rs/pull/4082) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([roeap](https://github.com/roeap)) +- feat: add etag for objectMeta [\#3937](https://github.com/apache/arrow-rs/pull/3937) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Weijun-H](https://github.com/Weijun-H)) + +**Implemented enhancements:** + +- Object Store Authorization [\#4223](https://github.com/apache/arrow-rs/issues/4223) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Use XML API for GCS [\#4209](https://github.com/apache/arrow-rs/issues/4209) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- ObjectStore with\_url Should Handle Path [\#4199](https://github.com/apache/arrow-rs/issues/4199) +- Return Error on Invalid Config Value [\#4191](https://github.com/apache/arrow-rs/issues/4191) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Extensible ObjectStore Authentication [\#4163](https://github.com/apache/arrow-rs/issues/4163) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- object\_store: When using an AWS profile, obtain the default AWS region from the active profile [\#4158](https://github.com/apache/arrow-rs/issues/4158) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- InMemory append API [\#4152](https://github.com/apache/arrow-rs/issues/4152) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Support accessing ipc Reader/Writer inner by reference [\#4121](https://github.com/apache/arrow-rs/issues/4121) +- \[object\_store\] Retry requests on connection error [\#4119](https://github.com/apache/arrow-rs/issues/4119) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- object\_store: Instantiate object store from provided url with store options [\#4047](https://github.com/apache/arrow-rs/issues/4047) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- object\_store: Builders \(S3/Azure/GCS\) are missing the `get method` to get the actual configuration information [\#4021](https://github.com/apache/arrow-rs/issues/4021) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Fixed bugs:** + +- ObjectStore::head Returns Directory for LocalFileSystem and Hierarchical Azure [\#4230](https://github.com/apache/arrow-rs/issues/4230) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- object\_store: different behavior from aws cli for default profile [\#4137](https://github.com/apache/arrow-rs/issues/4137) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- ImdsManagedIdentityOAuthProvider should send resource ID instead of OIDC scope [\#4096](https://github.com/apache/arrow-rs/issues/4096) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Update readme to remove reference to Jira [\#4091](https://github.com/apache/arrow-rs/issues/4091) +- object\_store: Incorrect parsing of https Path Style S3 url [\#4078](https://github.com/apache/arrow-rs/issues/4078) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- \[object\_store\] `local::tests::test_list_root` test fails during release verification 
[\#3772](https://github.com/apache/arrow-rs/issues/3772) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Merged pull requests:** + +- Remove AWS\_PROFILE support [\#4238](https://github.com/apache/arrow-rs/pull/4238) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Expose AwsAuthorizer [\#4237](https://github.com/apache/arrow-rs/pull/4237) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Expose CredentialProvider [\#4235](https://github.com/apache/arrow-rs/pull/4235) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Return NotFound for directories in Head and Get \(\#4230\) [\#4231](https://github.com/apache/arrow-rs/pull/4231) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Standardise credentials API \(\#4223\) \(\#4163\) [\#4225](https://github.com/apache/arrow-rs/pull/4225) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Extract Common Listing and Retrieval Functionality [\#4220](https://github.com/apache/arrow-rs/pull/4220) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- feat\(object-store\): extend Options API for http client [\#4208](https://github.com/apache/arrow-rs/pull/4208) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([roeap](https://github.com/roeap)) +- Consistently use GCP XML API [\#4207](https://github.com/apache/arrow-rs/pull/4207) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Implement list\_with\_offset for PrefixStore [\#4203](https://github.com/apache/arrow-rs/pull/4203) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Allow setting ClientOptions with Options API [\#4202](https://github.com/apache/arrow-rs/pull/4202) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Create ObjectStore from URL and Options \(\#4047\) [\#4200](https://github.com/apache/arrow-rs/pull/4200) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Skip test\_list\_root on OS X \(\#3772\) [\#4198](https://github.com/apache/arrow-rs/pull/4198) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Recognise R2 URLs for S3 object store \(\#4190\) [\#4194](https://github.com/apache/arrow-rs/pull/4194) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Fix ImdsManagedIdentityProvider \(\#4096\) [\#4193](https://github.com/apache/arrow-rs/pull/4193) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Deffered Object Store Config Parsing \(\#4191\) [\#4192](https://github.com/apache/arrow-rs/pull/4192) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Object Store \(AWS\): Support dynamically resolving S3 bucket region [\#4188](https://github.com/apache/arrow-rs/pull/4188) 
[[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([mr-brobot](https://github.com/mr-brobot)) +- Faster prefix match in object\_store path handling [\#4164](https://github.com/apache/arrow-rs/pull/4164) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Object Store \(AWS\): Support region configured via named profile [\#4161](https://github.com/apache/arrow-rs/pull/4161) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([mr-brobot](https://github.com/mr-brobot)) +- InMemory append API [\#4153](https://github.com/apache/arrow-rs/pull/4153) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([berkaysynnada](https://github.com/berkaysynnada)) +- docs: fix the wrong ln command in CONTRIBUTING.md [\#4139](https://github.com/apache/arrow-rs/pull/4139) ([SteveLauC](https://github.com/SteveLauC)) +- Display the file path in the error message when failed to open credentials file for GCS [\#4124](https://github.com/apache/arrow-rs/pull/4124) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([haoxins](https://github.com/haoxins)) +- Retry on Connection Errors [\#4120](https://github.com/apache/arrow-rs/pull/4120) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([kindly](https://github.com/kindly)) +- Simplify reference to GitHub issues [\#4092](https://github.com/apache/arrow-rs/pull/4092) ([bkmgit](https://github.com/bkmgit)) +- Use reqwest build\_split [\#4039](https://github.com/apache/arrow-rs/pull/4039) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Fix object\_store CI [\#4037](https://github.com/apache/arrow-rs/pull/4037) ([tustvold](https://github.com/tustvold)) +- Add get\_config\_value to AWS/Azure/GCP Builders [\#4035](https://github.com/apache/arrow-rs/pull/4035) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([r4ntix](https://github.com/r4ntix)) +- Update AWS SDK [\#3993](https://github.com/apache/arrow-rs/pull/3993) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) + +## [object_store_0.5.6](https://github.com/apache/arrow-rs/tree/object_store_0.5.6) (2023-03-30) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.5.5...object_store_0.5.6) + +**Implemented enhancements:** + +- Document ObjectStore::list Ordering [\#3975](https://github.com/apache/arrow-rs/issues/3975) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Add option to start listing at a particular key [\#3970](https://github.com/apache/arrow-rs/issues/3970) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Implement `ObjectStore` for trait objects [\#3865](https://github.com/apache/arrow-rs/issues/3865) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Add ObjectStore::append [\#3790](https://github.com/apache/arrow-rs/issues/3790) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Make `InMemory` object store track last modified time for each entry [\#3782](https://github.com/apache/arrow-rs/issues/3782) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Support Unsigned S3 Payloads [\#3737](https://github.com/apache/arrow-rs/issues/3737) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Add Content-MD5 
or checksum header for using an Object Locked S3 [\#3725](https://github.com/apache/arrow-rs/issues/3725) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Fixed bugs:** + +- LocalFileSystem::put is not Atomic [\#3780](https://github.com/apache/arrow-rs/issues/3780) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Merged pull requests:** + +- Add ObjectStore::list\_with\_offset \(\#3970\) [\#3973](https://github.com/apache/arrow-rs/pull/3973) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Remove incorrect validation logic on S3 bucket names [\#3947](https://github.com/apache/arrow-rs/pull/3947) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([rtyler](https://github.com/rtyler)) +- Prepare arrow 36 [\#3935](https://github.com/apache/arrow-rs/pull/3935) ([tustvold](https://github.com/tustvold)) +- fix: Specify content length for gcp copy request [\#3921](https://github.com/apache/arrow-rs/pull/3921) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([scsmithr](https://github.com/scsmithr)) +- Revert structured ArrayData \(\#3877\) [\#3894](https://github.com/apache/arrow-rs/pull/3894) ([tustvold](https://github.com/tustvold)) +- Add support for checksum algorithms in AWS [\#3873](https://github.com/apache/arrow-rs/pull/3873) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([trueleo](https://github.com/trueleo)) +- Rename PrefixObjectStore to PrefixStore [\#3870](https://github.com/apache/arrow-rs/pull/3870) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Implement append for LimitStore, PrefixObjectStore, ThrottledStore [\#3869](https://github.com/apache/arrow-rs/pull/3869) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Supporting metadata fetch without open file read mode [\#3868](https://github.com/apache/arrow-rs/pull/3868) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([metesynnada](https://github.com/metesynnada)) +- Impl ObjectStore for trait object [\#3866](https://github.com/apache/arrow-rs/pull/3866) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Kinrany](https://github.com/Kinrany)) +- Update quick-xml requirement from 0.27.0 to 0.28.0 [\#3857](https://github.com/apache/arrow-rs/pull/3857) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update changelog for 35.0.0 [\#3843](https://github.com/apache/arrow-rs/pull/3843) ([tustvold](https://github.com/tustvold)) +- Cleanup ApplicationDefaultCredentials [\#3799](https://github.com/apache/arrow-rs/pull/3799) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Make InMemory object store track last modified time for each entry [\#3796](https://github.com/apache/arrow-rs/pull/3796) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Weijun-H](https://github.com/Weijun-H)) +- Add ObjectStore::append [\#3791](https://github.com/apache/arrow-rs/pull/3791) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Make LocalFileSystem::put atomic \(\#3780\) [\#3781](https://github.com/apache/arrow-rs/pull/3781) 
[[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Add support for unsigned payloads in aws [\#3741](https://github.com/apache/arrow-rs/pull/3741) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([trueleo](https://github.com/trueleo)) + +## [object_store_0.5.5](https://github.com/apache/arrow-rs/tree/object_store_0.5.5) (2023-02-27) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.5.4...object_store_0.5.5) + +**Implemented enhancements:** + +- object\_store: support azure cli credential [\#3697](https://github.com/apache/arrow-rs/issues/3697) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- object\_store: support encoded path as input [\#3651](https://github.com/apache/arrow-rs/issues/3651) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Fixed bugs:** + +- object-store: aws\_profile fails to load static credentials [\#3765](https://github.com/apache/arrow-rs/issues/3765) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Inconsistent Behaviour Listing File [\#3712](https://github.com/apache/arrow-rs/issues/3712) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- object\_store: bearer token is azure is used like access key [\#3696](https://github.com/apache/arrow-rs/issues/3696) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Merged pull requests:** + +- object-store: fix handling of AWS profile credentials without expiry [\#3766](https://github.com/apache/arrow-rs/pull/3766) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([helmus](https://github.com/helmus)) +- update object\_store deps to patch potential security vulnerabilities [\#3761](https://github.com/apache/arrow-rs/pull/3761) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([spencerbart](https://github.com/spencerbart)) +- Filter exact list prefix matches for azure gen2 accounts [\#3714](https://github.com/apache/arrow-rs/pull/3714) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([roeap](https://github.com/roeap)) +- Filter exact list prefix matches for MemoryStore and HttpStore \(\#3712\) [\#3713](https://github.com/apache/arrow-rs/pull/3713) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- object\_store: azure cli authorization [\#3698](https://github.com/apache/arrow-rs/pull/3698) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([roeap](https://github.com/roeap)) +- object\_store: add Path::from\_url\_path [\#3663](https://github.com/apache/arrow-rs/pull/3663) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([jychen7](https://github.com/jychen7)) + +## [object_store_0.5.4](https://github.com/apache/arrow-rs/tree/object_store_0.5.4) (2023-01-30) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.5.3...object_store_0.5.4) + +**Implemented enhancements:** + +- \[object\_store\] support more identity based auth flows for azure [\#3580](https://github.com/apache/arrow-rs/issues/3580) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Implement workload identity and application default credentials for GCP object store. 
[\#3533](https://github.com/apache/arrow-rs/issues/3533) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Support GCP Workload Identity [\#3490](https://github.com/apache/arrow-rs/issues/3490) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Allow providing service account key directly when building GCP object store client [\#3488](https://github.com/apache/arrow-rs/issues/3488) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Closed issues:** + +- object\_store: temporary aws credentials not refreshed? [\#3446](https://github.com/apache/arrow-rs/issues/3446) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Merged pull requests:** + +- Final tweaks to 32.0.0 changelog [\#3618](https://github.com/apache/arrow-rs/pull/3618) ([tustvold](https://github.com/tustvold)) +- Update AWS SDK [\#3617](https://github.com/apache/arrow-rs/pull/3617) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Add ClientOption.allow\_insecure [\#3600](https://github.com/apache/arrow-rs/pull/3600) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([poelzi](https://github.com/poelzi)) +- \[object\_store\] support azure managed and workload identities [\#3581](https://github.com/apache/arrow-rs/pull/3581) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([roeap](https://github.com/roeap)) +- Additional GCP authentication [\#3541](https://github.com/apache/arrow-rs/pull/3541) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([winding-lines](https://github.com/winding-lines)) +- Update aws-config and aws-types requirements from 0.52 to 0.53 [\#3539](https://github.com/apache/arrow-rs/pull/3539) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([viirya](https://github.com/viirya)) +- Use GHA concurrency groups \(\#3495\) [\#3538](https://github.com/apache/arrow-rs/pull/3538) ([tustvold](https://github.com/tustvold)) +- Remove azurite test exception [\#3497](https://github.com/apache/arrow-rs/pull/3497) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- feat: Allow providing a service account key directly for GCS [\#3489](https://github.com/apache/arrow-rs/pull/3489) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([scsmithr](https://github.com/scsmithr)) + +## [object_store_0.5.3](https://github.com/apache/arrow-rs/tree/object_store_0.5.3) (2023-01-04) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.5.2...object_store_0.5.3) + +**Implemented enhancements:** + +- Derive Clone for the builders in object-store. 
[\#3419](https://github.com/apache/arrow-rs/issues/3419) +- Add a constant prefix object store wrapper [\#3328](https://github.com/apache/arrow-rs/issues/3328) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Add support for content-type while uploading files through ObjectStore API [\#3300](https://github.com/apache/arrow-rs/issues/3300) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Add HttpStore [\#3294](https://github.com/apache/arrow-rs/issues/3294) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Add support for Azure Data Lake Storage Gen2 \(aka: ADLS Gen2\) in Object Store library [\#3283](https://github.com/apache/arrow-rs/issues/3283) +- object\_store: Add Put and Multipart Upload Doc Examples [\#2863](https://github.com/apache/arrow-rs/issues/2863) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Closed issues:** + +- Only flush buffered multi-part data on poll\_shutdown not on poll\_flush [\#3390](https://github.com/apache/arrow-rs/issues/3390) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Merged pull requests:** + +- object\_store: builder configuration api [\#3436](https://github.com/apache/arrow-rs/pull/3436) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([roeap](https://github.com/roeap)) +- Derive Clone for ObjectStore builders and Make URL Parsing Stricter \(\#3419\) [\#3424](https://github.com/apache/arrow-rs/pull/3424) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Add Put and Multipart Put doc examples [\#3420](https://github.com/apache/arrow-rs/pull/3420) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([GeauxEric](https://github.com/GeauxEric)) +- object\_store: update localstack instructions [\#3403](https://github.com/apache/arrow-rs/pull/3403) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([wjones127](https://github.com/wjones127)) +- object\_store: Flush buffered multipart only during poll\_shutdown [\#3397](https://github.com/apache/arrow-rs/pull/3397) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([askoa](https://github.com/askoa)) +- Update quick-xml to 0.27 [\#3395](https://github.com/apache/arrow-rs/pull/3395) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Add HttpStore \(\#3294\) [\#3380](https://github.com/apache/arrow-rs/pull/3380) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- add support for content-type in `ClientOptions` [\#3358](https://github.com/apache/arrow-rs/pull/3358) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ByteBaker](https://github.com/ByteBaker)) +- Update AWS SDK [\#3349](https://github.com/apache/arrow-rs/pull/3349) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Upstream newline\_delimited\_stream and ChunkedStore from DataFusion [\#3341](https://github.com/apache/arrow-rs/pull/3341) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- feat\(object\_store\): add PrefixObjectStore [\#3329](https://github.com/apache/arrow-rs/pull/3329) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] 
([roeap](https://github.com/roeap)) +- feat\(object\_store\): parse well-known storage urls [\#3327](https://github.com/apache/arrow-rs/pull/3327) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([roeap](https://github.com/roeap)) +- Disable getrandom object\_store [\#3278](https://github.com/apache/arrow-rs/pull/3278) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Reload token from AWS\_WEB\_IDENTITY\_TOKEN\_FILE [\#3274](https://github.com/apache/arrow-rs/pull/3274) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Minor: skip aws integration test if TEST\_INTEGRATION is not set [\#3262](https://github.com/apache/arrow-rs/pull/3262) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([viirya](https://github.com/viirya)) + +## [object_store_0.5.2](https://github.com/apache/arrow-rs/tree/object_store_0.5.2) (2022-12-02) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.5.1...object_store_0.5.2) + +**Implemented enhancements:** + +- Object Store: Allow custom reqwest client [\#3127](https://github.com/apache/arrow-rs/issues/3127) +- socks5 proxy support for the object\_store crate [\#2989](https://github.com/apache/arrow-rs/issues/2989) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Cannot query S3 paths containing whitespace [\#2799](https://github.com/apache/arrow-rs/issues/2799) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Fixed bugs:** + +- object\_store\(gcp\): GCP complains about content-length for copy [\#3235](https://github.com/apache/arrow-rs/issues/3235) +- object\_store\(aws\): EntityTooSmall error on multi-part upload [\#3233](https://github.com/apache/arrow-rs/issues/3233) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Merged pull requests:** + +- Add more ClientConfig Options for Object Store RequestBuilder \(\#3127\) [\#3256](https://github.com/apache/arrow-rs/pull/3256) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Add ObjectStore ClientConfig [\#3252](https://github.com/apache/arrow-rs/pull/3252) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- fix\(object\_store,gcp\): test copy\_if\_not\_exist [\#3236](https://github.com/apache/arrow-rs/pull/3236) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([wjones127](https://github.com/wjones127)) +- fix\(object\_store,aws,gcp\): multipart upload enforce size limit of 5 MiB not 5MB [\#3234](https://github.com/apache/arrow-rs/pull/3234) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([wjones127](https://github.com/wjones127)) +- object\_store: add support for using proxy\_url for connection testing [\#3109](https://github.com/apache/arrow-rs/pull/3109) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([sum12](https://github.com/sum12)) +- Update AWS SDK [\#2974](https://github.com/apache/arrow-rs/pull/2974) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Update quick-xml requirement from 0.25.0 to 0.26.0 [\#2918](https://github.com/apache/arrow-rs/pull/2918) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] 
([dependabot[bot]](https://github.com/apps/dependabot)) +- Support building object\_store and parquet on wasm32-unknown-unknown target [\#2899](https://github.com/apache/arrow-rs/pull/2899) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([jondo2010](https://github.com/jondo2010)) +- Add experimental AWS\_PROFILE support \(\#2178\) [\#2891](https://github.com/apache/arrow-rs/pull/2891) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) + +## [object_store_0.5.1](https://github.com/apache/arrow-rs/tree/object_store_0.5.1) (2022-10-04) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.5.0...object_store_0.5.1) + +**Implemented enhancements:** + +- Allow HTTP S3 URLs [\#2806](https://github.com/apache/arrow-rs/issues/2806) +- object\_store: support AWS ECS instance credentials [\#2802](https://github.com/apache/arrow-rs/issues/2802) +- Object Store S3 Alibaba Cloud OSS support [\#2777](https://github.com/apache/arrow-rs/issues/2777) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Expose option to use GCS object store in integration tests [\#2627](https://github.com/apache/arrow-rs/issues/2627) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Fixed bugs:** + +- S3 Signature Error Performing List With Prefix Containing Spaces [\#2800](https://github.com/apache/arrow-rs/issues/2800) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Erratic Behaviour if Incorrect S3 Region Configured [\#2795](https://github.com/apache/arrow-rs/issues/2795) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Merged pull requests:** + +- Support for overriding instance metadata endpoint [\#2811](https://github.com/apache/arrow-rs/pull/2811) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([wjones127](https://github.com/wjones127)) +- Allow Configuring non-TLS HTTP Connections in AmazonS3Builder::from\_env [\#2807](https://github.com/apache/arrow-rs/pull/2807) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Fix S3 query canonicalization \(\#2800\) [\#2801](https://github.com/apache/arrow-rs/pull/2801) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Handle incomplete HTTP redirects missing LOCATION \(\#2795\) [\#2796](https://github.com/apache/arrow-rs/pull/2796) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Handle S3 virtual host request type [\#2782](https://github.com/apache/arrow-rs/pull/2782) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([askoa](https://github.com/askoa)) +- Fix object\_store multipart uploads on S3 Compatible Stores [\#2731](https://github.com/apache/arrow-rs/pull/2731) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([mildbyte](https://github.com/mildbyte)) + + +## [object_store_0.5.0](https://github.com/apache/arrow-rs/tree/object_store_0.5.0) (2022-09-08) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.4.0...object_store_0.5.0) + +**Breaking changes:** + +- Replace azure sdk with custom implementation [\#2509](https://github.com/apache/arrow-rs/pull/2509) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
([roeap](https://github.com/roeap)) +- Replace rusoto with custom implementation for AWS \(\#2176\) [\#2352](https://github.com/apache/arrow-rs/pull/2352) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- IMDSv1 Fallback for S3 [\#2609](https://github.com/apache/arrow-rs/issues/2609) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Print Response Body On Error [\#2572](https://github.com/apache/arrow-rs/issues/2572) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Coalesce Ranges Parallel Fetch [\#2562](https://github.com/apache/arrow-rs/issues/2562) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Support Coalescing Out-of-Order Ranges [\#2561](https://github.com/apache/arrow-rs/issues/2561) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- object\_store: Add TokenProvider authorization to azure [\#2373](https://github.com/apache/arrow-rs/issues/2373) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- AmazonS3Builder::from\_env to populate credentials from environment [\#2361](https://github.com/apache/arrow-rs/issues/2361) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- AmazonS3 Support IMDSv2 [\#2350](https://github.com/apache/arrow-rs/issues/2350) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Fixed bugs:** + +- Retry Logic Fails to Retry Server Errors [\#2573](https://github.com/apache/arrow-rs/issues/2573) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Fix multiple part uploads at once making vector size inconsistent [\#2681](https://github.com/apache/arrow-rs/pull/2681) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([gruuya](https://github.com/gruuya)) +- Fix panic in `object_store::util::coalesce_ranges` [\#2554](https://github.com/apache/arrow-rs/pull/2554) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([thinkharderdev](https://github.com/thinkharderdev)) + +**Merged pull requests:** + +- update doc for object\_store copy\_if\_not\_exists [\#2653](https://github.com/apache/arrow-rs/pull/2653) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([JanKaul](https://github.com/JanKaul)) +- Update quick-xml 0.24 [\#2625](https://github.com/apache/arrow-rs/pull/2625) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Add IMDSv1 fallback \(\#2609\) [\#2610](https://github.com/apache/arrow-rs/pull/2610) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- ObjectStore cleanup \(\#2587\) [\#2590](https://github.com/apache/arrow-rs/pull/2590) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Fix retry logic \(\#2573\) \(\#2572\) [\#2574](https://github.com/apache/arrow-rs/pull/2574) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Improve coalesce\_ranges \(\#2561\) \(\#2562\) [\#2563](https://github.com/apache/arrow-rs/pull/2563) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Update environment variable name for amazonS3builder in integration \(\#2550\) 
[\#2553](https://github.com/apache/arrow-rs/pull/2553) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([amrltqt](https://github.com/amrltqt)) +- Build AmazonS3builder from environment variables \(\#2361\) [\#2536](https://github.com/apache/arrow-rs/pull/2536) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([amrltqt](https://github.com/amrltqt)) +- feat: add token provider authorization to azure store [\#2374](https://github.com/apache/arrow-rs/pull/2374) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([roeap](https://github.com/roeap)) + +## [object_store_0.4.0](https://github.com/apache/arrow-rs/tree/object_store_0.4.0) (2022-08-10) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.3.0...object_store_0.4.0) + +**Implemented enhancements:** + +- Relax Path Validation to Allow Any Percent-Encoded Sequence [\#2355](https://github.com/apache/arrow-rs/issues/2355) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Support get\_multi\_ranges in ObjectStore [\#2293](https://github.com/apache/arrow-rs/issues/2293) +- object\_store: Create explicit test for symlinks [\#2206](https://github.com/apache/arrow-rs/issues/2206) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- object\_store: Make builder style configuration for object stores [\#2203](https://github.com/apache/arrow-rs/issues/2203) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- object\_store: Add example in the main documentation readme [\#2202](https://github.com/apache/arrow-rs/issues/2202) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Fixed bugs:** + +- Azure/S3 Storage Fails to Copy Blob with URL-encoded Path [\#2353](https://github.com/apache/arrow-rs/issues/2353) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Accessing a file with a percent-encoded name on the filesystem with ObjectStore LocalFileSystem [\#2349](https://github.com/apache/arrow-rs/issues/2349) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Documentation updates:** + +- Improve `object_store crate` documentation [\#2260](https://github.com/apache/arrow-rs/pull/2260) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb)) + +**Merged pull requests:** + +- Canonicalize filesystem paths in user-facing APIs \(\#2370\) [\#2371](https://github.com/apache/arrow-rs/pull/2371) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Fix object\_store lint [\#2367](https://github.com/apache/arrow-rs/pull/2367) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Relax path validation \(\#2355\) [\#2356](https://github.com/apache/arrow-rs/pull/2356) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Fix Copy from percent-encoded path \(\#2353\) [\#2354](https://github.com/apache/arrow-rs/pull/2354) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Add ObjectStore::get\_ranges \(\#2293\) [\#2336](https://github.com/apache/arrow-rs/pull/2336) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Remove vestigal ` 
object_store/.circleci/` [\#2337](https://github.com/apache/arrow-rs/pull/2337) ([alamb](https://github.com/alamb)) +- Handle symlinks in LocalFileSystem \(\#2206\) [\#2269](https://github.com/apache/arrow-rs/pull/2269) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Retry GCP requests on server error [\#2243](https://github.com/apache/arrow-rs/pull/2243) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Add LimitStore \(\#2175\) [\#2242](https://github.com/apache/arrow-rs/pull/2242) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([tustvold](https://github.com/tustvold)) +- Only trigger `arrow` CI on changes to arrow [\#2227](https://github.com/apache/arrow-rs/pull/2227) ([alamb](https://github.com/alamb)) +- Update instructions on how to join the Slack channel [\#2219](https://github.com/apache/arrow-rs/pull/2219) ([HaoYang670](https://github.com/HaoYang670)) +- Add Builder style config objects for object\_store [\#2204](https://github.com/apache/arrow-rs/pull/2204) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb)) +- Ignore broken symlinks for LocalFileSystem object store [\#2195](https://github.com/apache/arrow-rs/pull/2195) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([jccampagne](https://github.com/jccampagne)) +- Change CI names to match crate names [\#2189](https://github.com/apache/arrow-rs/pull/2189) ([alamb](https://github.com/alamb)) +- Split most arrow specific CI checks into their own workflows \(reduce common CI time to 21 minutes\) [\#2168](https://github.com/apache/arrow-rs/pull/2168) ([alamb](https://github.com/alamb)) +- Remove another attempt to cache target directory in action.yaml [\#2167](https://github.com/apache/arrow-rs/pull/2167) ([alamb](https://github.com/alamb)) +- Run actions on push to master, pull requests [\#2166](https://github.com/apache/arrow-rs/pull/2166) ([alamb](https://github.com/alamb)) +- Break parquet\_derive and arrow\_flight tests into their own workflows [\#2165](https://github.com/apache/arrow-rs/pull/2165) ([alamb](https://github.com/alamb)) +- Only run integration tests when `arrow` changes [\#2152](https://github.com/apache/arrow-rs/pull/2152) ([alamb](https://github.com/alamb)) +- Break out docs CI job to its own github action [\#2151](https://github.com/apache/arrow-rs/pull/2151) ([alamb](https://github.com/alamb)) +- Do not pretend to cache rust build artifacts, speed up CI by ~20% [\#2150](https://github.com/apache/arrow-rs/pull/2150) ([alamb](https://github.com/alamb)) +- Port `object_store` integration tests, use github actions [\#2148](https://github.com/apache/arrow-rs/pull/2148) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb)) +- Port Add stream upload \(multi-part upload\) [\#2147](https://github.com/apache/arrow-rs/pull/2147) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb)) +- Increase upper wait time to reduce flakiness of object store test [\#2142](https://github.com/apache/arrow-rs/pull/2142) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([viirya](https://github.com/viirya)) + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff 
--git a/rust/object_store/CHANGELOG.md b/rust/object_store/CHANGELOG.md new file mode 100644 index 0000000000..4b46a1c9ed --- /dev/null +++ b/rust/object_store/CHANGELOG.md @@ -0,0 +1,90 @@ + + +# Changelog + +## [v0.12.4](https://github.com/apache/arrow-rs-object-store/tree/v0.12.4) (2025-09-19) + +[Full Changelog](https://github.com/apache/arrow-rs-object-store/compare/v0.12.3...v0.12.4) + +**Implemented enhancements:** + +- Allow flagging `PUT` operations as idempotent. [\#464](https://github.com/apache/arrow-rs-object-store/issues/464) +- Release object store `0.12.3` \(non breaking API\) Release July 2025 [\#428](https://github.com/apache/arrow-rs-object-store/issues/428) +- LocalFileSystem: offset for `list_with_offset` can't be identified / List results \*must\* be sorted [\#388](https://github.com/apache/arrow-rs-object-store/issues/388) +- Support setting storage class when objects are written [\#330](https://github.com/apache/arrow-rs-object-store/issues/330) +- Support auth using AssumeRoleWithWebIdentity for non-AWS S3-compatible implementations [\#283](https://github.com/apache/arrow-rs-object-store/issues/283) +- Types from http through request leak into object\_store public interfaces but aren't re-exported [\#263](https://github.com/apache/arrow-rs-object-store/issues/263) + +**Fixed bugs:** + +- Retry does not cover connection errors [\#368](https://github.com/apache/arrow-rs-object-store/issues/368) + +**Documentation updates:** + +- Improve documentation for http client timeout [\#390](https://github.com/apache/arrow-rs-object-store/pull/390) ([alamb](https://github.com/alamb)) + +**Closed issues:** + +- When a client http request is retried, I would like more information in the `info!` about the retry [\#486](https://github.com/apache/arrow-rs-object-store/issues/486) +- Range header causing AWS Signature issues [\#471](https://github.com/apache/arrow-rs-object-store/issues/471) +- Impossible to downcast an Error::Generic into a RetryError [\#469](https://github.com/apache/arrow-rs-object-store/issues/469) +- JWT session tokens cause SignatureDoesNotMatch with Supabase S3 [\#466](https://github.com/apache/arrow-rs-object-store/issues/466) +- Double url-encoding of special characters in key names [\#457](https://github.com/apache/arrow-rs-object-store/issues/457) +- Make `MultipartUpload` Sync [\#439](https://github.com/apache/arrow-rs-object-store/issues/439) +- Integrate HDFS object store [\#424](https://github.com/apache/arrow-rs-object-store/issues/424) +- Error performing POST when trying to write to S3 with a custom endpoint URL [\#408](https://github.com/apache/arrow-rs-object-store/issues/408) + +**Merged pull requests:** + +- Revert "refactor: remove AWS dynamo integration \(\#407\)" [\#493](https://github.com/apache/arrow-rs-object-store/pull/493) ([alamb](https://github.com/alamb)) +- Fix for clippy 1.90 [\#492](https://github.com/apache/arrow-rs-object-store/pull/492) ([alamb](https://github.com/alamb)) +- Add version 0.12.4 release plan to README [\#490](https://github.com/apache/arrow-rs-object-store/pull/490) ([alamb](https://github.com/alamb)) +- chore\(client/retry\): include error info in logs when retry occurs [\#487](https://github.com/apache/arrow-rs-object-store/pull/487) ([philjb](https://github.com/philjb)) +- AWS S3: Support STS endpoint, WebIdentity, RoleArn, RoleSession configuration [\#480](https://github.com/apache/arrow-rs-object-store/pull/480) ([Friede80](https://github.com/Friede80)) +- build\(deps\): bump actions/github-script from 7 to 
8 [\#478](https://github.com/apache/arrow-rs-object-store/pull/478) ([dependabot[bot]](https://github.com/apps/dependabot)) +- build\(deps\): bump actions/setup-node from 4 to 5 [\#477](https://github.com/apache/arrow-rs-object-store/pull/477) ([dependabot[bot]](https://github.com/apps/dependabot)) +- build\(deps\): bump actions/setup-python from 5 to 6 [\#476](https://github.com/apache/arrow-rs-object-store/pull/476) ([dependabot[bot]](https://github.com/apps/dependabot)) +- chore: fix some clippy 1.89 warnings and ignore some doctests on wasm32 [\#468](https://github.com/apache/arrow-rs-object-store/pull/468) ([mbrobbel](https://github.com/mbrobbel)) +- Allow "application\_credentials" in `impl FromStr for GoogleConfigKey` [\#467](https://github.com/apache/arrow-rs-object-store/pull/467) ([kylebarron](https://github.com/kylebarron)) +- build\(deps\): bump actions/checkout from 4 to 5 [\#463](https://github.com/apache/arrow-rs-object-store/pull/463) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add storage class for aws, gcp, and azure [\#456](https://github.com/apache/arrow-rs-object-store/pull/456) ([matthewmturner](https://github.com/matthewmturner)) +- Remove use of deprecated StepRng from tests [\#449](https://github.com/apache/arrow-rs-object-store/pull/449) ([tustvold](https://github.com/tustvold)) +- Fix not retrying connection errors [\#445](https://github.com/apache/arrow-rs-object-store/pull/445) ([johnnyg](https://github.com/johnnyg)) +- Dont unwrap on body send [\#442](https://github.com/apache/arrow-rs-object-store/pull/442) ([cetra3](https://github.com/cetra3)) +- feat: re-export HTTP types used in public API [\#441](https://github.com/apache/arrow-rs-object-store/pull/441) ([ByteBaker](https://github.com/ByteBaker)) +- fix: update links in release docs and script [\#440](https://github.com/apache/arrow-rs-object-store/pull/440) ([mbrobbel](https://github.com/mbrobbel)) +- chore: prepare `0.12.3` release [\#437](https://github.com/apache/arrow-rs-object-store/pull/437) ([crepererum](https://github.com/crepererum)) +- aws: downgrade credential provider info! log messages to debug! 
[\#436](https://github.com/apache/arrow-rs-object-store/pull/436) ([asubiotto](https://github.com/asubiotto)) +- feat: retry on 408 [\#426](https://github.com/apache/arrow-rs-object-store/pull/426) ([criccomini](https://github.com/criccomini)) +- fix: expose source of `RetryError` [\#422](https://github.com/apache/arrow-rs-object-store/pull/422) ([crepererum](https://github.com/crepererum)) +- fix\(gcp\): throw error instead of panicking if read pem fails [\#421](https://github.com/apache/arrow-rs-object-store/pull/421) ([hugocasa](https://github.com/hugocasa)) +- chore: fix clippy 1.88 warnings [\#418](https://github.com/apache/arrow-rs-object-store/pull/418) ([mbrobbel](https://github.com/mbrobbel)) +- Bump quick-xml to version 0.38.0 [\#417](https://github.com/apache/arrow-rs-object-store/pull/417) ([raimannma](https://github.com/raimannma)) +- Prevent compilation error with all cloud features but fs turned on [\#412](https://github.com/apache/arrow-rs-object-store/pull/412) ([jder](https://github.com/jder)) +- Retry requests when status code is 429 [\#410](https://github.com/apache/arrow-rs-object-store/pull/410) ([paraseba](https://github.com/paraseba)) +- refactor: remove AWS dynamo integration [\#407](https://github.com/apache/arrow-rs-object-store/pull/407) ([crepererum](https://github.com/crepererum)) +- refactor: `PutMultiPartOpts` =\> `PutMultiPartOptions` [\#406](https://github.com/apache/arrow-rs-object-store/pull/406) ([crepererum](https://github.com/crepererum)) +- minor: Pin `tracing-attributes`, `tracing-core` to fix CI [\#404](https://github.com/apache/arrow-rs-object-store/pull/404) ([kylebarron](https://github.com/kylebarron)) +- feat \(azure\): support for account in `az://` URLs [\#403](https://github.com/apache/arrow-rs-object-store/pull/403) ([ByteBaker](https://github.com/ByteBaker)) +- Fix azure path parsing [\#399](https://github.com/apache/arrow-rs-object-store/pull/399) ([kylebarron](https://github.com/kylebarron)) + + + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff --git a/rust/object_store/CONTRIBUTING.md b/rust/object_store/CONTRIBUTING.md new file mode 100644 index 0000000000..0cc703db74 --- /dev/null +++ b/rust/object_store/CONTRIBUTING.md @@ -0,0 +1,230 @@ + + +# Development instructions + +## Running Tests + +Tests can be run using `cargo` + +```shell +cargo test +``` + +## Running Integration Tests + +By default, integration tests are not run. 
To run them you will need to set `TEST_INTEGRATION=1` and then provide the +necessary configuration for that object store + +### AWS + +To test the S3 integration against [localstack](https://localstack.cloud/) + +First start up a container running localstack + +```shell +LOCALSTACK_VERSION=sha256:a0b79cb2430f1818de2c66ce89d41bba40f5a1823410f5a7eaf3494b692eed97 +podman run -d -p 4566:4566 localstack/localstack@$LOCALSTACK_VERSION +podman run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2 +``` + +Setup environment + +```shell +export TEST_INTEGRATION=1 +export AWS_DEFAULT_REGION=us-east-1 +export AWS_ACCESS_KEY_ID=test +export AWS_SECRET_ACCESS_KEY=test +export AWS_ENDPOINT=http://localhost:4566 +export AWS_ALLOW_HTTP=true +export AWS_BUCKET_NAME=test-bucket +``` + +Create a bucket using the AWS CLI + +```shell +podman run --net=host --env-host amazon/aws-cli --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket +``` + +Or directly with: + +```shell +aws s3 mb s3://test-bucket --endpoint-url=http://localhost:4566 +aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket-for-spawn +aws --endpoint-url=http://localhost:4566 dynamodb create-table --table-name test-table --key-schema AttributeName=path,KeyType=HASH AttributeName=etag,KeyType=RANGE --attribute-definitions AttributeName=path,AttributeType=S AttributeName=etag,AttributeType=S --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5 +``` + +Run tests + +```shell +cargo test --features aws +``` + +#### Encryption tests + +To create an encryption key for the tests, you can run the following command: + +```shell +export AWS_SSE_KMS_KEY_ID=$(aws --endpoint-url=http://localhost:4566 \ + kms create-key --description "test key" | + jq -r '.KeyMetadata.KeyId') +``` + +To run integration tests with encryption, you can set the following environment variables: + +```shell +export AWS_SERVER_SIDE_ENCRYPTION=aws:kms +export AWS_SSE_BUCKET_KEY=false +cargo test --features aws +``` + +As well as: + +```shell +unset AWS_SSE_BUCKET_KEY +export AWS_SERVER_SIDE_ENCRYPTION=aws:kms:dsse +cargo test --features aws +``` + +#### SSE-C Encryption tests + +Unfortunately, localstack does not support SSE-C encryption (https://github.com/localstack/localstack/issues/11356). + +We will use [MinIO](https://min.io/docs/minio/container/operations/server-side-encryption.html) to test SSE-C encryption. + +First, create a self-signed certificate to enable HTTPS for MinIO, as SSE-C requires HTTPS. + +```shell +mkdir ~/certs +cd ~/certs +openssl genpkey -algorithm RSA -out private.key +openssl req -new -key private.key -out request.csr -subj "/C=US/ST=State/L=City/O=Organization/OU=Unit/CN=example.com/emailAddress=email@example.com" +openssl x509 -req -days 365 -in request.csr -signkey private.key -out public.crt +rm request.csr +``` + +Second, start MinIO with the self-signed certificate. + +```shell +docker run -d \ + -p 9000:9000 \ + --name minio \ + -v ${HOME}/certs:/root/.minio/certs \ + -e "MINIO_ROOT_USER=minio" \ + -e "MINIO_ROOT_PASSWORD=minio123" \ + minio/minio server /data +``` + +Create a test bucket. + +```shell +export AWS_BUCKET_NAME=test-bucket +export AWS_ACCESS_KEY_ID=minio +export AWS_SECRET_ACCESS_KEY=minio123 +export AWS_ENDPOINT=https://localhost:9000 +aws s3 mb s3://test-bucket --endpoint-url=https://localhost:9000 --no-verify-ssl +``` + +Run the tests. 
The real test is `test_s3_ssec_encryption_with_minio()` + +```shell +export TEST_S3_SSEC_ENCRYPTION=1 +cargo test --features aws --package object_store --lib aws::tests::test_s3_ssec_encryption_with_minio -- --exact --nocapture +``` + +### Azure + +To test the Azure integration +against [azurite](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=visual-studio) + +Startup azurite + +```shell +podman run -p 10000:10000 -p 10001:10001 -p 10002:10002 mcr.microsoft.com/azure-storage/azurite +``` + +Create a bucket + +```shell +podman run --net=host mcr.microsoft.com/azure-cli az storage container create -n test-bucket --connection-string 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;' +``` + +Run tests + +```shell +AZURE_USE_EMULATOR=1 \ +TEST_INTEGRATION=1 \ +AZURE_CONTAINER_NAME=test-bucket \ +AZURE_STORAGE_ACCOUNT_NAME=devstoreaccount1 \ +AZURE_STORAGE_ACCESS_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== \ +AZURE_ENDPOINT=http://127.0.0.1:10000/devstoreaccount1 \ +AZURE_ALLOW_HTTP=true \ +cargo test --features azure +``` + +### GCP + +To test the GCS integration, we use [Fake GCS Server](https://github.com/fsouza/fake-gcs-server) + +Startup the fake server: + +```shell +docker run -p 4443:4443 tustvold/fake-gcs-server -scheme http +``` + +Configure the account: +```shell +curl -v -X POST --data-binary '{"name":"test-bucket"}' -H "Content-Type: application/json" "http://localhost:4443/storage/v1/b" +echo '{"gcs_base_url": "http://localhost:4443", "disable_oauth": true, "client_email": "", "private_key": ""}' > /tmp/gcs.json +``` + +Now run the tests: +```shell +TEST_INTEGRATION=1 \ +OBJECT_STORE_BUCKET=test-bucket \ +GOOGLE_SERVICE_ACCOUNT=/tmp/gcs.json \ +cargo test -p object_store --features=gcp +``` + +# Deprecation Guidelines + +Minor releases may deprecate, but not remove APIs. Deprecating APIs allows +downstream Rust programs to still compile, but generate compiler warnings. This +gives downstream crates time to migrate prior to API removal. + +To deprecate an API: + +- Mark the API as deprecated using `#[deprecated]` and specify the exact object_store version in which it was deprecated +- Concisely describe the preferred API to help the user transition + +The deprecated version is the next version which will be released (please +consult the list above). To mark the API as deprecated, use the +`#[deprecated(since = "...", note = "...")]` attribute. + +For example + +```rust +#[deprecated(since = "0.11.0", note = "Use `date_part` instead")] +``` + +In general, deprecated APIs will remain in the codebase for at least two major releases after +they were deprecated (typically between 6 - 9 months later). For example, an API +deprecated in `0.10.0` can be removed in `0.13.0` (or later). Deprecated APIs +may be removed earlier or later than these guidelines at the discretion of the +maintainers. diff --git a/rust/object_store/Cargo.toml b/rust/object_store/Cargo.toml new file mode 100644 index 0000000000..7fa3a7928c --- /dev/null +++ b/rust/object_store/Cargo.toml @@ -0,0 +1,108 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "object_store" +version = "0.12.4" +edition = "2021" +license = "MIT/Apache-2.0" +readme = "README.md" +description = "A generic object store interface for uniformly interacting with AWS S3, Google Cloud Storage, Azure Blob Storage and local files." +keywords = ["object", "storage", "cloud"] +repository = "https://github.com/apache/arrow-rs-object-store" +rust-version = "1.64.0" + +[package.metadata.docs.rs] +all-features = true + +[dependencies] # In alphabetical order +async-trait = "0.1.53" +bytes = "1.0" +chrono = { version = "0.4.34", default-features = false, features = ["clock"] } +futures = "0.3" +http = "1.2.0" +humantime = "2.1" +itertools = "0.14.0" +parking_lot = { version = "0.12" } +percent-encoding = "2.1" +thiserror = "2.0.2" +tracing = { version = "0.1" } +url = "2.2" +walkdir = { version = "2", optional = true } +backtrace = { version = "0.3.76" } + +# Cloud storage support +base64 = { version = "0.22", default-features = false, features = ["std"], optional = true } +form_urlencoded = { version = "1.2", optional = true } +http-body-util = { version = "0.1.2", optional = true } +httparse = { version = "1.8.0", default-features = false, features = ["std"], optional = true } +hyper = { version = "1.2", default-features = false, optional = true } +md-5 = { version = "0.10.6", default-features = false, optional = true } +quick-xml = { version = "0.38.0", features = ["serialize", "overlapped-lists"], optional = true } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"], optional = true } +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots", "http2"], optional = true } +ring = { version = "0.17", default-features = false, features = ["std"], optional = true } +rustls-pemfile = { version = "2.0", default-features = false, features = ["std"], optional = true } +serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } +serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } +serde_urlencoded = { version = "0.7", optional = true } +tokio = { version = "1.29.0", features = ["sync", "macros", "rt", "time", "io-util"] } + +[target.'cfg(target_family="unix")'.dev-dependencies] +nix = { version = "0.30.0", features = ["fs"] } + +[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies] +web-time = { version = "1.1.0" } +wasm-bindgen-futures = "0.4.18" + +[features] +default = ["fs"] +cloud = ["serde", "serde_json", "quick-xml", "hyper", "reqwest", "reqwest/stream", "chrono/serde", "base64", "rand", "ring", "http-body-util", "form_urlencoded", "serde_urlencoded"] +azure = ["cloud", "httparse"] +fs = ["walkdir"] +gcp = ["cloud", "rustls-pemfile"] +aws = ["cloud", "md-5"] +http = ["cloud"] +tls-webpki-roots = ["reqwest?/rustls-tls-webpki-roots"] +integration = ["rand"] + +[dev-dependencies] # In alphabetical order 
+hyper = { version = "1.2", features = ["server"] } +hyper-util = "0.1" +rand = "0.9" +tempfile = "3.1.0" +regex = "1.11.1" +# The "gzip" feature for reqwest is enabled for an integration test. +reqwest = { version = "0.12", features = ["gzip"] } + +[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dev-dependencies] +wasm-bindgen-test = "0.3.50" + +[dev-dependencies.getrandom_v03] +package = "getrandom" +version = "0.3" +features = ["wasm_js"] + +[dev-dependencies.getrandom_v02] +package = "getrandom" +version = "0.2" +features = ["js"] + +[[test]] +name = "get_range_file" +path = "tests/get_range_file.rs" +required-features = ["fs"] diff --git a/rust/object_store/LICENSE.txt b/rust/object_store/LICENSE.txt new file mode 100644 index 0000000000..de4b130f35 --- /dev/null +++ b/rust/object_store/LICENSE.txt @@ -0,0 +1,204 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + diff --git a/rust/object_store/NOTICE.txt b/rust/object_store/NOTICE.txt new file mode 100644 index 0000000000..0a23eee663 --- /dev/null +++ b/rust/object_store/NOTICE.txt @@ -0,0 +1,5 @@ +Apache Arrow Object Store +Copyright 2020-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). diff --git a/rust/object_store/README.md b/rust/object_store/README.md new file mode 100644 index 0000000000..30ebf2e32c --- /dev/null +++ b/rust/object_store/README.md @@ -0,0 +1,102 @@ + + +# Rust Object Store + +A focused, easy to use, idiomatic, high performance, `async` object +store library for interacting with object stores. + +Using this crate, the same binary and code can easily run in multiple +clouds and local test environments, via a simple runtime configuration +change. Supported object stores include: + +* [AWS S3](https://aws.amazon.com/s3/) +* [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) +* [Google Cloud Storage](https://cloud.google.com/storage) +* Local files +* Memory +* [HTTP/WebDAV Storage](https://datatracker.ietf.org/doc/html/rfc2518) +* Custom implementations + +Originally developed by [InfluxData](https://www.influxdata.com/) and later donated to [Apache Arrow](https://arrow.apache.org/). + +See [docs.rs](https://docs.rs/object_store) for usage instructions + +See [CONTRIBUTING.md] to learn how to contribute to this project. + +[CONTRIBUTING.md]: https://github.com/apache/arrow-rs-object-store/blob/main/CONTRIBUTING.md + +## Support for `wasm32-unknown-unknown` target + +It's possible to build `object_store` for the `wasm32-unknown-unknown` target, however the cloud storage features `aws`, `azure`, `gcp`, and `http` are not supported. + +``` +cargo build -p object_store --target wasm32-unknown-unknown +``` + +## Related Apache Crates + +Here are several related crates in different repositories from other Apache projects. 
+
+| Crate                    | Description                                 | Documentation                            |
+| ------------------------ | ------------------------------------------- | --------------------------------------- |
+| [`object_store_opendal`] | Use [`opendal`] as [`object_store`] backend | [(README)][object_store_opendal-readme] |
+
+[`object_store_opendal`]: https://crates.io/crates/object_store_opendal
+[`opendal`]: https://crates.io/crates/opendal
+[object_store_opendal-readme]: https://github.com/apache/opendal/blob/main/integrations/object_store/README.md
+
+## Community Extensions
+
+The following community-maintained crates provide additional functionality for `object_store` and are NOT governed by the Apache Software Foundation. We list them below in the hope they may be useful, but they are not official Apache projects or endorsed by the Apache Arrow project.
+
+| Crate                        | Description                                                                       | Documentation                               |
+| ---------------------------- | -------------------------------------------------------------------------------- | ------------------------------------------- |
+| [`hdfs_native_object_store`] | Use HDFS as [`object_store`] backend                                              | [(README)][hdfs_native_object_store-readme] |
+| [`ic_object_store`]          | Use [ICP] blockchain as [`object_store`] backend                                  | [(README)][ic_object_store-readme]          |
+| [`anda_object_store`]        | Extends [`object_store`] with metadata management and AES-256-GCM encryption      | [(README)][anda_object_store-readme]        |
+
+[`hdfs_native_object_store`]: https://crates.io/crates/hdfs_native_object_store
+[hdfs_native_object_store-readme]: https://github.com/datafusion-contrib/hdfs-native-object-store
+[`ic_object_store`]: https://crates.io/crates/ic_object_store
+[ic_object_store-readme]: https://github.com/ldclabs/ic-oss/tree/main/src/ic_object_store
+[`anda_object_store`]: https://crates.io/crates/anda_object_store
+[anda_object_store-readme]: https://github.com/ldclabs/anda-db/blob/main/rs/anda_object_store
+[ICP]: https://www.internetcomputer.org/
+
+## Release Schedule
+
+The [`object_store`] crate follows [Semantic Versioning]. We aim to release new
+versions approximately every 2 months.
+
+Please see [the release tracker] for an up-to-date release schedule and to track
+the progress of upcoming releases.
+
+[`object_store`]: https://crates.io/crates/object_store
+[semantic versioning]: https://semver.org/
+[the release tracker]: https://github.com/apache/arrow-rs-object-store/issues/392
+
+Planned Release Schedule
+
+| Approximate Date | Version  | Notes                          | Ticket                                                              |
+|------------------|----------|--------------------------------|:-------------------------------------------------------------------|
+| July 2025        | `0.12.3` | Minor, NO breaking API changes | [#428](https://github.com/apache/arrow-rs-object-store/issues/428)  |
+| Sep 2025         | `0.12.4` | Minor, NO breaking API changes | [#489](https://github.com/apache/arrow-rs-object-store/issues/489)  |
+| TBD              | `0.13.0` | Major, breaking API changes    | [#367](https://github.com/apache/arrow-rs-object-store/issues/367)  |
+| TBD              | `0.13.1` | Minor, NO breaking API changes | [#393](https://github.com/apache/arrow-rs-object-store/issues/393)  | diff --git a/rust/object_store/deny.toml b/rust/object_store/deny.toml new file mode 100644 index 0000000000..bfd060a0b9 --- /dev/null +++ b/rust/object_store/deny.toml @@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Configuration documentation:
+# https://embarkstudios.github.io/cargo-deny/index.html
+
+[advisories]
+vulnerability = "deny"
+yanked = "deny"
+unmaintained = "warn"
+notice = "warn"
+ignore = [
+]
+git-fetch-with-cli = true
+
+[licenses]
+default = "allow"
+unlicensed = "allow"
+copyleft = "allow"
+
+[bans]
+multiple-versions = "warn"
+deny = [
+    # We are using rustls as the TLS implementation, so we shouldn't be linking
+    # in OpenSSL too.
+    #
+    # If you're hitting this, you might want to take a look at what new
+    # dependencies you have introduced and check if there's a way to depend on
+    # rustls instead of OpenSSL (tip: check the crate's feature flags).
+    { name = "openssl-sys" }
+] diff --git a/rust/object_store/dev/release/README.md b/rust/object_store/dev/release/README.md new file mode 100644 index 0000000000..ef3ad55b51 --- /dev/null +++ b/rust/object_store/dev/release/README.md @@ -0,0 +1,222 @@
+
+
+# Release Process
+
+## Overview
+
+This file documents the release process for the `object_store` crate.
+
+We release a new version of `object_store` according to the schedule listed in
+the [main README.md]
+
+[main README.md]: https://github.com/apache/arrow-rs-object-store?tab=readme-ov-file#release-schedule
+
+As we are still in an early phase, we use the 0.x version scheme. If any code has
+been merged to main that has a breaking API change, as defined in [Rust RFC 1105],
+the minor version number is incremented (e.g. `0.3.0` to `0.4.0`).
+Otherwise the patch version is incremented (e.g. `0.3.0` to `0.3.1`).
+
+[Rust RFC 1105]: https://github.com/rust-lang/rfcs/blob/master/text/1105-api-evolution.md
+
+# Release Mechanics
+
+## Process Overview
+
+As part of the Apache governance model, official releases consist of
+signed source tarballs approved by the PMC.
+
+We then use the code in the approved source tarball to release to
+crates.io, the Rust ecosystem's package manager.
+
+We create a `CHANGELOG.md` so our users know what has been changed between releases.
+
+The CHANGELOG is created automatically using
+[update_change_log.sh](https://github.com/apache/arrow-rs-object-store/blob/main/dev/release/update_change_log.sh)
+
+This script creates a changelog using GitHub issues and the
+labels associated with them.
+
+## Prepare CHANGELOG and version:
+
+Now prepare a PR to update `CHANGELOG.md` and versions on `main` to reflect the planned release.
+
+See [#437] for an example.
+
+[#437]: https://github.com/apache/arrow-rs-object-store/pull/437
+
+```bash
+git checkout main
+git pull
+git checkout -b <RELEASE_BRANCH>
+
+# Update versions. Make sure to run it before the next step since we do not want CHANGELOG-old.md affected.
+sed -i '' -e 's/0.11.0/0.11.1/g' `find . -name 'Cargo.toml' -or -name '*.md' | grep -v CHANGELOG`
+git commit -a -m 'Update version'
+
+# ensure your github token is available
+export CHANGELOG_GITHUB_TOKEN=<TOKEN>
+
+# manually edit ./dev/release/update_change_log.sh to reflect the release version
+# create the changelog
+./dev/release/update_change_log.sh
+
+# review the change log, and edit associated issues and labels if needed; rerun update_change_log.sh
+
+# Commit changes
+git commit -a -m 'Create changelog'
+
+# push changes to fork and create a PR to main
+git push
+```
+
+Note that when reviewing the change log, rather than editing the
+`CHANGELOG.md`, it is preferred to update the issues and their labels
+(e.g. add the `invalid` label to exclude them from release notes).
+
+Merge this PR to `main` prior to the next step.
+
+## Prepare release candidate tarball
+
+After you have merged the updates to the `CHANGELOG` and version,
+create a release candidate using the following steps. Note you need to
+be a committer to run these scripts as they upload to the apache `svn`
+distribution servers.
+
+### Create git tag for the release:
+
+While the official release artifact is a signed tarball, we also tag the commit it was created from, for convenience and code archaeology.
+
+Use a string such as `v0.4.0` as the `<version>`.
+
+Create and push the tag as follows:
+
+```shell
+git fetch apache
+git tag <version> apache/main
+# push tag to apache
+git push apache <version>
+```
+
+### Pick a Release Candidate (RC) number
+
+Pick numbers in sequential order, with `1` for `rc1`, `2` for `rc2`, etc.
+
+### Create, sign, and upload tarball
+
+Run `create-tarball.sh` with the `<version>` tag and `<rc>` number you found in the previous steps.
+
+```shell
+./dev/release/create-tarball.sh 0.11.1 1
+```
+
+The `create-tarball.sh` script
+
+1. creates and uploads a release candidate tarball to the [arrow
+   dev](https://dist.apache.org/repos/dist/dev/arrow) location on the
+   apache distribution svn server
+
+2. provides you with an email template to
+   send to dev@arrow.apache.org for release voting.
+
+### Vote on Release Candidate tarball
+
+Send an email based on the output from the script to dev@arrow.apache.org. The email should look like
+
+```
+Draft email for dev@arrow.apache.org mailing list
+
+---------------------------------------------------------
+To: dev@arrow.apache.org
+Subject: [VOTE][RUST] Release Apache Arrow Rust Object Store 0.11.1 RC1
+
+Hi,
+
+I would like to propose a release of Apache Arrow Rust Object
+Store Implementation, version 0.11.1.
+
+This release candidate is based on commit: b945b15de9085f5961a478d4f35b0c5c3427e248 [1]
+
+The proposed release tarball and signatures are hosted at [2].
+
+The changelog is located at [3].
+
+Please download, verify checksums and signatures, run the unit tests,
+and vote on the release. There is a script [4] that automates some of
+the verification.
+
+The vote will be open for at least 72 hours.
+
+[ ] +1 Release this as Apache Arrow Rust Object Store
+[ ] +0
+[ ] -1 Do not release this as Apache Arrow Rust Object Store because...
+
+[1]: https://github.com/apache/arrow-rs-object-store/tree/b945b15de9085f5961a478d4f35b0c5c3427e248
+[2]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-object-store-rs-0.11.1-rc1/
+[3]: https://github.com/apache/arrow-rs-object-store/blob/b945b15de9085f5961a478d4f35b0c5c3427e248/CHANGELOG.md
+[4]: https://github.com/apache/arrow-rs-object-store/blob/main/dev/release/verify-release-candidate.sh
+```
+
+For the release to become "official" it needs at least three Apache Arrow PMC members to vote +1 on it.
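+
+Before sending the vote email, it can help to sanity-check the candidate by hand; the checks requested in the email amount to fetching the artifacts and validating them. The following is a minimal sketch of those manual checks, complementing the automated script described in the next section. The version, RC number, and resulting URLs are illustrative placeholders; the file layout is assumed to match what `create-tarball.sh` uploads.
+
+```shell
+# Hypothetical release coordinates, for illustration only
+VERSION=0.11.1
+RC=1
+BASE=https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-object-store-rs-${VERSION}-rc${RC}
+NAME=apache-arrow-object-store-rs-${VERSION}.tar.gz
+
+# Fetch the tarball, its signature, and its checksum
+curl -sSfLO ${BASE}/${NAME}
+curl -sSfLO ${BASE}/${NAME}.asc
+curl -sSfLO ${BASE}/${NAME}.sha512
+
+# Import the release signing keys, then verify the signature and checksum
+curl -sSfL https://dist.apache.org/repos/dist/dev/arrow/KEYS | gpg --import
+gpg --verify ${NAME}.asc ${NAME}
+shasum -a 512 -c ${NAME}.sha512
+```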
+
+## Verifying release candidates
+
+The `dev/release/verify-release-candidate.sh` script can assist in the verification process. Run it like this:
+
+```
+./dev/release/verify-release-candidate.sh 0.11.0 1
+```
+
+### If the release is not approved
+
+If the release is not approved, fix whatever the problem is and try again with the next RC number.
+
+### If the release is approved
+
+Move the tarball to the release location in SVN, e.g. https://dist.apache.org/repos/dist/release/arrow/apache-arrow-object-store-rs-4.1.0-rc4/, using the `release-tarball.sh` script:
+
+```shell
+./dev/release/release-tarball.sh 4.1.0 4
+```
+
+Congratulations! The release is now official!
+
+### Publish on Crates.io
+
+Only approved releases of the tarball should be published to
+crates.io, in order to conform to Apache Software Foundation
+governance standards.
+
+An Arrow committer can publish this crate to crates.io after an official
+project release has been made, using the following instructions.
+
+Follow [these
+instructions](https://doc.rust-lang.org/cargo/reference/publishing.html) to
+create an account and log in to crates.io before asking to be added as an owner
+of the [object store crate](https://crates.io/crates/object_store).
+
+Download and unpack the official release tarball.
+
+Verify that the Cargo.toml in the tarball contains the correct version
+(e.g. `version = "0.11.0"`) and then publish the crate with the
+following command
+
+```shell
+cargo publish
+``` diff --git a/rust/object_store/dev/release/check-rat-report.py b/rust/object_store/dev/release/check-rat-report.py new file mode 100644 index 0000000000..e30d72bddd --- /dev/null +++ b/rust/object_store/dev/release/check-rat-report.py @@ -0,0 +1,59 @@
+#!/usr/bin/python
+##############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+##############################################################################
+import fnmatch
+import re
+import sys
+import xml.etree.ElementTree as ET
+
+if len(sys.argv) != 3:
+    sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" %
+                     sys.argv[0])
+    sys.exit(1)
+
+exclude_globs_filename = sys.argv[1]
+xml_filename = sys.argv[2]
+
+globs = [line.strip() for line in open(exclude_globs_filename, "r")]
+
+tree = ET.parse(xml_filename)
+root = tree.getroot()
+resources = root.findall('resource')
+
+all_ok = True
+for r in resources:
+    approvals = r.findall('license-approval')
+    if not approvals or approvals[0].attrib['name'] == 'true':
+        continue
+    clean_name = re.sub('^[^/]+/', '', r.attrib['name'])
+    excluded = False
+    for g in globs:
+        if fnmatch.fnmatch(clean_name, g):
+            excluded = True
+            break
+    if not excluded:
+        sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % (
+            clean_name, r.attrib['name'], approvals[0].attrib['name']))
+        all_ok = False
+
+if not all_ok:
+    sys.exit(1)
+
+print('OK')
+sys.exit(0) diff --git a/rust/object_store/dev/release/create-tarball.sh b/rust/object_store/dev/release/create-tarball.sh new file mode 100755 index 0000000000..f57a147c08 --- /dev/null +++ b/rust/object_store/dev/release/create-tarball.sh @@ -0,0 +1,127 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# This script creates a signed tarball in
+# dev/dist/apache-arrow-object-store-rs-<version>-<rc>.tar.gz and uploads it to
+# the "dev" area of the dist.apache.arrow repository and prepares an
+# email for sending to the dev@arrow.apache.org list for a formal
+# vote.
+#
+# Note the tags are expected to be `v<version>`
+#
+# See release/README.md for full release instructions
+#
+# Requirements:
+#
+# 1. gpg set up for signing, with your public key uploaded to
+#    https://pgp.mit.edu/
+#
+# 2. Logged into the apache svn server with the appropriate
+#    credentials
+#
+#
+# Based in part on 02-source.sh from apache/arrow
+#
+
+set -e
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <rc>"
+  echo "ex. $0 0.4.0 1"
$0 0.4.0 1" + exit +fi + +object_store_version=$1 +rc=$2 +tag=v${object_store_version} + +release=apache-arrow-object-store-rs-${object_store_version} +distdir=${SOURCE_TOP_DIR}/dev/dist/${release}-rc${rc} +tarname=${release}.tar.gz +tarball=${distdir}/${tarname} +url="https://dist.apache.org/repos/dist/dev/arrow/${release}-rc${rc}" + +echo "Attempting to create ${tarball} from tag ${tag}" + +release_hash=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 ${tag}) + +if [ -z "$release_hash" ]; then + echo "Cannot continue: unknown git tag: $tag" +fi + +echo "Draft email for dev@arrow.apache.org mailing list" +echo "" +echo "---------------------------------------------------------" +cat < containing the files in git at $release_hash +# the files in the tarball are prefixed with {tag=} (e.g. 0.4.0) +mkdir -p ${distdir} +(cd "${SOURCE_TOP_DIR}" && git archive ${release_hash} --prefix ${release}/ | gzip > ${tarball}) + +echo "Running rat license checker on ${tarball}" +${SOURCE_DIR}/../../dev/release/run-rat.sh ${tarball} + +echo "Signing tarball and creating checksums" +gpg --armor --output ${tarball}.asc --detach-sig ${tarball} +# create signing with relative path of tarball +# so that they can be verified with a command such as +# shasum --check apache-arrow-rs-4.1.0-rc2.tar.gz.sha512 +(cd ${distdir} && shasum -a 256 ${tarname}) > ${tarball}.sha256 +(cd ${distdir} && shasum -a 512 ${tarname}) > ${tarball}.sha512 + +echo "Uploading to apache dist/dev to ${url}" +svn co --depth=empty https://dist.apache.org/repos/dist/dev/arrow ${SOURCE_TOP_DIR}/dev/dist +svn add ${distdir} +svn ci -m "Apache Arrow Rust ${object_store_version=} ${rc}" ${distdir} diff --git a/rust/object_store/dev/release/rat_exclude_files.txt b/rust/object_store/dev/release/rat_exclude_files.txt new file mode 100644 index 0000000000..d08a0ea8c7 --- /dev/null +++ b/rust/object_store/dev/release/rat_exclude_files.txt @@ -0,0 +1,22 @@ +venv/* +testing/* +target/* +dev/release/rat_exclude_files.txt +arrow/test/data/* +arrow-csv/test/data/* +arrow-json/test/data/* +arrow/test/dependency/* +arrow-integration-test/data/* +parquet_derive/test/dependency/* +.gitattributes +**.gitignore +.gitmodules +Cargo.lock +filtered_rat.txt +rat.txt +# auto-generated +arrow-flight/src/arrow.flight.protocol.rs +arrow-flight/src/sql/arrow.flight.protocol.sql.rs +.github/* +parquet/src/bin/parquet-fromcsv-help.txt +arrow-flight/examples/data/* diff --git a/rust/object_store/dev/release/release-tarball.sh b/rust/object_store/dev/release/release-tarball.sh new file mode 100755 index 0000000000..16b10e0226 --- /dev/null +++ b/rust/object_store/dev/release/release-tarball.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#
+
+# This script copies a tarball from the "dev" area of the
+# dist.apache.arrow repository to the "release" area
+#
+# This script should only be run after the release has been approved
+# by the Arrow PMC.
+#
+# See release/README.md for full release instructions
+#
+# Based in part on post-01-upload.sh from apache/arrow
+
+
+set -e
+set -u
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <rc>"
+  echo "ex. $0 0.4.0 1"
+  exit 1
+fi
+
+version=$1
+rc=$2
+
+tmp_dir=tmp-apache-arrow-dist
+
+echo "Recreate temporary directory: ${tmp_dir}"
+rm -rf ${tmp_dir}
+mkdir -p ${tmp_dir}
+
+echo "Clone dev dist repository"
+svn \
+  co \
+  https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-object-store-rs-${version}-rc${rc} \
+  ${tmp_dir}/dev
+
+echo "Clone release dist repository"
+svn co https://dist.apache.org/repos/dist/release/arrow ${tmp_dir}/release
+
+echo "Copy ${version}-rc${rc} to release working copy"
+release_version=arrow-object-store-rs-${version}
+mkdir -p ${tmp_dir}/release/${release_version}
+cp -r ${tmp_dir}/dev/* ${tmp_dir}/release/${release_version}/
+svn add ${tmp_dir}/release/${release_version}
+
+echo "Commit release"
+svn ci -m "Apache Arrow Rust Object Store ${version}" ${tmp_dir}/release
+
+echo "Clean up"
+rm -rf ${tmp_dir}
+
+echo "Success!"
+echo "The release is available here:"
+echo "  https://dist.apache.org/repos/dist/release/arrow/${release_version}"
+
+echo "Clean up old artifacts from svn"
+"${SOURCE_TOP_DIR}"/dev/release/remove-old-artifacts.sh diff --git a/rust/object_store/dev/release/remove-old-artifacts.sh b/rust/object_store/dev/release/remove-old-artifacts.sh new file mode 100755 index 0000000000..bbbbe0c363 --- /dev/null +++ b/rust/object_store/dev/release/remove-old-artifacts.sh @@ -0,0 +1,63 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# This script removes all RCs and all but the most recent versions of
+# object_store from svn.
+# +# The older versions are in SVN history as well as available on the +# archive page https://archive.apache.org/dist/ +# +# See +# https://infra.apache.org/release-download-pages.html + +set -e +set -u +set -o pipefail + +echo "Remove all RCs" +dev_base_url=https://dist.apache.org/repos/dist/dev/arrow +old_rcs=$( + svn ls ${dev_base_url}/ | \ + grep -E '^apache-arrow-object-store-rs-[0-9]' | \ + sort --version-sort +) +for old_rc in $old_rcs; do + echo "Remove RC: ${old_rc}" + svn \ + delete \ + -m "Remove old Apache Arrow Rust Object Store RC: ${old_rc}" \ + ${dev_base_url}/${old_rc} +done + +echo "Remove all but the most recent version" +release_base_url="https://dist.apache.org/repos/dist/release/arrow" +old_releases=$( + svn ls ${release_base_url} | \ + grep -E '^arrow-object-store-rs-[0-9\.]+' | \ + sort --version-sort --reverse | \ + tail -n +2 +) +for old_release_version in $old_releases; do + echo "Remove old release: ${old_release_version}" + svn \ + delete \ + -m "Remove Apache Arrow Rust Object Store release: ${old_release_version}" \ + ${release_base_url}/${old_release_version} +done diff --git a/rust/object_store/dev/release/run-rat.sh b/rust/object_store/dev/release/run-rat.sh new file mode 100755 index 0000000000..94fa55fbe0 --- /dev/null +++ b/rust/object_store/dev/release/run-rat.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +RAT_VERSION=0.13 + +# download apache rat +if [ ! -f apache-rat-${RAT_VERSION}.jar ]; then + curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar > apache-rat-${RAT_VERSION}.jar +fi + +RAT="java -jar apache-rat-${RAT_VERSION}.jar -x " + +RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) + +# generate the rat report +$RAT $1 > rat.txt +python $RELEASE_DIR/check-rat-report.py $RELEASE_DIR/rat_exclude_files.txt rat.txt > filtered_rat.txt +cat filtered_rat.txt +UNAPPROVED=`cat filtered_rat.txt | grep "NOT APPROVED" | wc -l` + +if [ "0" -eq "${UNAPPROVED}" ]; then + echo "No unapproved licenses" +else + echo "${UNAPPROVED} unapproved licences. Check rat report: rat.txt" + exit 1 +fi diff --git a/rust/object_store/dev/release/update_change_log.sh b/rust/object_store/dev/release/update_change_log.sh new file mode 100755 index 0000000000..793f685aa7 --- /dev/null +++ b/rust/object_store/dev/release/update_change_log.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# invokes the changelog generator from
+# https://github.com/github-changelog-generator/github-changelog-generator
+#
+# With the config located in
+# arrow-rs-object-store/.github_changelog_generator
+#
+# Usage:
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log.sh
+
+set -e
+
+SINCE_TAG="v0.12.3"
+FUTURE_RELEASE="v0.12.4"
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG.md"
+
+# remove license header so github-changelog-generator has a clean base to append
+sed -i.bak '1,18d' "${OUTPUT_PATH}"
+
+pushd "${SOURCE_TOP_DIR}"
+docker run -it --rm -e CHANGELOG_GITHUB_TOKEN="$CHANGELOG_GITHUB_TOKEN" -v "$(pwd)":/usr/local/src/your-app githubchangeloggenerator/github-changelog-generator \
+    --user apache \
+    --project arrow-rs-object-store \
+    --cache-file=.githubchangeloggenerator.cache \
+    --cache-log=.githubchangeloggenerator.cache.log \
+    --http-cache \
+    --max-issues=600 \
+    --since-tag ${SINCE_TAG} \
+    --future-release ${FUTURE_RELEASE}
+
+sed -i.bak "s/\\\n/\n\n/" "${OUTPUT_PATH}"
+
+# Put license header back on
+echo '<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+' | cat - "${OUTPUT_PATH}" > "${OUTPUT_PATH}".tmp
+mv "${OUTPUT_PATH}".tmp "${OUTPUT_PATH}" diff --git a/rust/object_store/dev/release/verify-release-candidate.sh b/rust/object_store/dev/release/verify-release-candidate.sh new file mode 100755 index 0000000000..b24bd8fbb7 --- /dev/null +++ b/rust/object_store/dev/release/verify-release-candidate.sh @@ -0,0 +1,128 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# + +case $# in + 2) VERSION="$1" + RC_NUMBER="$2" + ;; + *) echo "Usage: $0 X.Y.Z RC_NUMBER" + exit 1 + ;; +esac + +set -e +set -x +set -o pipefail + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +ARROW_DIR="$(dirname $(dirname ${SOURCE_DIR}))" +ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' + +download_dist_file() { + curl \ + --silent \ + --show-error \ + --fail \ + --location \ + --remote-name $ARROW_DIST_URL/$1 +} + +download_rc_file() { + download_dist_file apache-arrow-object-store-rs-${VERSION}-rc${RC_NUMBER}/$1 +} + +import_gpg_keys() { + download_dist_file KEYS + gpg --import KEYS +} + +if type shasum >/dev/null 2>&1; then + sha256_verify="shasum -a 256 -c" + sha512_verify="shasum -a 512 -c" +else + sha256_verify="sha256sum -c" + sha512_verify="sha512sum -c" +fi + +fetch_archive() { + local dist_name=$1 + download_rc_file ${dist_name}.tar.gz + download_rc_file ${dist_name}.tar.gz.asc + download_rc_file ${dist_name}.tar.gz.sha256 + download_rc_file ${dist_name}.tar.gz.sha512 + gpg --verify ${dist_name}.tar.gz.asc ${dist_name}.tar.gz + ${sha256_verify} ${dist_name}.tar.gz.sha256 + ${sha512_verify} ${dist_name}.tar.gz.sha512 +} + +setup_tempdir() { + cleanup() { + if [ "${TEST_SUCCESS}" = "yes" ]; then + rm -fr "${ARROW_TMPDIR}" + else + echo "Failed to verify release candidate. See ${ARROW_TMPDIR} for details." + fi + } + + if [ -z "${ARROW_TMPDIR}" ]; then + # clean up automatically if ARROW_TMPDIR is not defined + ARROW_TMPDIR=$(mktemp -d -t "$1.XXXXX") + trap cleanup EXIT + else + # don't clean up automatically + mkdir -p "${ARROW_TMPDIR}" + fi +} + +test_source_distribution() { + # install rust toolchain in a similar fashion like test-miniconda + export RUSTUP_HOME=$PWD/test-rustup + export CARGO_HOME=$PWD/test-rustup + + curl https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path + + export PATH=$RUSTUP_HOME/bin:$PATH + source $RUSTUP_HOME/env + + # build and test rust + cargo build + cargo test --all --all-features + + # verify that the crate can be published to crates.io + cargo publish --dry-run +} + +TEST_SUCCESS=no + +setup_tempdir "arrow-${VERSION}" +echo "Working in sandbox ${ARROW_TMPDIR}" +cd ${ARROW_TMPDIR} + +dist_name="apache-arrow-object-store-rs-${VERSION}" +import_gpg_keys +fetch_archive ${dist_name} +tar xf ${dist_name}.tar.gz +pushd ${dist_name} +test_source_distribution +popd + +TEST_SUCCESS=yes +echo 'Release candidate looks good!' +exit 0 diff --git a/rust/object_store/src/attributes.rs b/rust/object_store/src/attributes.rs new file mode 100644 index 0000000000..cac5b36b5c --- /dev/null +++ b/rust/object_store/src/attributes.rs @@ -0,0 +1,256 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::ops::Deref;
+
+/// Additional object attribute types
+#[non_exhaustive]
+#[derive(Debug, Hash, Eq, PartialEq, Clone)]
+pub enum Attribute {
+    /// Specifies how the object should be handled by a browser
+    ///
+    /// See [Content-Disposition](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition)
+    ContentDisposition,
+    /// Specifies the encodings applied to the object
+    ///
+    /// See [Content-Encoding](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding)
+    ContentEncoding,
+    /// Specifies the language of the object
+    ///
+    /// See [Content-Language](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language)
+    ContentLanguage,
+    /// Specifies the MIME type of the object
+    ///
+    /// This takes precedence over any [ClientOptions](crate::ClientOptions) configuration
+    ///
+    /// See [Content-Type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type)
+    ContentType,
+    /// Overrides cache control policy of the object
+    ///
+    /// See [Cache-Control](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control)
+    CacheControl,
+    /// Specifies the storage class of the object.
+    ///
+    /// See [AWS](https://aws.amazon.com/s3/storage-classes/),
+    /// [GCP](https://cloud.google.com/storage/docs/storage-classes), and
+    /// [Azure](https://learn.microsoft.com/en-us/rest/api/storageservices/set-blob-tier).
+    /// `StorageClass` is used as the name for this attribute because 2 of the 3 storage providers
+    /// use that name
+    StorageClass,
+    /// Specifies a user-defined metadata field for the object
+    ///
+    /// The String is a user-defined key
+    Metadata(Cow<'static, str>),
+}
+
+/// The value of an [`Attribute`]
+///
+/// Provides efficient conversion from both static and owned strings
+///
+/// ```
+/// # use object_store::AttributeValue;
+/// // Can use static strings without needing an allocation
+/// let value = AttributeValue::from("bar");
+/// // Can also store owned strings
+/// let value = AttributeValue::from("foo".to_string());
+/// ```
+#[derive(Debug, Hash, Eq, PartialEq, Clone)]
+pub struct AttributeValue(Cow<'static, str>);
+
+impl AsRef<str> for AttributeValue {
+    fn as_ref(&self) -> &str {
+        &self.0
+    }
+}
+
+impl From<&'static str> for AttributeValue {
+    fn from(value: &'static str) -> Self {
+        Self(Cow::Borrowed(value))
+    }
+}
+
+impl From<String> for AttributeValue {
+    fn from(value: String) -> Self {
+        Self(Cow::Owned(value))
+    }
+}
+
+impl Deref for AttributeValue {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        self.0.as_ref()
+    }
+}
+
+/// Additional attributes of an object
+///
+/// Attributes can be specified in [PutOptions](crate::PutOptions) and retrieved
+/// from APIs returning [GetResult](crate::GetResult).
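+///
+/// # Example
+///
+/// A minimal usage sketch (illustrative; uses only the API defined in this
+/// module):
+///
+/// ```
+/// # use object_store::{Attribute, Attributes};
+/// let mut attributes = Attributes::new();
+/// attributes.insert(Attribute::ContentType, "application/json".into());
+/// assert_eq!(attributes.len(), 1);
+/// ```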
+///
+/// Unlike [`ObjectMeta`](crate::ObjectMeta), [`Attributes`] are not returned by
+/// listing APIs
+#[derive(Debug, Default, Eq, PartialEq, Clone)]
+pub struct Attributes(HashMap<Attribute, AttributeValue>);
+
+impl Attributes {
+    /// Create a new empty [`Attributes`]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Create a new [`Attributes`] with space for `capacity` [`Attribute`]
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self(HashMap::with_capacity(capacity))
+    }
+
+    /// Insert a new [`Attribute`], [`AttributeValue`] pair
+    ///
+    /// Returns the previous value for `key` if any
+    pub fn insert(&mut self, key: Attribute, value: AttributeValue) -> Option<AttributeValue> {
+        self.0.insert(key, value)
+    }
+
+    /// Returns the [`AttributeValue`] for `key` if any
+    pub fn get(&self, key: &Attribute) -> Option<&AttributeValue> {
+        self.0.get(key)
+    }
+
+    /// Removes the [`AttributeValue`] for `key` if any
+    pub fn remove(&mut self, key: &Attribute) -> Option<AttributeValue> {
+        self.0.remove(key)
+    }
+
+    /// Returns an [`AttributesIter`] over this
+    pub fn iter(&self) -> AttributesIter<'_> {
+        self.into_iter()
+    }
+
+    /// Returns the number of [`Attribute`] in this collection
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Returns true if this contains no [`Attribute`]
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+}
+
+impl<K, V> FromIterator<(K, V)> for Attributes
+where
+    K: Into<Attribute>,
+    V: Into<AttributeValue>,
+{
+    fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self {
+        Self(
+            iter.into_iter()
+                .map(|(k, v)| (k.into(), v.into()))
+                .collect(),
+        )
+    }
+}
+
+impl<'a> IntoIterator for &'a Attributes {
+    type Item = (&'a Attribute, &'a AttributeValue);
+    type IntoIter = AttributesIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        AttributesIter(self.0.iter())
+    }
+}
+
+/// Iterator over [`Attributes`]
+#[derive(Debug)]
+pub struct AttributesIter<'a>(std::collections::hash_map::Iter<'a, Attribute, AttributeValue>);
+
+impl<'a> Iterator for AttributesIter<'a> {
+    type Item = (&'a Attribute, &'a AttributeValue);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_attributes_basic() {
+        let mut attributes = Attributes::from_iter([
+            (Attribute::ContentDisposition, "inline"),
+            (Attribute::ContentEncoding, "gzip"),
+            (Attribute::ContentLanguage, "en-US"),
+            (Attribute::ContentType, "test"),
+            (Attribute::CacheControl, "control"),
+            (Attribute::Metadata("key1".into()), "value1"),
+        ]);
+
+        assert!(!attributes.is_empty());
+        assert_eq!(attributes.len(), 6);
+
+        assert_eq!(
+            attributes.get(&Attribute::ContentType),
+            Some(&"test".into())
+        );
+
+        let metav = "control".into();
+        assert_eq!(attributes.get(&Attribute::CacheControl), Some(&metav));
+        assert_eq!(
+            attributes.insert(Attribute::CacheControl, "v1".into()),
+            Some(metav)
+        );
+        assert_eq!(attributes.len(), 6);
+
+        assert_eq!(
+            attributes.remove(&Attribute::CacheControl).unwrap(),
+            "v1".into()
+        );
+        assert_eq!(attributes.len(), 5);
+
+        let metav: AttributeValue = "v2".into();
+        attributes.insert(Attribute::CacheControl, metav.clone());
+        assert_eq!(attributes.get(&Attribute::CacheControl), Some(&metav));
+        assert_eq!(attributes.len(), 6);
+
+        assert_eq!(
+            attributes.get(&Attribute::ContentDisposition),
+            Some(&"inline".into())
+        );
+        assert_eq!(
+            attributes.get(&Attribute::ContentEncoding),
+            Some(&"gzip".into())
+        );
+        assert_eq!(
+            attributes.get(&Attribute::ContentLanguage),
+            Some(&"en-US".into())
+        );
+        assert_eq!(
+            attributes.get(&Attribute::Metadata("key1".into())),
+            Some(&"value1".into())
+        );
+    }
+}
diff --git a/rust/object_store/src/aws/builder.rs b/rust/object_store/src/aws/builder.rs
new file mode 100644
index 0000000000..6e6f8e2fb0
--- /dev/null
+++ b/rust/object_store/src/aws/builder.rs
@@ -0,0 +1,1737 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::aws::client::{S3Client, S3Config};
+use crate::aws::credential::{
+    EKSPodCredentialProvider, InstanceCredentialProvider, SessionProvider, TaskCredentialProvider,
+    WebIdentityProvider,
+};
+use crate::aws::{
+    AmazonS3, AwsCredential, AwsCredentialProvider, Checksum, S3ConditionalPut, S3CopyIfNotExists,
+    STORE,
+};
+use crate::client::{http_connector, HttpConnector, TokenCredentialProvider};
+use crate::config::ConfigValue;
+use crate::{ClientConfigKey, ClientOptions, Result, RetryConfig, StaticCredentialProvider};
+use base64::prelude::BASE64_STANDARD;
+use base64::Engine;
+use itertools::Itertools;
+use md5::{Digest, Md5};
+use reqwest::header::{HeaderMap, HeaderValue};
+use serde::{Deserialize, Serialize};
+use std::str::FromStr;
+use std::sync::Arc;
+use std::time::Duration;
+use tracing::debug;
+use url::Url;
+
+/// Default metadata endpoint
+static DEFAULT_METADATA_ENDPOINT: &str = "http://169.254.169.254";
+
+/// A specialized `Error` for object store-related errors
+#[derive(Debug, thiserror::Error)]
+enum Error {
+    #[error("Missing bucket name")]
+    MissingBucketName,
+
+    #[error("Missing AccessKeyId")]
+    MissingAccessKeyId,
+
+    #[error("Missing SecretAccessKey")]
+    MissingSecretAccessKey,
+
+    #[error("Unable to parse source url. Url: {}, Error: {}", url, source)]
+    UnableToParseUrl {
+        source: url::ParseError,
+        url: String,
+    },
+
+    #[error(
+        "Unknown url scheme cannot be parsed into storage location: {}",
+        scheme
+    )]
+    UnknownUrlScheme { scheme: String },
+
+    #[error("URL did not match any known pattern for scheme: {}", url)]
+    UrlNotRecognised { url: String },
+
+    #[error("Configuration key: '{}' is not known.", key)]
+    UnknownConfigurationKey { key: String },
+
+    #[error("Invalid Zone suffix for bucket '{bucket}'")]
+    ZoneSuffix { bucket: String },
+
+    #[error("Invalid encryption type: {}. Valid values are \"AES256\", \"aws:kms\", \"aws:kms:dsse\" and \"sse-c\".", passed)]
+    InvalidEncryptionType { passed: String },
+
+    #[error(
+        "Invalid encryption header values. Header: {}, source: {}",
+        header,
+        source
+    )]
+    InvalidEncryptionHeader {
+        header: &'static str,
+        source: Box<dyn std::error::Error + Send + Sync + 'static>,
+    },
+}
+
+impl From<Error> for crate::Error {
+    fn from(source: Error) -> Self {
+        match source {
+            Error::UnknownConfigurationKey { key } => {
+                Self::UnknownConfigurationKey { store: STORE, key }
+            }
+            _ => Self::Generic {
+                store: STORE,
+                source: Box::new(source),
+            },
+        }
+    }
+}
+
+/// Configure a connection to Amazon S3 using the specified credentials in
+/// the specified Amazon region and bucket.
+///
+/// # Example
+/// ```
+/// # let REGION = "foo";
+/// # let BUCKET_NAME = "foo";
+/// # let ACCESS_KEY_ID = "foo";
+/// # let SECRET_KEY = "foo";
+/// # use object_store::aws::AmazonS3Builder;
+/// let s3 = AmazonS3Builder::new()
+///     .with_region(REGION)
+///     .with_bucket_name(BUCKET_NAME)
+///     .with_access_key_id(ACCESS_KEY_ID)
+///     .with_secret_access_key(SECRET_KEY)
+///     .build();
+/// ```
+#[derive(Debug, Default, Clone)]
+pub struct AmazonS3Builder {
+    /// Access key id
+    access_key_id: Option<String>,
+    /// Secret access_key
+    secret_access_key: Option<String>,
+    /// Region
+    region: Option<String>,
+    /// Bucket name
+    bucket_name: Option<String>,
+    /// Endpoint for communicating with AWS S3
+    endpoint: Option<String>,
+    /// Token to use for requests
+    token: Option<String>,
+    /// Url
+    url: Option<String>,
+    /// Retry config
+    retry_config: RetryConfig,
+    /// When set to true, fallback to IMDSv1
+    imdsv1_fallback: ConfigValue<bool>,
+    /// When set to true, virtual hosted style request has to be used
+    virtual_hosted_style_request: ConfigValue<bool>,
+    /// When set to true, S3 express is used
+    s3_express: ConfigValue<bool>,
+    /// When set to true, unsigned payload option has to be used
+    unsigned_payload: ConfigValue<bool>,
+    /// Checksum algorithm which has to be used for object integrity check during upload
+    checksum_algorithm: Option<ConfigValue<Checksum>>,
+    /// Metadata endpoint, see <https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html>
+    metadata_endpoint: Option<String>,
+    /// Container credentials URL, used in ECS
+    container_credentials_relative_uri: Option<String>,
+    /// Container credentials full URL, used in EKS
+    container_credentials_full_uri: Option<String>,
+    /// Container authorization token file, used in EKS
+    container_authorization_token_file: Option<String>,
+    /// Web identity token file path for AssumeRoleWithWebIdentity
+    web_identity_token_file: Option<String>,
+    /// Role ARN to assume when using web identity token
+    role_arn: Option<String>,
+    /// Session name for web identity role assumption
+    role_session_name: Option<String>,
+    /// Custom STS endpoint for web identity token exchange
+    sts_endpoint: Option<String>,
+    /// Client options
+    client_options: ClientOptions,
+    /// Credentials
+    credentials: Option<AwsCredentialProvider>,
+    /// Skip signing requests
+    skip_signature: ConfigValue<bool>,
+    /// Copy if not exists
+    copy_if_not_exists: Option<ConfigValue<S3CopyIfNotExists>>,
+    /// Put precondition
+    conditional_put: ConfigValue<S3ConditionalPut>,
+    /// Ignore tags
+    disable_tagging: ConfigValue<bool>,
+    /// Encryption (See [`S3EncryptionConfigKey`])
+    encryption_type: Option<ConfigValue<S3EncryptionType>>,
+    encryption_kms_key_id: Option<String>,
+    encryption_bucket_key_enabled: Option<ConfigValue<bool>>,
+    /// base64-encoded 256-bit customer encryption key for SSE-C.
+    encryption_customer_key_base64: Option<String>,
+    /// When set to true, charge requester for bucket operations
+    request_payer: ConfigValue<bool>,
+    /// The [`HttpConnector`] to use
+    http_connector: Option<Arc<dyn HttpConnector>>,
+}
+
+/// Configuration keys for [`AmazonS3Builder`]
+///
+/// Configuration via keys can be done via [`AmazonS3Builder::with_config`]
+///
+/// # Example
+/// ```
+/// # use object_store::aws::{AmazonS3Builder, AmazonS3ConfigKey};
+/// let builder = AmazonS3Builder::new()
+///     .with_config("aws_access_key_id".parse().unwrap(), "my-access-key-id")
+///     .with_config(AmazonS3ConfigKey::DefaultRegion, "my-default-region");
+/// ```
+#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, Serialize, Deserialize)]
+#[non_exhaustive]
+pub enum AmazonS3ConfigKey {
+    /// AWS Access Key
+    ///
+    /// See [`AmazonS3Builder::with_access_key_id`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_access_key_id`
+    /// - `access_key_id`
+    AccessKeyId,
+
+    /// Secret Access Key
+    ///
+    /// See [`AmazonS3Builder::with_secret_access_key`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_secret_access_key`
+    /// - `secret_access_key`
+    SecretAccessKey,
+
+    /// Region
+    ///
+    /// See [`AmazonS3Builder::with_region`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_region`
+    /// - `region`
+    Region,
+
+    /// Default region
+    ///
+    /// See [`AmazonS3Builder::with_region`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_default_region`
+    /// - `default_region`
+    DefaultRegion,
+
+    /// Bucket name
+    ///
+    /// See [`AmazonS3Builder::with_bucket_name`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_bucket`
+    /// - `aws_bucket_name`
+    /// - `bucket`
+    /// - `bucket_name`
+    Bucket,
+
+    /// Sets custom endpoint for communicating with AWS S3.
+    ///
+    /// See [`AmazonS3Builder::with_endpoint`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_endpoint`
+    /// - `aws_endpoint_url`
+    /// - `endpoint`
+    /// - `endpoint_url`
+    Endpoint,
+
+    /// Token to use for requests (passed to underlying provider)
+    ///
+    /// See [`AmazonS3Builder::with_token`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_session_token`
+    /// - `aws_token`
+    /// - `session_token`
+    /// - `token`
+    Token,
+
+    /// Fall back to ImdsV1
+    ///
+    /// See [`AmazonS3Builder::with_imdsv1_fallback`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_imdsv1_fallback`
+    /// - `imdsv1_fallback`
+    ImdsV1Fallback,
+
+    /// If virtual hosted style request has to be used
+    ///
+    /// See [`AmazonS3Builder::with_virtual_hosted_style_request`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_virtual_hosted_style_request`
+    /// - `virtual_hosted_style_request`
+    VirtualHostedStyleRequest,
+
+    /// Avoid computing payload checksum when calculating signature.
+    ///
+    /// See [`AmazonS3Builder::with_unsigned_payload`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_unsigned_payload`
+    /// - `unsigned_payload`
+    UnsignedPayload,
+
+    /// Set the checksum algorithm for this client
+    ///
+    /// See [`AmazonS3Builder::with_checksum_algorithm`]
+    Checksum,
+
+    /// Set the instance metadata endpoint
+    ///
+    /// See [`AmazonS3Builder::with_metadata_endpoint`] for details.
+    ///
+    /// Supported keys:
+    /// - `aws_metadata_endpoint`
+    /// - `metadata_endpoint`
+    MetadataEndpoint,
+
+    /// Set the container credentials relative URI when used in ECS
+    ContainerCredentialsRelativeUri,
+
+    /// Set the container credentials full URI when used in EKS
+    ContainerCredentialsFullUri,
+
+    /// Set the authorization token in plain text when used in EKS to authenticate with ContainerCredentialsFullUri
+    ContainerAuthorizationTokenFile,
+
+    /// Web identity token file path for AssumeRoleWithWebIdentity
+    ///
+    /// Supported keys:
+    /// - `aws_web_identity_token_file`
+    /// - `web_identity_token_file`
+    WebIdentityTokenFile,
+
+    /// Role ARN to assume when using web identity token
+    ///
+    /// Supported keys:
+    /// - `aws_role_arn`
+    /// - `role_arn`
+    RoleArn,
+
+    /// Session name for web identity role assumption
+    ///
+    /// Supported keys:
+    /// - `aws_role_session_name`
+    /// - `role_session_name`
+    RoleSessionName,
+
+    /// Custom STS endpoint for web identity token exchange
+    ///
+    /// Supported keys:
+    /// - `aws_endpoint_url_sts`
+    /// - `endpoint_url_sts`
+    StsEndpoint,
+
+    /// Configure how to provide `copy_if_not_exists`
+    ///
+    /// See [`S3CopyIfNotExists`]
+    CopyIfNotExists,
+
+    /// Configure how to provide conditional put operations
+    ///
+    /// See [`S3ConditionalPut`]
+    ConditionalPut,
+
+    /// Skip signing request
+    SkipSignature,
+
+    /// Disable tagging objects
+    ///
+    /// This can be desirable if not supported by the backing store
+    ///
+    /// Supported keys:
+    /// - `aws_disable_tagging`
+    /// - `disable_tagging`
+    DisableTagging,
+
+    /// Enable Support for S3 Express One Zone
+    ///
+    /// Supported keys:
+    /// - `aws_s3_express`
+    /// - `s3_express`
+    S3Express,
+
+    /// Enable Support for S3 Requester Pays
+    ///
+    /// Supported keys:
+    /// - `aws_request_payer`
+    /// - `request_payer`
+    RequestPayer,
+
+    /// Client options
+    Client(ClientConfigKey),
+
+    /// Encryption options
+    Encryption(S3EncryptionConfigKey),
+}
+
+impl AsRef<str> for AmazonS3ConfigKey {
+    fn as_ref(&self) -> &str {
+        match self {
+            Self::AccessKeyId => "aws_access_key_id",
+            Self::SecretAccessKey => "aws_secret_access_key",
+            Self::Region => "aws_region",
+            Self::Bucket => "aws_bucket",
+            Self::Endpoint => "aws_endpoint",
+            Self::Token => "aws_session_token",
+            Self::ImdsV1Fallback => "aws_imdsv1_fallback",
+            Self::VirtualHostedStyleRequest => "aws_virtual_hosted_style_request",
+            Self::S3Express => "aws_s3_express",
+            Self::DefaultRegion => "aws_default_region",
+            Self::MetadataEndpoint => "aws_metadata_endpoint",
+            Self::UnsignedPayload => "aws_unsigned_payload",
+            Self::Checksum => "aws_checksum_algorithm",
+            Self::ContainerCredentialsRelativeUri => "aws_container_credentials_relative_uri",
+            Self::ContainerCredentialsFullUri => "aws_container_credentials_full_uri",
+            Self::ContainerAuthorizationTokenFile => "aws_container_authorization_token_file",
+            Self::WebIdentityTokenFile => "aws_web_identity_token_file",
+            Self::RoleArn => "aws_role_arn",
+            Self::RoleSessionName => "aws_role_session_name",
+            Self::StsEndpoint => "aws_endpoint_url_sts",
+            Self::SkipSignature => "aws_skip_signature",
+            Self::CopyIfNotExists => "aws_copy_if_not_exists",
+            Self::ConditionalPut => "aws_conditional_put",
+            Self::DisableTagging => "aws_disable_tagging",
+            Self::RequestPayer => "aws_request_payer",
+            Self::Client(opt) => opt.as_ref(),
+            Self::Encryption(opt) => opt.as_ref(),
+        }
+    }
+}
+
+impl FromStr for AmazonS3ConfigKey {
+    type Err = crate::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "aws_access_key_id" | "access_key_id" => Ok(Self::AccessKeyId),
+            "aws_secret_access_key" | "secret_access_key" => Ok(Self::SecretAccessKey),
+            "aws_default_region" | "default_region" => Ok(Self::DefaultRegion),
+            "aws_region" | "region" => Ok(Self::Region),
+            "aws_bucket" | "aws_bucket_name" | "bucket_name" | "bucket" => Ok(Self::Bucket),
+            "aws_endpoint_url" | "aws_endpoint" | "endpoint_url" | "endpoint" => Ok(Self::Endpoint),
+            "aws_session_token" | "aws_token" | "session_token" | "token" => Ok(Self::Token),
+            "aws_virtual_hosted_style_request" | "virtual_hosted_style_request" => {
+                Ok(Self::VirtualHostedStyleRequest)
+            }
+            "aws_s3_express" | "s3_express" => Ok(Self::S3Express),
+            "aws_imdsv1_fallback" | "imdsv1_fallback" => Ok(Self::ImdsV1Fallback),
+            "aws_metadata_endpoint" | "metadata_endpoint" => Ok(Self::MetadataEndpoint),
+            "aws_unsigned_payload" | "unsigned_payload" => Ok(Self::UnsignedPayload),
+            "aws_checksum_algorithm" | "checksum_algorithm" => Ok(Self::Checksum),
+            "aws_container_credentials_relative_uri" => Ok(Self::ContainerCredentialsRelativeUri),
+            "aws_container_credentials_full_uri" => Ok(Self::ContainerCredentialsFullUri),
+            "aws_container_authorization_token_file" => Ok(Self::ContainerAuthorizationTokenFile),
+            "aws_web_identity_token_file" | "web_identity_token_file" => {
+                Ok(Self::WebIdentityTokenFile)
+            }
+            "aws_role_arn" | "role_arn" => Ok(Self::RoleArn),
+            "aws_role_session_name" | "role_session_name" => Ok(Self::RoleSessionName),
+            "aws_endpoint_url_sts" | "endpoint_url_sts" => Ok(Self::StsEndpoint),
+            "aws_skip_signature" | "skip_signature" => Ok(Self::SkipSignature),
+            "aws_copy_if_not_exists" | "copy_if_not_exists" => Ok(Self::CopyIfNotExists),
+            "aws_conditional_put" | "conditional_put" => Ok(Self::ConditionalPut),
+            "aws_disable_tagging" | "disable_tagging" => Ok(Self::DisableTagging),
+            "aws_request_payer" | "request_payer" => Ok(Self::RequestPayer),
+            // Backwards compatibility
+            "aws_allow_http" => Ok(Self::Client(ClientConfigKey::AllowHttp)),
+            "aws_server_side_encryption" => Ok(Self::Encryption(
+                S3EncryptionConfigKey::ServerSideEncryption,
+            )),
+            "aws_sse_kms_key_id" => Ok(Self::Encryption(S3EncryptionConfigKey::KmsKeyId)),
+            "aws_sse_bucket_key_enabled" => {
+                Ok(Self::Encryption(S3EncryptionConfigKey::BucketKeyEnabled))
+            }
+            "aws_sse_customer_key_base64" => Ok(Self::Encryption(
+                S3EncryptionConfigKey::CustomerEncryptionKey,
+            )),
+            _ => match s.strip_prefix("aws_").unwrap_or(s).parse() {
+                Ok(key) => Ok(Self::Client(key)),
+                Err(_) => Err(Error::UnknownConfigurationKey { key: s.into() }.into()),
+            },
+        }
+    }
+}
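+
+// Illustrative sketch of the key round-trip above: both the canonical
+// `aws_`-prefixed name and the bare name parse to the same key, and
+// `as_ref` returns the canonical form, e.g.
+//
+//   use object_store::aws::AmazonS3ConfigKey;
+//   let key: AmazonS3ConfigKey = "region".parse().unwrap();
+//   assert_eq!(key, AmazonS3ConfigKey::Region);
+//   assert_eq!(key.as_ref(), "aws_region");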
+
+impl AmazonS3Builder {
+    /// Create a new [`AmazonS3Builder`] with default values.
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Fill the [`AmazonS3Builder`] with regular AWS environment variables
+    ///
+    /// All environment variables starting with `AWS_` will be evaluated. Names must
+    /// match acceptable input to [`AmazonS3ConfigKey::from_str`]. Only upper-case environment
+    /// variables are accepted.
+    ///
+    /// Some examples of variables extracted from environment:
+    /// * `AWS_ACCESS_KEY_ID` -> access_key_id
+    /// * `AWS_SECRET_ACCESS_KEY` -> secret_access_key
+    /// * `AWS_DEFAULT_REGION` -> region
+    /// * `AWS_ENDPOINT` -> endpoint
+    /// * `AWS_SESSION_TOKEN` -> token
+    /// * `AWS_WEB_IDENTITY_TOKEN_FILE` -> path to file containing web identity token for AssumeRoleWithWebIdentity
+    /// * `AWS_ROLE_ARN` -> ARN of the role to assume when using web identity token
+    /// * `AWS_ROLE_SESSION_NAME` -> optional session name for web identity role assumption (defaults to "WebIdentitySession")
+    /// * `AWS_ENDPOINT_URL_STS` -> optional custom STS endpoint for web identity token exchange (defaults to "https://sts.{region}.amazonaws.com")
+    /// * `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` -> relative URL for container credentials, used in ECS
+    /// * `AWS_CONTAINER_CREDENTIALS_FULL_URI` -> full URL for container credentials, used in EKS
+    /// * `AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE` -> file containing the token used to authenticate with the full URI
+    /// * `AWS_ALLOW_HTTP` -> set to "true" to permit HTTP connections without TLS
+    /// * `AWS_REQUEST_PAYER` -> set to "true" to permit operations on requester-pays buckets.
+    ///
+    /// # Example
+    /// ```
+    /// use object_store::aws::AmazonS3Builder;
+    ///
+    /// let s3 = AmazonS3Builder::from_env()
+    ///     .with_bucket_name("foo")
+    ///     .build();
+    /// ```
+    pub fn from_env() -> Self {
+        let mut builder: Self = Default::default();
+
+        for (os_key, os_value) in std::env::vars_os() {
+            if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) {
+                if key.starts_with("AWS_") {
+                    if let Ok(config_key) = key.to_ascii_lowercase().parse() {
+                        builder = builder.with_config(config_key, value);
+                    }
+                }
+            }
+        }
+
+        builder
+    }
+
+    /// Parse available connection info from a well-known storage URL.
+    ///
+    /// The supported url schemes are:
+    ///
+    /// - `s3://<bucket>/<path>`
+    /// - `s3a://<bucket>/<path>`
+    /// - `https://s3.<region>.amazonaws.com/<bucket>`
+    /// - `https://<bucket>.s3.<region>.amazonaws.com`
+    /// - `https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket`
+    ///
+    /// Note: Settings derived from the URL will override any others set on this builder
+    ///
+    /// # Example
+    /// ```
+    /// use object_store::aws::AmazonS3Builder;
+    ///
+    /// let s3 = AmazonS3Builder::from_env()
+    ///     .with_url("s3://bucket/path")
+    ///     .build();
+    /// ```
+    pub fn with_url(mut self, url: impl Into<String>) -> Self {
+        self.url = Some(url.into());
+        self
+    }
+
+    /// Set an option on the builder via a key-value pair.
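+    ///
+    /// For example (an illustrative sketch):
+    ///
+    /// ```
+    /// # use object_store::aws::{AmazonS3Builder, AmazonS3ConfigKey};
+    /// let builder = AmazonS3Builder::new()
+    ///     .with_config(AmazonS3ConfigKey::Region, "us-east-1")
+    ///     .with_config("aws_skip_signature".parse().unwrap(), "true");
+    /// ```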
+    pub fn with_config(mut self, key: AmazonS3ConfigKey, value: impl Into<String>) -> Self {
+        match key {
+            AmazonS3ConfigKey::AccessKeyId => self.access_key_id = Some(value.into()),
+            AmazonS3ConfigKey::SecretAccessKey => self.secret_access_key = Some(value.into()),
+            AmazonS3ConfigKey::Region => self.region = Some(value.into()),
+            AmazonS3ConfigKey::Bucket => self.bucket_name = Some(value.into()),
+            AmazonS3ConfigKey::Endpoint => self.endpoint = Some(value.into()),
+            AmazonS3ConfigKey::Token => self.token = Some(value.into()),
+            AmazonS3ConfigKey::ImdsV1Fallback => self.imdsv1_fallback.parse(value),
+            AmazonS3ConfigKey::VirtualHostedStyleRequest => {
+                self.virtual_hosted_style_request.parse(value)
+            }
+            AmazonS3ConfigKey::S3Express => self.s3_express.parse(value),
+            AmazonS3ConfigKey::DefaultRegion => {
+                self.region = self.region.or_else(|| Some(value.into()))
+            }
+            AmazonS3ConfigKey::MetadataEndpoint => self.metadata_endpoint = Some(value.into()),
+            AmazonS3ConfigKey::UnsignedPayload => self.unsigned_payload.parse(value),
+            AmazonS3ConfigKey::Checksum => {
+                self.checksum_algorithm = Some(ConfigValue::Deferred(value.into()))
+            }
+            AmazonS3ConfigKey::ContainerCredentialsRelativeUri => {
+                self.container_credentials_relative_uri = Some(value.into())
+            }
+            AmazonS3ConfigKey::ContainerCredentialsFullUri => {
+                self.container_credentials_full_uri = Some(value.into());
+            }
+            AmazonS3ConfigKey::ContainerAuthorizationTokenFile => {
+                self.container_authorization_token_file = Some(value.into());
+            }
+            AmazonS3ConfigKey::WebIdentityTokenFile => {
+                self.web_identity_token_file = Some(value.into());
+            }
+            AmazonS3ConfigKey::RoleArn => {
+                self.role_arn = Some(value.into());
+            }
+            AmazonS3ConfigKey::RoleSessionName => {
+                self.role_session_name = Some(value.into());
+            }
+            AmazonS3ConfigKey::StsEndpoint => {
+                self.sts_endpoint = Some(value.into());
+            }
+            AmazonS3ConfigKey::Client(key) => {
+                self.client_options = self.client_options.with_config(key, value)
+            }
+            AmazonS3ConfigKey::SkipSignature => self.skip_signature.parse(value),
+            AmazonS3ConfigKey::DisableTagging => self.disable_tagging.parse(value),
+            AmazonS3ConfigKey::CopyIfNotExists => {
+                self.copy_if_not_exists = Some(ConfigValue::Deferred(value.into()))
+            }
+            AmazonS3ConfigKey::ConditionalPut => {
+                self.conditional_put = ConfigValue::Deferred(value.into())
+            }
+            AmazonS3ConfigKey::RequestPayer => {
+                self.request_payer = ConfigValue::Deferred(value.into())
+            }
+            AmazonS3ConfigKey::Encryption(key) => match key {
+                S3EncryptionConfigKey::ServerSideEncryption => {
+                    self.encryption_type = Some(ConfigValue::Deferred(value.into()))
+                }
+                S3EncryptionConfigKey::KmsKeyId => self.encryption_kms_key_id = Some(value.into()),
+                S3EncryptionConfigKey::BucketKeyEnabled => {
+                    self.encryption_bucket_key_enabled = Some(ConfigValue::Deferred(value.into()))
+                }
+                S3EncryptionConfigKey::CustomerEncryptionKey => {
+                    self.encryption_customer_key_base64 = Some(value.into())
+                }
+            },
+        };
+        self
+    }
+
+    /// Get config value via a [`AmazonS3ConfigKey`].
+    ///
+    /// # Example
+    /// ```
+    /// use object_store::aws::{AmazonS3Builder, AmazonS3ConfigKey};
+    ///
+    /// let builder = AmazonS3Builder::from_env()
+    ///     .with_bucket_name("foo");
+    /// let bucket_name = builder.get_config_value(&AmazonS3ConfigKey::Bucket).unwrap_or_default();
+    /// assert_eq!("foo", &bucket_name);
+    /// ```
+    pub fn get_config_value(&self, key: &AmazonS3ConfigKey) -> Option<String> {
+        match key {
+            AmazonS3ConfigKey::AccessKeyId => self.access_key_id.clone(),
+            AmazonS3ConfigKey::SecretAccessKey => self.secret_access_key.clone(),
+            AmazonS3ConfigKey::Region | AmazonS3ConfigKey::DefaultRegion => self.region.clone(),
+            AmazonS3ConfigKey::Bucket => self.bucket_name.clone(),
+            AmazonS3ConfigKey::Endpoint => self.endpoint.clone(),
+            AmazonS3ConfigKey::Token => self.token.clone(),
+            AmazonS3ConfigKey::ImdsV1Fallback => Some(self.imdsv1_fallback.to_string()),
+            AmazonS3ConfigKey::VirtualHostedStyleRequest => {
+                Some(self.virtual_hosted_style_request.to_string())
+            }
+            AmazonS3ConfigKey::S3Express => Some(self.s3_express.to_string()),
+            AmazonS3ConfigKey::MetadataEndpoint => self.metadata_endpoint.clone(),
+            AmazonS3ConfigKey::UnsignedPayload => Some(self.unsigned_payload.to_string()),
+            AmazonS3ConfigKey::Checksum => {
+                self.checksum_algorithm.as_ref().map(ToString::to_string)
+            }
+            AmazonS3ConfigKey::Client(key) => self.client_options.get_config_value(key),
+            AmazonS3ConfigKey::ContainerCredentialsRelativeUri => {
+                self.container_credentials_relative_uri.clone()
+            }
+            AmazonS3ConfigKey::ContainerCredentialsFullUri => {
+                self.container_credentials_full_uri.clone()
+            }
+            AmazonS3ConfigKey::ContainerAuthorizationTokenFile => {
+                self.container_authorization_token_file.clone()
+            }
+            AmazonS3ConfigKey::WebIdentityTokenFile => self.web_identity_token_file.clone(),
+            AmazonS3ConfigKey::RoleArn => self.role_arn.clone(),
+            AmazonS3ConfigKey::RoleSessionName => self.role_session_name.clone(),
+            AmazonS3ConfigKey::StsEndpoint => self.sts_endpoint.clone(),
+            AmazonS3ConfigKey::SkipSignature => Some(self.skip_signature.to_string()),
+            AmazonS3ConfigKey::CopyIfNotExists => {
+                self.copy_if_not_exists.as_ref().map(ToString::to_string)
+            }
+            AmazonS3ConfigKey::ConditionalPut => Some(self.conditional_put.to_string()),
+            AmazonS3ConfigKey::DisableTagging => Some(self.disable_tagging.to_string()),
+            AmazonS3ConfigKey::RequestPayer => Some(self.request_payer.to_string()),
+            AmazonS3ConfigKey::Encryption(key) => match key {
+                S3EncryptionConfigKey::ServerSideEncryption => {
+                    self.encryption_type.as_ref().map(ToString::to_string)
+                }
+                S3EncryptionConfigKey::KmsKeyId => self.encryption_kms_key_id.clone(),
+                S3EncryptionConfigKey::BucketKeyEnabled => self
+                    .encryption_bucket_key_enabled
+                    .as_ref()
+                    .map(ToString::to_string),
+                S3EncryptionConfigKey::CustomerEncryptionKey => {
+                    self.encryption_customer_key_base64.clone()
+                }
+            },
+        }
+    }
+
+    /// Sets properties on this builder based on a URL
+    ///
+    /// This is a separate member function to allow fallible computation to
+    /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`]
+    fn parse_url(&mut self, url: &str) -> Result<()> {
+        let parsed = Url::parse(url).map_err(|source| {
+            let url = url.into();
+            Error::UnableToParseUrl { url, source }
+        })?;
+
+        let host = parsed
+            .host_str()
+            .ok_or_else(|| Error::UrlNotRecognised { url: url.into() })?;
+
+        match parsed.scheme() {
+            "s3" | "s3a" => self.bucket_name = Some(host.to_string()),
+            "https" => match host.splitn(4, '.').collect_tuple() {
+                Some(("s3", region, "amazonaws", "com")) => {
+                    self.region = Some(region.to_string());
+                    let bucket = parsed.path_segments().into_iter().flatten().next();
+                    if let Some(bucket) = bucket {
+                        self.bucket_name = Some(bucket.into());
+                    }
+                }
+                Some((bucket, "s3", region, "amazonaws.com")) => {
+                    self.bucket_name = Some(bucket.to_string());
+                    self.region = Some(region.to_string());
+                    self.virtual_hosted_style_request = true.into();
+                }
+                Some((account, "r2", "cloudflarestorage", "com")) => {
+                    self.region = Some("auto".to_string());
+                    let endpoint = format!("https://{account}.r2.cloudflarestorage.com");
+                    self.endpoint = Some(endpoint);
+
+                    let bucket = parsed.path_segments().into_iter().flatten().next();
+                    if let Some(bucket) = bucket {
+                        self.bucket_name = Some(bucket.into());
+                    }
+                }
+                _ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
+            },
+            scheme => {
+                let scheme = scheme.into();
+                return Err(Error::UnknownUrlScheme { scheme }.into());
+            }
+        };
+        Ok(())
+    }
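+
+    // Illustrative examples of the matching above (see also the url tests
+    // below): `s3://my-bucket/path` sets only the bucket name, while
+    // `https://s3.eu-west-1.amazonaws.com/my-bucket` also sets the region,
+    // and `https://my-bucket.s3.eu-west-1.amazonaws.com` additionally
+    // enables virtual hosted style requests.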
"amazonaws", "com")) => { + self.region = Some(region.to_string()); + let bucket = parsed.path_segments().into_iter().flatten().next(); + if let Some(bucket) = bucket { + self.bucket_name = Some(bucket.into()); + } + } + Some((bucket, "s3", region, "amazonaws.com")) => { + self.bucket_name = Some(bucket.to_string()); + self.region = Some(region.to_string()); + self.virtual_hosted_style_request = true.into(); + } + Some((account, "r2", "cloudflarestorage", "com")) => { + self.region = Some("auto".to_string()); + let endpoint = format!("https://{account}.r2.cloudflarestorage.com"); + self.endpoint = Some(endpoint); + + let bucket = parsed.path_segments().into_iter().flatten().next(); + if let Some(bucket) = bucket { + self.bucket_name = Some(bucket.into()); + } + } + _ => return Err(Error::UrlNotRecognised { url: url.into() }.into()), + }, + scheme => { + let scheme = scheme.into(); + return Err(Error::UnknownUrlScheme { scheme }.into()); + } + }; + Ok(()) + } + + /// Set the AWS Access Key + pub fn with_access_key_id(mut self, access_key_id: impl Into) -> Self { + self.access_key_id = Some(access_key_id.into()); + self + } + + /// Set the AWS Secret Access Key + pub fn with_secret_access_key(mut self, secret_access_key: impl Into) -> Self { + self.secret_access_key = Some(secret_access_key.into()); + self + } + + /// Set the AWS Session Token to use for requests + pub fn with_token(mut self, token: impl Into) -> Self { + self.token = Some(token.into()); + self + } + + /// Set the region, defaults to `us-east-1` + pub fn with_region(mut self, region: impl Into) -> Self { + self.region = Some(region.into()); + self + } + + /// Set the bucket_name (required) + pub fn with_bucket_name(mut self, bucket_name: impl Into) -> Self { + self.bucket_name = Some(bucket_name.into()); + self + } + + /// Sets the endpoint for communicating with AWS S3, defaults to the [region endpoint] + /// + /// For example, this might be set to `"http://localhost:4566:` + /// for testing against a localstack instance. + /// + /// The `endpoint` field should be consistent with [`Self::with_virtual_hosted_style_request`], + /// i.e. if `virtual_hosted_style_request` is set to true then `endpoint` + /// should have the bucket name included. + /// + /// By default, only HTTPS schemes are enabled. To connect to an HTTP endpoint, enable + /// [`Self::with_allow_http`]. + /// + /// [region endpoint]: https://docs.aws.amazon.com/general/latest/gr/s3.html + pub fn with_endpoint(mut self, endpoint: impl Into) -> Self { + self.endpoint = Some(endpoint.into()); + self + } + + /// Set the credential provider overriding any other options + pub fn with_credentials(mut self, credentials: AwsCredentialProvider) -> Self { + self.credentials = Some(credentials); + self + } + + /// Sets what protocol is allowed. If `allow_http` is : + /// * false (default): Only HTTPS are allowed + /// * true: HTTP and HTTPS are allowed + pub fn with_allow_http(mut self, allow_http: bool) -> Self { + self.client_options = self.client_options.with_allow_http(allow_http); + self + } + + /// Sets if virtual hosted style request has to be used. + /// + /// If `virtual_hosted_style_request` is: + /// * false (default): Path style request is used + /// * true: Virtual hosted style request is used + /// + /// If the `endpoint` is provided then it should be + /// consistent with `virtual_hosted_style_request`. + /// i.e. if `virtual_hosted_style_request` is set to true + /// then `endpoint` should have bucket name included. 
+
+    /// Set the credential provider overriding any other options
+    pub fn with_credentials(mut self, credentials: AwsCredentialProvider) -> Self {
+        self.credentials = Some(credentials);
+        self
+    }
+
+    /// Sets which protocols are allowed. If `allow_http` is:
+    /// * false (default): only HTTPS is allowed
+    /// * true: HTTP and HTTPS are allowed
+    pub fn with_allow_http(mut self, allow_http: bool) -> Self {
+        self.client_options = self.client_options.with_allow_http(allow_http);
+        self
+    }
+
+    /// Sets whether virtual hosted style requests are used.
+    ///
+    /// If `virtual_hosted_style_request` is:
+    /// * false (default): Path style request is used
+    /// * true: Virtual hosted style request is used
+    ///
+    /// If the `endpoint` is provided then it should be
+    /// consistent with `virtual_hosted_style_request`,
+    /// i.e. if `virtual_hosted_style_request` is set to true
+    /// then `endpoint` should have the bucket name included.
+    pub fn with_virtual_hosted_style_request(mut self, virtual_hosted_style_request: bool) -> Self {
+        self.virtual_hosted_style_request = virtual_hosted_style_request.into();
+        self
+    }
+
+    /// Configure this as an S3 Express One Zone Bucket
+    pub fn with_s3_express(mut self, s3_express: bool) -> Self {
+        self.s3_express = s3_express.into();
+        self
+    }
+
+    /// Set the retry configuration
+    pub fn with_retry(mut self, retry_config: RetryConfig) -> Self {
+        self.retry_config = retry_config;
+        self
+    }
+
+    /// By default instance credentials will only be fetched over [IMDSv2], as AWS recommends
+    /// against having IMDSv1 enabled on EC2 instances as it is vulnerable to [SSRF attack]
+    ///
+    /// However, certain deployment environments, such as those running old versions of kube2iam,
+    /// may not support IMDSv2. This option will enable automatic fallback to using IMDSv1
+    /// if the token endpoint returns a 403 error indicating that IMDSv2 is not supported.
+    ///
+    /// This option has no effect if not using instance credentials
+    ///
+    /// [IMDSv2]: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html
+    /// [SSRF attack]: https://aws.amazon.com/blogs/security/defense-in-depth-open-firewalls-reverse-proxies-ssrf-vulnerabilities-ec2-instance-metadata-service/
+    ///
+    pub fn with_imdsv1_fallback(mut self) -> Self {
+        self.imdsv1_fallback = true.into();
+        self
+    }
+
+    /// Sets whether the unsigned payload option is used.
+    /// See [unsigned payload option](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html)
+    /// * false (default): Signed payload option is used, where the checksum for the request body is computed and included when constructing a canonical request.
+    /// * true: Unsigned payload option is used. The `UNSIGNED-PAYLOAD` literal is included when constructing a canonical request.
+    pub fn with_unsigned_payload(mut self, unsigned_payload: bool) -> Self {
+        self.unsigned_payload = unsigned_payload.into();
+        self
+    }
+
+    /// If enabled, [`AmazonS3`] will not fetch credentials and will not sign requests
+    ///
+    /// This can be useful when interacting with public S3 buckets that deny authorized requests
+    pub fn with_skip_signature(mut self, skip_signature: bool) -> Self {
+        self.skip_signature = skip_signature.into();
+        self
+    }
+
+    /// Sets the [checksum algorithm] which has to be used for object integrity check during upload.
+    ///
+    /// [checksum algorithm]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
+    pub fn with_checksum_algorithm(mut self, checksum_algorithm: Checksum) -> Self {
+        // Convert to String to enable deferred parsing of config
+        self.checksum_algorithm = Some(checksum_algorithm.into());
+        self
+    }
+
+    /// Set the [instance metadata endpoint](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html),
+    /// used primarily within AWS EC2.
+    ///
+    /// This defaults to the IPv4 endpoint: http://169.254.169.254. One can alternatively use the IPv6
+    /// endpoint http://fd00:ec2::254.
+    pub fn with_metadata_endpoint(mut self, endpoint: impl Into<String>) -> Self {
+        self.metadata_endpoint = Some(endpoint.into());
+        self
+    }
+
+    /// Set the proxy_url to be used by the underlying client
+    pub fn with_proxy_url(mut self, proxy_url: impl Into<String>) -> Self {
+        self.client_options = self.client_options.with_proxy_url(proxy_url);
+        self
+    }
+
+    /// Set a trusted proxy CA certificate
+    pub fn with_proxy_ca_certificate(mut self, proxy_ca_certificate: impl Into<String>) -> Self {
+        self.client_options = self
+            .client_options
+            .with_proxy_ca_certificate(proxy_ca_certificate);
+        self
+    }
+
+    /// Set a list of hosts to exclude from proxy connections
+    pub fn with_proxy_excludes(mut self, proxy_excludes: impl Into<String>) -> Self {
+        self.client_options = self.client_options.with_proxy_excludes(proxy_excludes);
+        self
+    }
+
+    /// Sets the client options, overriding any already set
+    pub fn with_client_options(mut self, options: ClientOptions) -> Self {
+        self.client_options = options;
+        self
+    }
+
+    /// Configure how to provide `copy_if_not_exists`
+    pub fn with_copy_if_not_exists(mut self, config: S3CopyIfNotExists) -> Self {
+        self.copy_if_not_exists = Some(config.into());
+        self
+    }
+
+    /// Configure how to provide conditional put operations.
+    /// If not set, the default value will be `S3ConditionalPut::ETagMatch`
+    pub fn with_conditional_put(mut self, config: S3ConditionalPut) -> Self {
+        self.conditional_put = config.into();
+        self
+    }
+
+    /// If set to `true` will ignore any tags provided to put_opts
+    pub fn with_disable_tagging(mut self, ignore: bool) -> Self {
+        self.disable_tagging = ignore.into();
+        self
+    }
+
+    /// Use SSE-KMS for server side encryption.
+    pub fn with_sse_kms_encryption(mut self, kms_key_id: impl Into<String>) -> Self {
+        self.encryption_type = Some(ConfigValue::Parsed(S3EncryptionType::SseKms));
+        if let Some(kms_key_id) = kms_key_id.into().into() {
+            self.encryption_kms_key_id = Some(kms_key_id);
+        }
+        self
+    }
+
+    /// Use dual server side encryption for server side encryption.
+    pub fn with_dsse_kms_encryption(mut self, kms_key_id: impl Into<String>) -> Self {
+        self.encryption_type = Some(ConfigValue::Parsed(S3EncryptionType::DsseKms));
+        if let Some(kms_key_id) = kms_key_id.into().into() {
+            self.encryption_kms_key_id = Some(kms_key_id);
+        }
+        self
+    }
+
+    /// Use SSE-C for server side encryption.
+    /// Must pass the *base64-encoded* 256-bit customer encryption key.
+    pub fn with_ssec_encryption(mut self, customer_key_base64: impl Into<String>) -> Self {
+        self.encryption_type = Some(ConfigValue::Parsed(S3EncryptionType::SseC));
+        self.encryption_customer_key_base64 = customer_key_base64.into().into();
+        self
+    }
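+
+    // Illustrative sketch for SSE-C above: the key must be the base64
+    // encoding of 256 bits of key material, e.g. (hypothetical all-zero key,
+    // using the `base64` crate already imported by this module)
+    //
+    //   use base64::prelude::*;
+    //   let key_base64 = BASE64_STANDARD.encode([0u8; 32]);
+    //   let builder = AmazonS3Builder::new().with_ssec_encryption(key_base64);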
+
+    /// Set whether to enable bucket key for server side encryption. This overrides
+    /// the bucket default setting for bucket keys.
+    ///
+    /// When bucket keys are disabled, each object is encrypted with a unique data key.
+    /// When bucket keys are enabled, a single data key is used for the entire bucket,
+    /// reducing overhead of encryption.
+    pub fn with_bucket_key(mut self, enabled: bool) -> Self {
+        self.encryption_bucket_key_enabled = Some(ConfigValue::Parsed(enabled));
+        self
+    }
+
+    /// Set whether to charge requester for bucket operations.
+    pub fn with_request_payer(mut self, enabled: bool) -> Self {
+        self.request_payer = ConfigValue::Parsed(enabled);
+        self
+    }
+
+    /// The [`HttpConnector`] to use
+    ///
+    /// On non-WASM32 platforms uses [`reqwest`] by default, on WASM32 platforms must be provided
+    pub fn with_http_connector<C: HttpConnector>(mut self, connector: C) -> Self {
+        self.http_connector = Some(Arc::new(connector));
+        self
+    }
+
+    /// Create a [`AmazonS3`] instance from the provided values,
+    /// consuming `self`.
+    pub fn build(mut self) -> Result<AmazonS3> {
+        if let Some(url) = self.url.take() {
+            self.parse_url(&url)?;
+        }
+
+        let http = http_connector(self.http_connector)?;
+
+        let bucket = self.bucket_name.ok_or(Error::MissingBucketName)?;
+        let region = self.region.unwrap_or_else(|| "us-east-1".to_string());
+        let checksum = self.checksum_algorithm.map(|x| x.get()).transpose()?;
+        let copy_if_not_exists = self.copy_if_not_exists.map(|x| x.get()).transpose()?;
+
+        let credentials = if let Some(credentials) = self.credentials {
+            credentials
+        } else if self.access_key_id.is_some() || self.secret_access_key.is_some() {
+            match (self.access_key_id, self.secret_access_key, self.token) {
+                (Some(key_id), Some(secret_key), token) => {
+                    debug!("Using Static credential provider");
+                    let credential = AwsCredential {
+                        key_id,
+                        secret_key,
+                        token,
+                    };
+                    Arc::new(StaticCredentialProvider::new(credential)) as _
+                }
+                (None, Some(_), _) => return Err(Error::MissingAccessKeyId.into()),
+                (Some(_), None, _) => return Err(Error::MissingSecretAccessKey.into()),
+                (None, None, _) => unreachable!(),
+            }
+        } else if let (Ok(token_path), Ok(role_arn)) = (
+            std::env::var("AWS_WEB_IDENTITY_TOKEN_FILE"),
+            std::env::var("AWS_ROLE_ARN"),
+        ) {
+            debug!("Using WebIdentity credential provider");
+
+            let session_name = self
+                .role_session_name
+                .clone()
+                .unwrap_or_else(|| "WebIdentitySession".to_string());
+
+            let endpoint = self
+                .sts_endpoint
+                .clone()
+                .unwrap_or_else(|| format!("https://sts.{region}.amazonaws.com"));
+
+            // Disallow non-HTTPs requests
+            let options = self.client_options.clone().with_allow_http(false);
+
+            let token = WebIdentityProvider {
+                token_path: token_path.clone(),
+                session_name,
+                role_arn: role_arn.clone(),
+                endpoint,
+            };
+
+            Arc::new(TokenCredentialProvider::new(
+                token,
+                http.connect(&options)?,
+                self.retry_config.clone(),
+            )) as _
+        } else if let Some(uri) = self.container_credentials_relative_uri {
+            debug!("Using Task credential provider");
+
+            let options = self.client_options.clone().with_allow_http(true);
+
+            Arc::new(TaskCredentialProvider {
+                url: format!("http://169.254.170.2{uri}"),
+                retry: self.retry_config.clone(),
+                // The instance metadata endpoint is accessed over HTTP
+                client: http.connect(&options)?,
+                cache: Default::default(),
+            }) as _
+        } else if let (Some(full_uri), Some(token_file)) = (
+            self.container_credentials_full_uri,
+            self.container_authorization_token_file,
+        ) {
+            debug!("Using EKS Pod Identity credential provider");
+
+            let options = self.client_options.clone().with_allow_http(true);
+
+            Arc::new(EKSPodCredentialProvider {
+                url: full_uri,
+                token_file,
+                retry: self.retry_config.clone(),
+                client: http.connect(&options)?,
+                cache: Default::default(),
+            }) as _
+        } else {
+            debug!("Using Instance credential provider");
+
+            let token = InstanceCredentialProvider {
+                imdsv1_fallback: self.imdsv1_fallback.get()?,
+                metadata_endpoint: self
+                    .metadata_endpoint
+                    .unwrap_or_else(|| DEFAULT_METADATA_ENDPOINT.into()),
+            };
+
+            Arc::new(TokenCredentialProvider::new(
token, + http.connect(&self.client_options.metadata_options())?, + self.retry_config.clone(), + )) as _ + }; + + let (session_provider, zonal_endpoint) = match self.s3_express.get()? { + true => { + let zone = parse_bucket_az(&bucket).ok_or_else(|| { + let bucket = bucket.clone(); + Error::ZoneSuffix { bucket } + })?; + + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-Regions-and-Zones.html + let endpoint = format!("https://{bucket}.s3express-{zone}.{region}.amazonaws.com"); + + let session = Arc::new( + TokenCredentialProvider::new( + SessionProvider { + endpoint: endpoint.clone(), + region: region.clone(), + credentials: Arc::clone(&credentials), + }, + http.connect(&self.client_options)?, + self.retry_config.clone(), + ) + .with_min_ttl(Duration::from_secs(60)), // Credentials only valid for 5 minutes + ); + (Some(session as _), Some(endpoint)) + } + false => (None, None), + }; + + // If `endpoint` is provided it's assumed to be consistent with `virtual_hosted_style_request` or `s3_express`. + // For example, if `virtual_hosted_style_request` is true then `endpoint` should have bucket name included. + let virtual_hosted = self.virtual_hosted_style_request.get()?; + let bucket_endpoint = match (&self.endpoint, zonal_endpoint, virtual_hosted) { + (Some(endpoint), _, true) => endpoint.clone(), + (Some(endpoint), _, false) => format!("{}/{}", endpoint.trim_end_matches("/"), bucket), + (None, Some(endpoint), _) => endpoint, + (None, None, true) => format!("https://{bucket}.s3.{region}.amazonaws.com"), + (None, None, false) => format!("https://s3.{region}.amazonaws.com/{bucket}"), + }; + + let encryption_headers = if let Some(encryption_type) = self.encryption_type { + S3EncryptionHeaders::try_new( + &encryption_type.get()?, + self.encryption_kms_key_id, + self.encryption_bucket_key_enabled + .map(|val| val.get()) + .transpose()?, + self.encryption_customer_key_base64, + )? + } else { + S3EncryptionHeaders::default() + }; + + let config = S3Config { + region, + endpoint: self.endpoint, + bucket, + bucket_endpoint, + credentials, + session_provider, + retry_config: self.retry_config, + client_options: self.client_options, + sign_payload: !self.unsigned_payload.get()?, + skip_signature: self.skip_signature.get()?, + disable_tagging: self.disable_tagging.get()?, + checksum, + copy_if_not_exists, + conditional_put: self.conditional_put.get()?, + encryption_headers, + request_payer: self.request_payer.get()?, + }; + + let http_client = http.connect(&config.client_options)?; + let client = Arc::new(S3Client::new(config, http_client)); + + Ok(AmazonS3 { client }) + } +} + +/// Extracts the AZ from a S3 Express One Zone bucket name +/// +/// +fn parse_bucket_az(bucket: &str) -> Option<&str> { + Some(bucket.strip_suffix("--x-s3")?.rsplit_once("--")?.1) +} + +/// Encryption configuration options for S3. +/// +/// These options are used to configure server-side encryption for S3 objects. +/// To configure them, pass them to [`AmazonS3Builder::with_config`]. 
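+///
+/// For example (an illustrative sketch):
+///
+/// ```
+/// # use object_store::aws::AmazonS3Builder;
+/// let builder = AmazonS3Builder::new()
+///     .with_config("aws_server_side_encryption".parse().unwrap(), "aws:kms")
+///     .with_config("aws_sse_kms_key_id".parse().unwrap(), "my-key-id");
+/// ```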
+///
+/// [SSE-S3]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingServerSideEncryption.html
+/// [SSE-KMS]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html
+/// [DSSE-KMS]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingDSSEncryption.html
+/// [SSE-C]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html
+#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, Serialize, Deserialize)]
+#[non_exhaustive]
+pub enum S3EncryptionConfigKey {
+    /// Type of encryption to use. If set, must be one of "AES256" (SSE-S3), "aws:kms" (SSE-KMS), "aws:kms:dsse" (DSSE-KMS) or "sse-c".
+    ServerSideEncryption,
+    /// The KMS key ID to use for server-side encryption. If set, ServerSideEncryption
+    /// must be "aws:kms" or "aws:kms:dsse".
+    KmsKeyId,
+    /// If set to true, will use the bucket's default KMS key for server-side encryption.
+    /// If set to false, will disable the use of the bucket's default KMS key for server-side encryption.
+    BucketKeyEnabled,
+
+    /// The base64 encoded, 256-bit customer encryption key to use for server-side encryption.
+    /// If set, ServerSideEncryption must be "sse-c".
+    CustomerEncryptionKey,
+}
+
+impl AsRef<str> for S3EncryptionConfigKey {
+    fn as_ref(&self) -> &str {
+        match self {
+            Self::ServerSideEncryption => "aws_server_side_encryption",
+            Self::KmsKeyId => "aws_sse_kms_key_id",
+            Self::BucketKeyEnabled => "aws_sse_bucket_key_enabled",
+            Self::CustomerEncryptionKey => "aws_sse_customer_key_base64",
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+enum S3EncryptionType {
+    S3,
+    SseKms,
+    DsseKms,
+    SseC,
+}
+
+impl crate::config::Parse for S3EncryptionType {
+    fn parse(s: &str) -> Result<Self> {
+        match s {
+            "AES256" => Ok(Self::S3),
+            "aws:kms" => Ok(Self::SseKms),
+            "aws:kms:dsse" => Ok(Self::DsseKms),
+            "sse-c" => Ok(Self::SseC),
+            _ => Err(Error::InvalidEncryptionType { passed: s.into() }.into()),
+        }
+    }
+}
+
+impl From<&S3EncryptionType> for &'static str {
+    fn from(value: &S3EncryptionType) -> Self {
+        match value {
+            S3EncryptionType::S3 => "AES256",
+            S3EncryptionType::SseKms => "aws:kms",
+            S3EncryptionType::DsseKms => "aws:kms:dsse",
+            S3EncryptionType::SseC => "sse-c",
+        }
+    }
+}
+
+impl std::fmt::Display for S3EncryptionType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.into())
+    }
+}
+
+/// A sequence of headers to be sent for write requests that specify server-side
+/// encryption.
+///
+/// Whether these headers are sent depends on both the kind of encryption set
+/// and the kind of request being made.
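+///
+/// For example (an illustrative sketch), SSE-KMS with a key id produces
+/// headers along the lines of:
+///
+/// ```text
+/// x-amz-server-side-encryption: aws:kms
+/// x-amz-server-side-encryption-aws-kms-key-id: <key id>
+/// ```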
+#[derive(Default, Clone, Debug)]
+pub(super) struct S3EncryptionHeaders(pub HeaderMap);
+
+impl S3EncryptionHeaders {
+    fn try_new(
+        encryption_type: &S3EncryptionType,
+        encryption_kms_key_id: Option<String>,
+        bucket_key_enabled: Option<bool>,
+        encryption_customer_key_base64: Option<String>,
+    ) -> Result<Self> {
+        let mut headers = HeaderMap::new();
+        match encryption_type {
+            S3EncryptionType::S3 | S3EncryptionType::SseKms | S3EncryptionType::DsseKms => {
+                headers.insert(
+                    "x-amz-server-side-encryption",
+                    HeaderValue::from_static(encryption_type.into()),
+                );
+                if let Some(key_id) = encryption_kms_key_id {
+                    headers.insert(
+                        "x-amz-server-side-encryption-aws-kms-key-id",
+                        key_id
+                            .try_into()
+                            .map_err(|err| Error::InvalidEncryptionHeader {
+                                header: "kms-key-id",
+                                source: Box::new(err),
+                            })?,
+                    );
+                }
+                if let Some(bucket_key_enabled) = bucket_key_enabled {
+                    headers.insert(
+                        "x-amz-server-side-encryption-bucket-key-enabled",
+                        HeaderValue::from_static(if bucket_key_enabled { "true" } else { "false" }),
+                    );
+                }
+            }
+            S3EncryptionType::SseC => {
+                headers.insert(
+                    "x-amz-server-side-encryption-customer-algorithm",
+                    HeaderValue::from_static("AES256"),
+                );
+                if let Some(key) = encryption_customer_key_base64 {
+                    let mut header_value: HeaderValue =
+                        key.clone()
+                            .try_into()
+                            .map_err(|err| Error::InvalidEncryptionHeader {
+                                header: "x-amz-server-side-encryption-customer-key",
+                                source: Box::new(err),
+                            })?;
+                    header_value.set_sensitive(true);
+                    headers.insert("x-amz-server-side-encryption-customer-key", header_value);
+
+                    let decoded_key = BASE64_STANDARD.decode(key.as_bytes()).map_err(|err| {
+                        Error::InvalidEncryptionHeader {
+                            header: "x-amz-server-side-encryption-customer-key",
+                            source: Box::new(err),
+                        }
+                    })?;
+                    let mut hasher = Md5::new();
+                    hasher.update(decoded_key);
+                    let md5 = BASE64_STANDARD.encode(hasher.finalize());
+                    let mut md5_header_value: HeaderValue =
+                        md5.try_into()
+                            .map_err(|err| Error::InvalidEncryptionHeader {
+                                header: "x-amz-server-side-encryption-customer-key-MD5",
+                                source: Box::new(err),
+                            })?;
+                    md5_header_value.set_sensitive(true);
+                    headers.insert(
+                        "x-amz-server-side-encryption-customer-key-MD5",
+                        md5_header_value,
+                    );
+                } else {
+                    return Err(Error::InvalidEncryptionHeader {
+                        header: "x-amz-server-side-encryption-customer-key",
+                        source: Box::new(std::io::Error::new(
+                            std::io::ErrorKind::InvalidInput,
+                            "Missing customer key",
+                        )),
+                    }
+                    .into());
+                }
+            }
+        }
+        Ok(Self(headers))
+    }
+}
+
+impl From<S3EncryptionHeaders> for HeaderMap {
+    fn from(headers: S3EncryptionHeaders) -> Self {
+        headers.0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::HashMap;
+
+    #[test]
+    fn s3_test_config_from_map() {
+        let aws_access_key_id = "object_store:fake_access_key_id".to_string();
+        let aws_secret_access_key = "object_store:fake_secret_key".to_string();
+        let aws_default_region = "object_store:fake_default_region".to_string();
+        let aws_endpoint = "object_store:fake_endpoint".to_string();
+        let aws_session_token = "object_store:fake_session_token".to_string();
+        let options = HashMap::from([
+            ("aws_access_key_id", aws_access_key_id.clone()),
+            ("aws_secret_access_key", aws_secret_access_key),
+            ("aws_default_region", aws_default_region.clone()),
+            ("aws_endpoint", aws_endpoint.clone()),
+            ("aws_session_token", aws_session_token.clone()),
+            ("aws_unsigned_payload", "true".to_string()),
+            ("aws_checksum_algorithm", "sha256".to_string()),
+        ]);
+
+        let builder = options
+            .into_iter()
+            .fold(AmazonS3Builder::new(), |builder, (key, value)| {
builder.with_config(key.parse().unwrap(), value) + }) + .with_config(AmazonS3ConfigKey::SecretAccessKey, "new-secret-key"); + + assert_eq!(builder.access_key_id.unwrap(), aws_access_key_id.as_str()); + assert_eq!(builder.secret_access_key.unwrap(), "new-secret-key"); + assert_eq!(builder.region.unwrap(), aws_default_region); + assert_eq!(builder.endpoint.unwrap(), aws_endpoint); + assert_eq!(builder.token.unwrap(), aws_session_token); + assert_eq!( + builder.checksum_algorithm.unwrap().get().unwrap(), + Checksum::SHA256 + ); + assert!(builder.unsigned_payload.get().unwrap()); + } + + #[test] + fn s3_test_config_get_value() { + let aws_access_key_id = "object_store:fake_access_key_id".to_string(); + let aws_secret_access_key = "object_store:fake_secret_key".to_string(); + let aws_default_region = "object_store:fake_default_region".to_string(); + let aws_endpoint = "object_store:fake_endpoint".to_string(); + let aws_session_token = "object_store:fake_session_token".to_string(); + + let builder = AmazonS3Builder::new() + .with_config(AmazonS3ConfigKey::AccessKeyId, &aws_access_key_id) + .with_config(AmazonS3ConfigKey::SecretAccessKey, &aws_secret_access_key) + .with_config(AmazonS3ConfigKey::DefaultRegion, &aws_default_region) + .with_config(AmazonS3ConfigKey::Endpoint, &aws_endpoint) + .with_config(AmazonS3ConfigKey::Token, &aws_session_token) + .with_config(AmazonS3ConfigKey::UnsignedPayload, "true") + .with_config("aws_server_side_encryption".parse().unwrap(), "AES256") + .with_config("aws_sse_kms_key_id".parse().unwrap(), "some_key_id") + .with_config("aws_sse_bucket_key_enabled".parse().unwrap(), "true") + .with_config( + "aws_sse_customer_key_base64".parse().unwrap(), + "some_customer_key", + ); + + assert_eq!( + builder + .get_config_value(&AmazonS3ConfigKey::AccessKeyId) + .unwrap(), + aws_access_key_id + ); + assert_eq!( + builder + .get_config_value(&AmazonS3ConfigKey::SecretAccessKey) + .unwrap(), + aws_secret_access_key + ); + assert_eq!( + builder + .get_config_value(&AmazonS3ConfigKey::DefaultRegion) + .unwrap(), + aws_default_region + ); + assert_eq!( + builder + .get_config_value(&AmazonS3ConfigKey::Endpoint) + .unwrap(), + aws_endpoint + ); + assert_eq!( + builder.get_config_value(&AmazonS3ConfigKey::Token).unwrap(), + aws_session_token + ); + assert_eq!( + builder + .get_config_value(&AmazonS3ConfigKey::UnsignedPayload) + .unwrap(), + "true" + ); + assert_eq!( + builder + .get_config_value(&"aws_server_side_encryption".parse().unwrap()) + .unwrap(), + "AES256" + ); + assert_eq!( + builder + .get_config_value(&"aws_sse_kms_key_id".parse().unwrap()) + .unwrap(), + "some_key_id" + ); + assert_eq!( + builder + .get_config_value(&"aws_sse_bucket_key_enabled".parse().unwrap()) + .unwrap(), + "true" + ); + assert_eq!( + builder + .get_config_value(&"aws_sse_customer_key_base64".parse().unwrap()) + .unwrap(), + "some_customer_key" + ); + } + + #[test] + fn s3_default_region() { + let builder = AmazonS3Builder::new() + .with_bucket_name("foo") + .build() + .unwrap(); + assert_eq!(builder.client.config.region, "us-east-1"); + } + + #[test] + fn s3_test_bucket_endpoint() { + let builder = AmazonS3Builder::new() + .with_endpoint("http://some.host:1234") + .with_bucket_name("foo") + .build() + .unwrap(); + assert_eq!( + builder.client.config.bucket_endpoint, + "http://some.host:1234/foo" + ); + + let builder = AmazonS3Builder::new() + .with_endpoint("http://some.host:1234/") + .with_bucket_name("foo") + .build() + .unwrap(); + assert_eq!( + builder.client.config.bucket_endpoint, + 
"http://some.host:1234/foo" + ); + } + + #[test] + fn s3_test_urls() { + let mut builder = AmazonS3Builder::new(); + builder.parse_url("s3://bucket/path").unwrap(); + assert_eq!(builder.bucket_name, Some("bucket".to_string())); + + let mut builder = AmazonS3Builder::new(); + builder + .parse_url("s3://buckets.can.have.dots/path") + .unwrap(); + assert_eq!( + builder.bucket_name, + Some("buckets.can.have.dots".to_string()) + ); + + let mut builder = AmazonS3Builder::new(); + builder + .parse_url("https://s3.region.amazonaws.com") + .unwrap(); + assert_eq!(builder.region, Some("region".to_string())); + + let mut builder = AmazonS3Builder::new(); + builder + .parse_url("https://s3.region.amazonaws.com/bucket") + .unwrap(); + assert_eq!(builder.region, Some("region".to_string())); + assert_eq!(builder.bucket_name, Some("bucket".to_string())); + + let mut builder = AmazonS3Builder::new(); + builder + .parse_url("https://s3.region.amazonaws.com/bucket.with.dot/path") + .unwrap(); + assert_eq!(builder.region, Some("region".to_string())); + assert_eq!(builder.bucket_name, Some("bucket.with.dot".to_string())); + + let mut builder = AmazonS3Builder::new(); + builder + .parse_url("https://bucket.s3.region.amazonaws.com") + .unwrap(); + assert_eq!(builder.bucket_name, Some("bucket".to_string())); + assert_eq!(builder.region, Some("region".to_string())); + assert!(builder.virtual_hosted_style_request.get().unwrap()); + + let mut builder = AmazonS3Builder::new(); + builder + .parse_url("https://account123.r2.cloudflarestorage.com/bucket-123") + .unwrap(); + + assert_eq!(builder.bucket_name, Some("bucket-123".to_string())); + assert_eq!(builder.region, Some("auto".to_string())); + assert_eq!( + builder.endpoint, + Some("https://account123.r2.cloudflarestorage.com".to_string()) + ); + + let err_cases = [ + "mailto://bucket/path", + "https://s3.bucket.mydomain.com", + "https://s3.bucket.foo.amazonaws.com", + "https://bucket.mydomain.region.amazonaws.com", + "https://bucket.s3.region.bar.amazonaws.com", + "https://bucket.foo.s3.amazonaws.com", + ]; + let mut builder = AmazonS3Builder::new(); + for case in err_cases { + builder.parse_url(case).unwrap_err(); + } + } + + #[tokio::test] + async fn s3_test_proxy_url() { + let s3 = AmazonS3Builder::new() + .with_access_key_id("access_key_id") + .with_secret_access_key("secret_access_key") + .with_region("region") + .with_bucket_name("bucket_name") + .with_allow_http(true) + .with_proxy_url("https://example.com") + .build(); + + assert!(s3.is_ok()); + + let err = AmazonS3Builder::new() + .with_access_key_id("access_key_id") + .with_secret_access_key("secret_access_key") + .with_region("region") + .with_bucket_name("bucket_name") + .with_allow_http(true) + // use invalid url + .with_proxy_url("dxx:ddd\\example.com") + .build() + .unwrap_err() + .to_string(); + + assert_eq!("Generic HTTP client error: builder error", err); + } + + #[test] + fn test_invalid_config() { + let err = AmazonS3Builder::new() + .with_config(AmazonS3ConfigKey::ImdsV1Fallback, "enabled") + .with_bucket_name("bucket") + .with_region("region") + .build() + .unwrap_err() + .to_string(); + + assert_eq!( + err, + "Generic Config error: failed to parse \"enabled\" as boolean" + ); + + let err = AmazonS3Builder::new() + .with_config(AmazonS3ConfigKey::Checksum, "md5") + .with_bucket_name("bucket") + .with_region("region") + .build() + .unwrap_err() + .to_string(); + + assert_eq!( + err, + "Generic Config error: \"md5\" is not a valid checksum algorithm" + ); + } + + #[test] + fn 
test_parse_bucket_az() { + let cases = [ + ("bucket-base-name--usw2-az1--x-s3", Some("usw2-az1")), + ("bucket-base--name--azid--x-s3", Some("azid")), + ("bucket-base-name", None), + ("bucket-base-name--x-s3", None), + ]; + + for (bucket, expected) in cases { + assert_eq!(parse_bucket_az(bucket), expected) + } + } + + #[test] + fn aws_test_client_opts() { + let key = "AWS_PROXY_URL"; + if let Ok(config_key) = key.to_ascii_lowercase().parse() { + assert_eq!( + AmazonS3ConfigKey::Client(ClientConfigKey::ProxyUrl), + config_key + ); + } else { + panic!("{key} not propagated as ClientConfigKey"); + } + } + + #[test] + fn test_builder_eks_with_config() { + let builder = AmazonS3Builder::new() + .with_bucket_name("some-bucket") + .with_config( + AmazonS3ConfigKey::ContainerCredentialsFullUri, + "https://127.0.0.1/eks-credentials", + ) + .with_config( + AmazonS3ConfigKey::ContainerAuthorizationTokenFile, + "/tmp/fake-bearer-token", + ); + + let s3 = builder.build().expect("should build successfully"); + let creds = &s3.client.config.credentials; + let debug_str = format!("{creds:?}"); + assert!( + debug_str.contains("EKSPodCredentialProvider"), + "expected EKS provider but got: {debug_str}" + ); + } + + #[test] + fn test_builder_web_identity_with_config() { + let builder = AmazonS3Builder::new() + .with_bucket_name("some-bucket") + .with_config( + AmazonS3ConfigKey::WebIdentityTokenFile, + "/tmp/fake-token-file", + ) + .with_config( + AmazonS3ConfigKey::RoleArn, + "arn:aws:iam::123456789012:role/test-role", + ) + .with_config(AmazonS3ConfigKey::RoleSessionName, "TestSession") + .with_config( + AmazonS3ConfigKey::StsEndpoint, + "https://sts.us-west-2.amazonaws.com", + ); + + assert_eq!( + builder + .get_config_value(&AmazonS3ConfigKey::WebIdentityTokenFile) + .unwrap(), + "/tmp/fake-token-file" + ); + assert_eq!( + builder + .get_config_value(&AmazonS3ConfigKey::RoleArn) + .unwrap(), + "arn:aws:iam::123456789012:role/test-role" + ); + assert_eq!( + builder + .get_config_value(&AmazonS3ConfigKey::RoleSessionName) + .unwrap(), + "TestSession" + ); + assert_eq!( + builder + .get_config_value(&AmazonS3ConfigKey::StsEndpoint) + .unwrap(), + "https://sts.us-west-2.amazonaws.com" + ); + + let s3 = builder.build().expect("should build successfully"); + let creds = &s3.client.config.credentials; + let debug_str = format!("{creds:?}"); + assert!( + debug_str.contains("TokenCredentialProvider"), + "expected TokenCredentialProvider but got: {debug_str}" + ); + } +} diff --git a/rust/object_store/src/aws/checksum.rs b/rust/object_store/src/aws/checksum.rs new file mode 100644 index 0000000000..d15bbf08df --- /dev/null +++ b/rust/object_store/src/aws/checksum.rs @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
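+
+// As a quick illustration of the parsing behaviour defined below (a sketch,
+// not part of this module's API surface): `aws_checksum_algorithm` values are
+// matched case-insensitively and only SHA-256 is accepted.
+//
+//     use std::str::FromStr;
+//     assert_eq!(Checksum::from_str("SHA256"), Ok(Checksum::SHA256));
+//     assert_eq!("sha256".parse::<Checksum>(), Ok(Checksum::SHA256));
+//     assert!("md5".parse::<Checksum>().is_err());
+//     assert_eq!(Checksum::SHA256.to_string(), "sha256");
+//
+// The builder error string "\"md5\" is not a valid checksum algorithm"
+// asserted in the tests above comes from the `Parse` impl at the end of
+// this file.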
+
+use crate::config::Parse;
+use std::str::FromStr;
+
+#[allow(non_camel_case_types)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+/// Enum representing checksum algorithm supported by S3.
+pub enum Checksum {
+    /// SHA-256 algorithm.
+    SHA256,
+}
+
+impl std::fmt::Display for Checksum {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match &self {
+            Self::SHA256 => write!(f, "sha256"),
+        }
+    }
+}
+
+impl FromStr for Checksum {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.to_lowercase().as_str() {
+            "sha256" => Ok(Self::SHA256),
+            _ => Err(()),
+        }
+    }
+}
+
+impl TryFrom<&String> for Checksum {
+    type Error = ();
+
+    fn try_from(value: &String) -> Result<Self, Self::Error> {
+        value.parse()
+    }
+}
+
+impl Parse for Checksum {
+    fn parse(v: &str) -> crate::Result<Self> {
+        v.parse().map_err(|_| crate::Error::Generic {
+            store: "Config",
+            source: format!("\"{v}\" is not a valid checksum algorithm").into(),
+        })
+    }
+}
diff --git a/rust/object_store/src/aws/client.rs b/rust/object_store/src/aws/client.rs
new file mode 100644
index 0000000000..4edb977f05
--- /dev/null
+++ b/rust/object_store/src/aws/client.rs
@@ -0,0 +1,951 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +use crate::aws::builder::S3EncryptionHeaders; +use crate::aws::checksum::Checksum; +use crate::aws::credential::{AwsCredential, CredentialExt}; +use crate::aws::{ + AwsAuthorizer, AwsCredentialProvider, S3ConditionalPut, S3CopyIfNotExists, COPY_SOURCE_HEADER, + STORE, STRICT_PATH_ENCODE_SET, TAGS_HEADER, +}; +use crate::client::builder::{HttpRequestBuilder, RequestBuilderError}; +use crate::client::get::GetClient; +use crate::client::header::{get_etag, HeaderConfig}; +use crate::client::header::{get_put_result, get_version}; +use crate::client::list::ListClient; +use crate::client::retry::{RetryContext, RetryExt}; +use crate::client::s3::{ + CompleteMultipartUpload, CompleteMultipartUploadResult, CopyPartResult, + InitiateMultipartUploadResult, ListResponse, PartMetadata, +}; +use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpResponse}; +use crate::list::{PaginatedListOptions, PaginatedListResult}; +use crate::multipart::PartId; +use crate::{ + Attribute, Attributes, ClientOptions, GetOptions, MultipartId, Path, PutMultipartOptions, + PutPayload, PutResult, Result, RetryConfig, TagSet, +}; +use async_trait::async_trait; +use base64::prelude::BASE64_STANDARD; +use base64::Engine; +use bytes::{Buf, Bytes}; +use http::header::{ + CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, + CONTENT_TYPE, +}; +use http::{HeaderMap, HeaderName, Method}; +use itertools::Itertools; +use md5::{Digest, Md5}; +use percent_encoding::{utf8_percent_encode, PercentEncode}; +use quick_xml::events::{self as xml_events}; +use ring::digest; +use ring::digest::Context; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; + +const VERSION_HEADER: &str = "x-amz-version-id"; +const SHA256_CHECKSUM: &str = "x-amz-checksum-sha256"; +const USER_DEFINED_METADATA_HEADER_PREFIX: &str = "x-amz-meta-"; +const ALGORITHM: &str = "x-amz-checksum-algorithm"; +const STORAGE_CLASS: &str = "x-amz-storage-class"; + +/// A specialized `Error` for object store-related errors +#[derive(Debug, thiserror::Error)] +pub(crate) enum Error { + #[error("Error performing DeleteObjects request: {}", source)] + DeleteObjectsRequest { + source: crate::client::retry::RetryError, + }, + + #[error( + "DeleteObjects request failed for key {}: {} (code: {})", + path, + message, + code + )] + DeleteFailed { + path: String, + code: String, + message: String, + }, + + #[error("Error getting DeleteObjects response body: {}", source)] + DeleteObjectsResponse { source: HttpError }, + + #[error("Got invalid DeleteObjects response: {}", source)] + InvalidDeleteObjectsResponse { + source: Box, + }, + + #[error("Error performing list request: {}", source)] + ListRequest { + source: crate::client::retry::RetryError, + }, + + #[error("Error getting list response body: {}", source)] + ListResponseBody { source: HttpError }, + + #[error("Error getting create multipart response body: {}", source)] + CreateMultipartResponseBody { source: HttpError }, + + #[error("Error performing complete multipart request: {}: {}", path, source)] + CompleteMultipartRequest { + source: crate::client::retry::RetryError, + path: String, + }, + + #[error("Error getting complete multipart response body: {}", source)] + CompleteMultipartResponseBody { source: HttpError }, + + #[error("Got invalid list response: {}", source)] + InvalidListResponse { source: quick_xml::de::DeError }, + + #[error("Got invalid multipart response: {}", source)] + InvalidMultipartResponse { source: quick_xml::de::DeError }, + + #[error("Unable to extract 
metadata from headers: {}", source)]
+    Metadata {
+        source: crate::client::header::Error,
+    },
+}
+
+impl From<Error> for crate::Error {
+    fn from(err: Error) -> Self {
+        match err {
+            Error::CompleteMultipartRequest { source, path } => source.error(STORE, path),
+            _ => Self::Generic {
+                store: STORE,
+                source: Box::new(err),
+            },
+        }
+    }
+}
+
+pub(crate) enum PutPartPayload<'a> {
+    Part(PutPayload),
+    Copy(&'a Path),
+}
+
+impl Default for PutPartPayload<'_> {
+    fn default() -> Self {
+        Self::Part(PutPayload::default())
+    }
+}
+
+pub(crate) enum CompleteMultipartMode {
+    Overwrite,
+    Create,
+}
+
+#[derive(Deserialize)]
+#[serde(rename_all = "PascalCase", rename = "DeleteResult")]
+struct BatchDeleteResponse {
+    #[serde(rename = "$value")]
+    content: Vec<DeleteObjectResult>,
+}
+
+#[derive(Deserialize)]
+enum DeleteObjectResult {
+    #[allow(unused)]
+    Deleted(DeletedObject),
+    Error(DeleteError),
+}
+
+#[derive(Deserialize)]
+#[serde(rename_all = "PascalCase", rename = "Deleted")]
+struct DeletedObject {
+    #[allow(dead_code)]
+    key: String,
+}
+
+#[derive(Deserialize)]
+#[serde(rename_all = "PascalCase", rename = "Error")]
+struct DeleteError {
+    key: String,
+    code: String,
+    message: String,
+}
+
+impl From<DeleteError> for Error {
+    fn from(err: DeleteError) -> Self {
+        Self::DeleteFailed {
+            path: err.key,
+            code: err.code,
+            message: err.message,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct S3Config {
+    pub region: String,
+    pub endpoint: Option<String>,
+    pub bucket: String,
+    pub bucket_endpoint: String,
+    pub credentials: AwsCredentialProvider,
+    pub session_provider: Option<AwsCredentialProvider>,
+    pub retry_config: RetryConfig,
+    pub client_options: ClientOptions,
+    pub sign_payload: bool,
+    pub skip_signature: bool,
+    pub disable_tagging: bool,
+    pub checksum: Option<Checksum>,
+    pub copy_if_not_exists: Option<S3CopyIfNotExists>,
+    pub conditional_put: S3ConditionalPut,
+    pub request_payer: bool,
+    pub(super) encryption_headers: S3EncryptionHeaders,
+}
+
+impl S3Config {
+    pub(crate) fn path_url(&self, path: &Path) -> String {
+        format!("{}/{}", self.bucket_endpoint, encode_path(path))
+    }
+
+    async fn get_session_credential(&self) -> Result<SessionCredential<'_>> {
+        let credential = match self.skip_signature {
+            false => {
+                let provider = self.session_provider.as_ref().unwrap_or(&self.credentials);
+                Some(provider.get_credential().await?)
+            }
+            true => None,
+        };
+
+        Ok(SessionCredential {
+            credential,
+            session_token: self.session_provider.is_some(),
+            config: self,
+        })
+    }
+
+    pub(crate) async fn get_credential(&self) -> Result<Option<Arc<AwsCredential>>> {
+        Ok(match self.skip_signature {
+            false => Some(self.credentials.get_credential().await?),
+            true => None,
+        })
+    }
+
+    #[inline]
+    pub(crate) fn is_s3_express(&self) -> bool {
+        self.session_provider.is_some()
+    }
+}
+
+struct SessionCredential<'a> {
+    credential: Option<Arc<AwsCredential>>,
+    session_token: bool,
+    config: &'a S3Config,
+}
+
+impl SessionCredential<'_> {
+    fn authorizer(&self) -> Option<AwsAuthorizer<'_>> {
+        let mut authorizer =
+            AwsAuthorizer::new(self.credential.as_deref()?, "s3", &self.config.region)
+                .with_sign_payload(self.config.sign_payload)
+                .with_request_payer(self.config.request_payer);
+
+        if self.session_token {
+            let token = HeaderName::from_static("x-amz-s3session-token");
+            authorizer = authorizer.with_token_header(token)
+        }
+
+        Some(authorizer)
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum RequestError {
+    #[error(transparent)]
+    Generic {
+        #[from]
+        source: crate::Error,
+    },
+
+    #[error("Retry")]
+    Retry {
+        source: crate::client::retry::RetryError,
+        path: String,
+    },
+}
+
+impl From<RequestError> for crate::Error {
+    fn from(value: RequestError) -> Self {
+        match value {
+            RequestError::Generic { source } => source,
+            RequestError::Retry { source, path } => source.error(STORE, path),
+        }
+    }
+}
+
+/// A builder for a request allowing customisation of the headers and query string
+pub(crate) struct Request<'a> {
+    path: &'a Path,
+    config: &'a S3Config,
+    builder: HttpRequestBuilder,
+    payload_sha256: Option<ring::digest::Digest>,
+    payload: Option<PutPayload>,
+    use_session_creds: bool,
+    idempotent: bool,
+    retry_on_conflict: bool,
+    retry_error_body: bool,
+}
+
+impl Request<'_> {
+    pub(crate) fn query<T: Serialize + ?Sized + Sync>(self, query: &T) -> Self {
+        let builder = self.builder.query(query);
+        Self { builder, ..self }
+    }
+
+    pub(crate) fn header<K>(self, k: K, v: &str) -> Self
+    where
+        K: TryInto<HeaderName>,
+        K::Error: Into<RequestBuilderError>,
+    {
+        let builder = self.builder.header(k, v);
+        Self { builder, ..self }
+    }
+
+    pub(crate) fn headers(self, headers: HeaderMap) -> Self {
+        let builder = self.builder.headers(headers);
+        Self { builder, ..self }
+    }
+
+    pub(crate) fn idempotent(self, idempotent: bool) -> Self {
+        Self { idempotent, ..self }
+    }
+
+    pub(crate) fn retry_on_conflict(self, retry_on_conflict: bool) -> Self {
+        Self {
+            retry_on_conflict,
+            ..self
+        }
+    }
+
+    pub(crate) fn retry_error_body(self, retry_error_body: bool) -> Self {
+        Self {
+            retry_error_body,
+            ..self
+        }
+    }
+
+    pub(crate) fn with_encryption_headers(self) -> Self {
+        let headers = self.config.encryption_headers.clone().into();
+        let builder = self.builder.headers(headers);
+        Self { builder, ..self }
+    }
+
+    pub(crate) fn with_session_creds(self, use_session_creds: bool) -> Self {
+        Self {
+            use_session_creds,
+            ..self
+        }
+    }
+
+    pub(crate) fn with_tags(mut self, tags: TagSet) -> Self {
+        let tags = tags.encoded();
+        if !tags.is_empty() && !self.config.disable_tagging {
+            self.builder = self.builder.header(&TAGS_HEADER, tags);
+        }
+        self
+    }
+
+    pub(crate) fn with_attributes(self, attributes: Attributes) -> Self {
+        let mut has_content_type = false;
+        let mut builder = self.builder;
+        for (k, v) in &attributes {
+            builder = match k {
+                Attribute::CacheControl => builder.header(CACHE_CONTROL, v.as_ref()),
+                Attribute::ContentDisposition => builder.header(CONTENT_DISPOSITION, v.as_ref()),
+                Attribute::ContentEncoding => builder.header(CONTENT_ENCODING, v.as_ref()),
+
Attribute::ContentLanguage => builder.header(CONTENT_LANGUAGE, v.as_ref()), + Attribute::ContentType => { + has_content_type = true; + builder.header(CONTENT_TYPE, v.as_ref()) + } + Attribute::StorageClass => builder.header(STORAGE_CLASS, v.as_ref()), + Attribute::Metadata(k_suffix) => builder.header( + &format!("{USER_DEFINED_METADATA_HEADER_PREFIX}{k_suffix}"), + v.as_ref(), + ), + }; + } + + if !has_content_type { + if let Some(value) = self.config.client_options.get_content_type(self.path) { + builder = builder.header(CONTENT_TYPE, value); + } + } + Self { builder, ..self } + } + + pub(crate) fn with_extensions(self, extensions: ::http::Extensions) -> Self { + let builder = self.builder.extensions(extensions); + Self { builder, ..self } + } + + pub(crate) fn with_payload(mut self, payload: PutPayload) -> Self { + if (!self.config.skip_signature && self.config.sign_payload) + || self.config.checksum.is_some() + { + let mut sha256 = Context::new(&digest::SHA256); + payload.iter().for_each(|x| sha256.update(x)); + let payload_sha256 = sha256.finish(); + + if let Some(Checksum::SHA256) = self.config.checksum { + self.builder = self + .builder + .header(SHA256_CHECKSUM, BASE64_STANDARD.encode(payload_sha256)); + } + self.payload_sha256 = Some(payload_sha256); + } + + let content_length = payload.content_length(); + self.builder = self.builder.header(CONTENT_LENGTH, content_length); + self.payload = Some(payload); + self + } + + pub(crate) async fn send(self) -> Result { + let credential = match self.use_session_creds { + true => self.config.get_session_credential().await?, + false => SessionCredential { + credential: self.config.get_credential().await?, + session_token: false, + config: self.config, + }, + }; + + let sha = self.payload_sha256.as_ref().map(|x| x.as_ref()); + + let path = self.path.as_ref(); + self.builder + .with_aws_sigv4(credential.authorizer(), sha) + .retryable(&self.config.retry_config) + .retry_on_conflict(self.retry_on_conflict) + .idempotent(self.idempotent) + .retry_error_body(self.retry_error_body) + .payload(self.payload) + .send() + .await + .map_err(|source| { + let path = path.into(); + RequestError::Retry { source, path } + }) + } + + pub(crate) async fn do_put(self) -> Result { + let response = self.send().await?; + Ok(get_put_result(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?) + } +} + +#[derive(Debug)] +pub(crate) struct S3Client { + pub config: S3Config, + pub client: HttpClient, +} + +impl S3Client { + pub(crate) fn new(config: S3Config, client: HttpClient) -> Self { + Self { config, client } + } + + pub(crate) fn request<'a>(&'a self, method: Method, path: &'a Path) -> Request<'a> { + let url = self.config.path_url(path); + Request { + path, + builder: self.client.request(method, url), + payload: None, + payload_sha256: None, + config: &self.config, + use_session_creds: true, + idempotent: false, + retry_on_conflict: false, + retry_error_body: false, + } + } + + /// Make an S3 Delete Objects request + /// + /// Produces a vector of results, one for each path in the input vector. If + /// the delete was successful, the path is returned in the `Ok` variant. If + /// there was an error for a certain path, the error will be returned in the + /// vector. If there was an issue with making the overall request, an error + /// will be returned at the top level. 
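+    ///
+    /// A sketch of the intended calling pattern (illustrative only): the
+    /// returned vector is index-aligned with `paths`, so per-key failures can
+    /// be matched back to their inputs.
+    ///
+    /// ```ignore
+    /// let results = client.bulk_delete_request(paths.clone()).await?;
+    /// for (path, result) in paths.iter().zip(results) {
+    ///     if let Err(e) = result {
+    ///         eprintln!("failed to delete {path}: {e}");
+    ///     }
+    /// }
+    /// ```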
+ pub(crate) async fn bulk_delete_request(&self, paths: Vec) -> Result>> { + if paths.is_empty() { + return Ok(Vec::new()); + } + + let credential = self.config.get_session_credential().await?; + let url = format!("{}?delete", self.config.bucket_endpoint); + + let mut buffer = Vec::new(); + let mut writer = quick_xml::Writer::new(&mut buffer); + writer + .write_event(xml_events::Event::Start( + xml_events::BytesStart::new("Delete") + .with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]), + )) + .unwrap(); + for path in &paths { + // {path} + writer + .write_event(xml_events::Event::Start(xml_events::BytesStart::new( + "Object", + ))) + .unwrap(); + writer + .write_event(xml_events::Event::Start(xml_events::BytesStart::new("Key"))) + .unwrap(); + writer + .write_event(xml_events::Event::Text(xml_events::BytesText::new( + path.as_ref(), + ))) + .map_err(|err| crate::Error::Generic { + store: STORE, + source: Box::new(err), + })?; + writer + .write_event(xml_events::Event::End(xml_events::BytesEnd::new("Key"))) + .unwrap(); + writer + .write_event(xml_events::Event::End(xml_events::BytesEnd::new("Object"))) + .unwrap(); + } + writer + .write_event(xml_events::Event::End(xml_events::BytesEnd::new("Delete"))) + .unwrap(); + + let body = Bytes::from(buffer); + + let mut builder = self.client.request(Method::POST, url); + + let digest = digest::digest(&digest::SHA256, &body); + builder = builder.header(SHA256_CHECKSUM, BASE64_STANDARD.encode(digest)); + + // S3 *requires* DeleteObjects to include a Content-MD5 header: + // https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html + // > "The Content-MD5 request header is required for all Multi-Object Delete requests" + // Some platforms, like MinIO, enforce this requirement and fail requests without the header. + let mut hasher = Md5::new(); + hasher.update(&body); + builder = builder.header("Content-MD5", BASE64_STANDARD.encode(hasher.finalize())); + + let response = builder + .header(CONTENT_TYPE, "application/xml") + .body(body) + .with_aws_sigv4(credential.authorizer(), Some(digest.as_ref())) + .send_retry(&self.config.retry_config) + .await + .map_err(|source| Error::DeleteObjectsRequest { source })? + .into_body() + .bytes() + .await + .map_err(|source| Error::DeleteObjectsResponse { source })?; + + let response: BatchDeleteResponse = + quick_xml::de::from_reader(response.reader()).map_err(|err| { + Error::InvalidDeleteObjectsResponse { + source: Box::new(err), + } + })?; + + // Assume all were ok, then fill in errors. This guarantees output order + // matches input order. 
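+        // For reference, the response deserialized above has roughly this
+        // shape (hand-written sample, abbreviated):
+        //
+        //     <DeleteResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
+        //         <Deleted><Key>ok-key</Key></Deleted>
+        //         <Error>
+        //             <Key>denied-key</Key>
+        //             <Code>AccessDenied</Code>
+        //             <Message>Access Denied</Message>
+        //         </Error>
+        //     </DeleteResult>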
+ let mut results: Vec> = paths.iter().cloned().map(Ok).collect(); + for content in response.content.into_iter() { + if let DeleteObjectResult::Error(error) = content { + let path = + Path::parse(&error.key).map_err(|err| Error::InvalidDeleteObjectsResponse { + source: Box::new(err), + })?; + let i = paths.iter().find_position(|&p| p == &path).unwrap().0; + results[i] = Err(Error::from(error).into()); + } + } + + Ok(results) + } + + /// Make an S3 Copy request + pub(crate) fn copy_request<'a>(&'a self, from: &Path, to: &'a Path) -> Request<'a> { + let source = format!("{}/{}", self.config.bucket, encode_path(from)); + + let mut copy_source_encryption_headers = HeaderMap::new(); + if let Some(customer_algorithm) = self + .config + .encryption_headers + .0 + .get("x-amz-server-side-encryption-customer-algorithm") + { + copy_source_encryption_headers.insert( + "x-amz-copy-source-server-side-encryption-customer-algorithm", + customer_algorithm.clone(), + ); + } + if let Some(customer_key) = self + .config + .encryption_headers + .0 + .get("x-amz-server-side-encryption-customer-key") + { + copy_source_encryption_headers.insert( + "x-amz-copy-source-server-side-encryption-customer-key", + customer_key.clone(), + ); + } + if let Some(customer_key_md5) = self + .config + .encryption_headers + .0 + .get("x-amz-server-side-encryption-customer-key-MD5") + { + copy_source_encryption_headers.insert( + "x-amz-copy-source-server-side-encryption-customer-key-MD5", + customer_key_md5.clone(), + ); + } + + self.request(Method::PUT, to) + .idempotent(true) + .retry_error_body(true) + .header(©_SOURCE_HEADER, &source) + .headers(self.config.encryption_headers.clone().into()) + .headers(copy_source_encryption_headers) + .with_session_creds(false) + } + + pub(crate) async fn create_multipart( + &self, + location: &Path, + opts: PutMultipartOptions, + ) -> Result { + let PutMultipartOptions { + tags, + attributes, + extensions, + } = opts; + + let mut request = self.request(Method::POST, location); + if let Some(algorithm) = self.config.checksum { + match algorithm { + Checksum::SHA256 => { + request = request.header(ALGORITHM, "SHA256"); + } + } + } + let response = request + .query(&[("uploads", "")]) + .with_encryption_headers() + .with_attributes(attributes) + .with_tags(tags) + .with_extensions(extensions) + .idempotent(true) + .send() + .await? + .into_body() + .bytes() + .await + .map_err(|source| Error::CreateMultipartResponseBody { source })?; + + let response: InitiateMultipartUploadResult = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidMultipartResponse { source })?; + + Ok(response.upload_id) + } + + pub(crate) async fn put_part( + &self, + path: &Path, + upload_id: &MultipartId, + part_idx: usize, + data: PutPartPayload<'_>, + ) -> Result { + let is_copy = matches!(data, PutPartPayload::Copy(_)); + let part = (part_idx + 1).to_string(); + + let mut request = self + .request(Method::PUT, path) + .query(&[("partNumber", &part), ("uploadId", upload_id)]) + .idempotent(true); + + request = match data { + PutPartPayload::Part(payload) => request.with_payload(payload), + PutPartPayload::Copy(path) => request.header( + "x-amz-copy-source", + &format!("{}/{}", self.config.bucket, encode_path(path)), + ), + }; + + if self + .config + .encryption_headers + .0 + .contains_key("x-amz-server-side-encryption-customer-algorithm") + { + // If SSE-C is used, we must include the encryption headers in every upload request. 
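+        // Concretely, the headers re-attached here are the ones built by
+        // `S3EncryptionHeaders::try_new` in builder.rs (values illustrative):
+        //
+        //     x-amz-server-side-encryption-customer-algorithm: AES256
+        //     x-amz-server-side-encryption-customer-key: (base64 key, marked sensitive)
+        //     x-amz-server-side-encryption-customer-key-MD5: (base64 MD5 of the decoded key, marked sensitive)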
+ request = request.with_encryption_headers(); + } + let (parts, body) = request.send().await?.into_parts(); + let checksum_sha256 = parts + .headers + .get(SHA256_CHECKSUM) + .and_then(|v| v.to_str().ok()) + .map(|v| v.to_string()); + + let e_tag = match is_copy { + false => get_etag(&parts.headers).map_err(|source| Error::Metadata { source })?, + true => { + let response = body + .bytes() + .await + .map_err(|source| Error::CreateMultipartResponseBody { source })?; + let response: CopyPartResult = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidMultipartResponse { source })?; + response.e_tag + } + }; + + let content_id = if self.config.checksum == Some(Checksum::SHA256) { + let meta = PartMetadata { + e_tag, + checksum_sha256, + }; + quick_xml::se::to_string(&meta).unwrap() + } else { + e_tag + }; + + Ok(PartId { content_id }) + } + + pub(crate) async fn abort_multipart(&self, location: &Path, upload_id: &str) -> Result<()> { + self.request(Method::DELETE, location) + .query(&[("uploadId", upload_id)]) + .with_encryption_headers() + .send() + .await?; + + Ok(()) + } + + pub(crate) async fn complete_multipart( + &self, + location: &Path, + upload_id: &str, + parts: Vec, + mode: CompleteMultipartMode, + ) -> Result { + let parts = if parts.is_empty() { + // If no parts were uploaded, upload an empty part + // otherwise the completion request will fail + let part = self + .put_part( + location, + &upload_id.to_string(), + 0, + PutPartPayload::default(), + ) + .await?; + vec![part] + } else { + parts + }; + let request = CompleteMultipartUpload::from(parts); + let body = quick_xml::se::to_string(&request).unwrap(); + + let credential = self.config.get_session_credential().await?; + let url = self.config.path_url(location); + + let request = self + .client + .post(url) + .query(&[("uploadId", upload_id)]) + .body(body) + .with_aws_sigv4(credential.authorizer(), None); + + let request = match mode { + CompleteMultipartMode::Overwrite => request, + CompleteMultipartMode::Create => request.header("If-None-Match", "*"), + }; + + let response = request + .retryable(&self.config.retry_config) + .idempotent(true) + .retry_error_body(true) + .send() + .await + .map_err(|source| Error::CompleteMultipartRequest { + source, + path: location.as_ref().to_string(), + })?; + + let version = get_version(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?; + + let data = response + .into_body() + .bytes() + .await + .map_err(|source| Error::CompleteMultipartResponseBody { source })?; + + let response: CompleteMultipartUploadResult = quick_xml::de::from_reader(data.reader()) + .map_err(|source| Error::InvalidMultipartResponse { source })?; + + Ok(PutResult { + e_tag: Some(response.e_tag), + version, + }) + } + + #[cfg(test)] + pub(crate) async fn get_object_tagging(&self, path: &Path) -> Result { + let credential = self.config.get_session_credential().await?; + let url = format!("{}?tagging", self.config.path_url(path)); + let response = self + .client + .request(Method::GET, url) + .with_aws_sigv4(credential.authorizer(), None) + .send_retry(&self.config.retry_config) + .await + .map_err(|e| e.error(STORE, path.to_string()))?; + Ok(response) + } +} + +#[async_trait] +impl GetClient for S3Client { + const STORE: &'static str = STORE; + + const HEADER_CONFIG: HeaderConfig = HeaderConfig { + etag_required: false, + last_modified_required: false, + version_header: Some(VERSION_HEADER), + user_defined_metadata_prefix: 
Some(USER_DEFINED_METADATA_HEADER_PREFIX), + }; + + fn retry_config(&self) -> &RetryConfig { + &self.config.retry_config + } + + /// Make an S3 GET request + async fn get_request( + &self, + ctx: &mut RetryContext, + path: &Path, + options: GetOptions, + ) -> Result { + let credential = self.config.get_session_credential().await?; + let url = self.config.path_url(path); + let method = match options.head { + true => Method::HEAD, + false => Method::GET, + }; + + let mut builder = self.client.request(method, url); + if self + .config + .encryption_headers + .0 + .contains_key("x-amz-server-side-encryption-customer-algorithm") + { + builder = builder.headers(self.config.encryption_headers.clone().into()); + } + + if let Some(v) = &options.version { + builder = builder.query(&[("versionId", v)]) + } + + let response = builder + .with_get_options(options) + .with_aws_sigv4(credential.authorizer(), None) + .retryable_request() + .send(ctx) + .await + .map_err(|e| e.error(STORE, path.to_string()))?; + + Ok(response) + } +} + +#[async_trait] +impl ListClient for Arc { + /// Make an S3 List request + async fn list_request( + &self, + prefix: Option<&str>, + opts: PaginatedListOptions, + ) -> Result { + let credential = self.config.get_session_credential().await?; + let url = self.config.bucket_endpoint.clone(); + + let mut query = Vec::with_capacity(4); + + if let Some(token) = &opts.page_token { + query.push(("continuation-token", token.as_ref())) + } + + if let Some(d) = &opts.delimiter { + query.push(("delimiter", d.as_ref())) + } + + query.push(("list-type", "2")); + + if let Some(prefix) = prefix { + query.push(("prefix", prefix)) + } + + if let Some(offset) = &opts.offset { + query.push(("start-after", offset.as_ref())) + } + + let max_keys_str; + if let Some(max_keys) = &opts.max_keys { + max_keys_str = max_keys.to_string(); + query.push(("max-keys", max_keys_str.as_ref())) + } + + let response = self + .client + .request(Method::GET, &url) + .extensions(opts.extensions) + .query(&query) + .with_aws_sigv4(credential.authorizer(), None) + .send_retry(&self.config.retry_config) + .await + .map_err(|source| Error::ListRequest { source })? + .into_body() + .bytes() + .await + .map_err(|source| Error::ListResponseBody { source })?; + + let mut response: ListResponse = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidListResponse { source })?; + + let token = response.next_continuation_token.take(); + + Ok(PaginatedListResult { + result: response.try_into()?, + page_token: token, + }) + } +} + +fn encode_path(path: &Path) -> PercentEncode<'_> { + utf8_percent_encode(path.as_ref(), &STRICT_PATH_ENCODE_SET) +} diff --git a/rust/object_store/src/aws/credential.rs b/rust/object_store/src/aws/credential.rs new file mode 100644 index 0000000000..7e2681d4ca --- /dev/null +++ b/rust/object_store/src/aws/credential.rs @@ -0,0 +1,1296 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::aws::{AwsCredentialProvider, STORE, STRICT_ENCODE_SET, STRICT_PATH_ENCODE_SET};
+use crate::client::builder::HttpRequestBuilder;
+use crate::client::retry::RetryExt;
+use crate::client::token::{TemporaryToken, TokenCache};
+use crate::client::{HttpClient, HttpError, HttpRequest, TokenProvider};
+use crate::util::{hex_digest, hex_encode, hmac_sha256};
+use crate::{CredentialProvider, Result, RetryConfig};
+use async_trait::async_trait;
+use bytes::Buf;
+use chrono::{DateTime, Utc};
+use http::header::{HeaderMap, HeaderName, HeaderValue, AUTHORIZATION};
+use http::{Method, StatusCode};
+use percent_encoding::utf8_percent_encode;
+use serde::Deserialize;
+use std::collections::BTreeMap;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+use tracing::warn;
+use url::Url;
+
+#[derive(Debug, thiserror::Error)]
+#[allow(clippy::enum_variant_names)]
+enum Error {
+    #[error("Error performing CreateSession request: {source}")]
+    CreateSessionRequest {
+        source: crate::client::retry::RetryError,
+    },
+
+    #[error("Error getting CreateSession response: {source}")]
+    CreateSessionResponse { source: HttpError },
+
+    #[error("Invalid CreateSessionOutput response: {source}")]
+    CreateSessionOutput { source: quick_xml::DeError },
+}
+
+impl From<Error> for crate::Error {
+    fn from(value: Error) -> Self {
+        Self::Generic {
+            store: STORE,
+            source: Box::new(value),
+        }
+    }
+}
+
+type StdError = Box<dyn std::error::Error + Send + Sync>;
+
+/// SHA256 hash of empty string
+static EMPTY_SHA256_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
+static UNSIGNED_PAYLOAD: &str = "UNSIGNED-PAYLOAD";
+static STREAMING_PAYLOAD: &str = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD";
+
+/// A set of AWS security credentials
+#[derive(Eq, PartialEq)]
+pub struct AwsCredential {
+    /// AWS_ACCESS_KEY_ID
+    pub key_id: String,
+    /// AWS_SECRET_ACCESS_KEY
+    pub secret_key: String,
+    /// AWS_SESSION_TOKEN
+    pub token: Option<String>,
+}
+
+impl std::fmt::Debug for AwsCredential {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("AwsCredential")
+            .field("key_id", &self.key_id)
+            .field("secret_key", &"******")
+            .field("token", &self.token.as_ref().map(|_| "******"))
+            .finish()
+    }
+}
+
+impl AwsCredential {
+    /// Signs a string
+    ///
+    ///
+    fn sign(&self, to_sign: &str, date: DateTime<Utc>, region: &str, service: &str) -> String {
+        let date_string = date.format("%Y%m%d").to_string();
+        let date_hmac = hmac_sha256(format!("AWS4{}", self.secret_key), date_string);
+        let region_hmac = hmac_sha256(date_hmac, region);
+        let service_hmac = hmac_sha256(region_hmac, service);
+        let signing_hmac = hmac_sha256(service_hmac, b"aws4_request");
+        hex_encode(hmac_sha256(signing_hmac, to_sign).as_ref())
+    }
+}
+
+/// Authorize a [`HttpRequest`] with an [`AwsCredential`] using [AWS SigV4]
+///
+/// [AWS SigV4]: https://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+#[derive(Debug)]
+pub struct AwsAuthorizer<'a> {
+    date: Option<DateTime<Utc>>,
+    credential: &'a AwsCredential,
+    service: &'a str,
+    region: &'a str,
+    token_header: Option<HeaderName>,
+    sign_payload: bool,
+    request_payer: bool,
+}
+
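+// A minimal usage sketch (illustrative; not called anywhere). The credential
+// values are the AWS documentation examples also used by the unit tests at
+// the end of this file.
+#[allow(dead_code)]
+fn sigv4_usage_sketch(request: &mut HttpRequest) {
+    let credential = AwsCredential {
+        key_id: "AKIAIOSFODNN7EXAMPLE".to_string(),
+        secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(),
+        token: None,
+    };
+    // Signs `request` in place, attaching `host`, `x-amz-date`,
+    // `x-amz-content-sha256` and the `Authorization` header via the
+    // `authorize` method defined below.
+    AwsAuthorizer::new(&credential, "s3", "us-east-1")
+        .with_sign_payload(true)
+        .authorize(request, None);
+}
+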
+static DATE_HEADER: HeaderName = HeaderName::from_static("x-amz-date"); +static HASH_HEADER: HeaderName = HeaderName::from_static("x-amz-content-sha256"); +static TOKEN_HEADER: HeaderName = HeaderName::from_static("x-amz-security-token"); +static REQUEST_PAYER_HEADER: HeaderName = HeaderName::from_static("x-amz-request-payer"); +static REQUEST_PAYER_HEADER_VALUE: HeaderValue = HeaderValue::from_static("requester"); +const ALGORITHM: &str = "AWS4-HMAC-SHA256"; + +impl<'a> AwsAuthorizer<'a> { + /// Create a new [`AwsAuthorizer`] + pub fn new(credential: &'a AwsCredential, service: &'a str, region: &'a str) -> Self { + Self { + credential, + service, + region, + date: None, + sign_payload: true, + token_header: None, + request_payer: false, + } + } + + /// Controls whether this [`AwsAuthorizer`] will attempt to sign the request payload, + /// the default is `true` + pub fn with_sign_payload(mut self, signed: bool) -> Self { + self.sign_payload = signed; + self + } + + /// Overrides the header name for security tokens, defaults to `x-amz-security-token` + pub(crate) fn with_token_header(mut self, header: HeaderName) -> Self { + self.token_header = Some(header); + self + } + + /// Set whether to include requester pays headers + /// + /// + pub fn with_request_payer(mut self, request_payer: bool) -> Self { + self.request_payer = request_payer; + self + } + + /// Authorize `request` with an optional pre-calculated SHA256 digest by attaching + /// the relevant [AWS SigV4] headers + /// + /// # Payload Signature + /// + /// AWS SigV4 requests must contain the `x-amz-content-sha256` header, it is set as follows: + /// + /// * If not configured to sign payloads, it is set to `UNSIGNED-PAYLOAD` + /// * If a `pre_calculated_digest` is provided, it is set to the hex encoding of it + /// * If it is a streaming request, it is set to `STREAMING-AWS4-HMAC-SHA256-PAYLOAD` + /// * Otherwise it is set to the hex encoded SHA256 of the request body + /// + /// [AWS SigV4]: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html + pub fn authorize(&self, request: &mut HttpRequest, pre_calculated_digest: Option<&[u8]>) { + let url = Url::parse(&request.uri().to_string()).unwrap(); + + if let Some(ref token) = self.credential.token { + let token_val = HeaderValue::from_str(token).unwrap(); + let header = self.token_header.as_ref().unwrap_or(&TOKEN_HEADER); + request.headers_mut().insert(header, token_val); + } + + let host = &url[url::Position::BeforeHost..url::Position::AfterPort]; + let host_val = HeaderValue::from_str(host).unwrap(); + request.headers_mut().insert("host", host_val); + + let date = self.date.unwrap_or_else(Utc::now); + let date_str = date.format("%Y%m%dT%H%M%SZ").to_string(); + let date_val = HeaderValue::from_str(&date_str).unwrap(); + request.headers_mut().insert(&DATE_HEADER, date_val); + + let digest = match self.sign_payload { + false => UNSIGNED_PAYLOAD.to_string(), + true => match pre_calculated_digest { + Some(digest) => hex_encode(digest), + None => match request.body().is_empty() { + true => EMPTY_SHA256_HASH.to_string(), + false => match request.body().as_bytes() { + Some(bytes) => hex_digest(bytes), + None => STREAMING_PAYLOAD.to_string(), + }, + }, + }, + }; + + let header_digest = HeaderValue::from_str(&digest).unwrap(); + request.headers_mut().insert(&HASH_HEADER, header_digest); + + if self.request_payer { + // For DELETE, GET, HEAD, POST, and PUT requests, include x-amz-request-payer : + // requester in the header + // 
https://docs.aws.amazon.com/AmazonS3/latest/userguide/ObjectsinRequesterPaysBuckets.html + request + .headers_mut() + .insert(&REQUEST_PAYER_HEADER, REQUEST_PAYER_HEADER_VALUE.clone()); + } + + let (signed_headers, canonical_headers) = canonicalize_headers(request.headers()); + + let scope = self.scope(date); + + let string_to_sign = self.string_to_sign( + date, + &scope, + request.method(), + &url, + &canonical_headers, + &signed_headers, + &digest, + ); + + // sign the string + let signature = self + .credential + .sign(&string_to_sign, date, self.region, self.service); + + // build the actual auth header + let authorisation = format!( + "{} Credential={}/{}, SignedHeaders={}, Signature={}", + ALGORITHM, self.credential.key_id, scope, signed_headers, signature + ); + + let authorization_val = HeaderValue::from_str(&authorisation).unwrap(); + request + .headers_mut() + .insert(&AUTHORIZATION, authorization_val); + } + + pub(crate) fn sign(&self, method: Method, url: &mut Url, expires_in: Duration) { + let date = self.date.unwrap_or_else(Utc::now); + let scope = self.scope(date); + + // https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html + url.query_pairs_mut() + .append_pair("X-Amz-Algorithm", ALGORITHM) + .append_pair( + "X-Amz-Credential", + &format!("{}/{}", self.credential.key_id, scope), + ) + .append_pair("X-Amz-Date", &date.format("%Y%m%dT%H%M%SZ").to_string()) + .append_pair("X-Amz-Expires", &expires_in.as_secs().to_string()) + .append_pair("X-Amz-SignedHeaders", "host"); + + if self.request_payer { + // For signed URLs, include x-amz-request-payer=requester in the request + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/ObjectsinRequesterPaysBuckets.html + url.query_pairs_mut() + .append_pair("x-amz-request-payer", "requester"); + } + + // For S3, you must include the X-Amz-Security-Token query parameter in the URL if + // using credentials sourced from the STS service. + if let Some(ref token) = self.credential.token { + url.query_pairs_mut() + .append_pair("X-Amz-Security-Token", token); + } + + // We don't have a payload; the user is going to send the payload directly themselves. + let digest = UNSIGNED_PAYLOAD; + + let host = &url[url::Position::BeforeHost..url::Position::AfterPort].to_string(); + let mut headers = HeaderMap::new(); + let host_val = HeaderValue::from_str(host).unwrap(); + headers.insert("host", host_val); + + let (signed_headers, canonical_headers) = canonicalize_headers(&headers); + + let string_to_sign = self.string_to_sign( + date, + &scope, + &method, + url, + &canonical_headers, + &signed_headers, + digest, + ); + + let signature = self + .credential + .sign(&string_to_sign, date, self.region, self.service); + + url.query_pairs_mut() + .append_pair("X-Amz-Signature", &signature); + } + + #[allow(clippy::too_many_arguments)] + fn string_to_sign( + &self, + date: DateTime, + scope: &str, + request_method: &Method, + url: &Url, + canonical_headers: &str, + signed_headers: &str, + digest: &str, + ) -> String { + // Each path segment must be URI-encoded twice (except for Amazon S3 which only gets + // URI-encoded once). 
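+        // e.g. a path segment "a b" appears in `url.path()` once-encoded as
+        // "a%20b"; for non-S3 services the canonical URI encodes it again to
+        // "a%2520b", while for S3 the once-encoded form is signed as-is, as
+        // the `match` below shows.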
+ // see https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html + let canonical_uri = match self.service { + "s3" => url.path().to_string(), + _ => utf8_percent_encode(url.path(), &STRICT_PATH_ENCODE_SET).to_string(), + }; + + let canonical_query = canonicalize_query(url); + + // https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html + let canonical_request = format!( + "{}\n{}\n{}\n{}\n{}\n{}", + request_method.as_str(), + canonical_uri, + canonical_query, + canonical_headers, + signed_headers, + digest + ); + + let hashed_canonical_request = hex_digest(canonical_request.as_bytes()); + + format!( + "{}\n{}\n{}\n{}", + ALGORITHM, + date.format("%Y%m%dT%H%M%SZ"), + scope, + hashed_canonical_request + ) + } + + fn scope(&self, date: DateTime) -> String { + format!( + "{}/{}/{}/aws4_request", + date.format("%Y%m%d"), + self.region, + self.service + ) + } +} + +pub(crate) trait CredentialExt { + /// Sign a request + fn with_aws_sigv4( + self, + authorizer: Option>, + payload_sha256: Option<&[u8]>, + ) -> Self; +} + +impl CredentialExt for HttpRequestBuilder { + fn with_aws_sigv4( + self, + authorizer: Option>, + payload_sha256: Option<&[u8]>, + ) -> Self { + match authorizer { + Some(authorizer) => { + let (client, request) = self.into_parts(); + let mut request = request.expect("request valid"); + authorizer.authorize(&mut request, payload_sha256); + + Self::from_parts(client, request) + } + None => self, + } + } +} + +/// Canonicalizes query parameters into the AWS canonical form +/// +/// +fn canonicalize_query(url: &Url) -> String { + use std::fmt::Write; + + let capacity = match url.query() { + Some(q) if !q.is_empty() => q.len(), + _ => return String::new(), + }; + let mut encoded = String::with_capacity(capacity + 1); + + let mut headers = url.query_pairs().collect::>(); + headers.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + + let mut first = true; + for (k, v) in headers { + if !first { + encoded.push('&'); + } + first = false; + let _ = write!( + encoded, + "{}={}", + utf8_percent_encode(k.as_ref(), &STRICT_ENCODE_SET), + utf8_percent_encode(v.as_ref(), &STRICT_ENCODE_SET) + ); + } + encoded +} + +/// Canonicalizes headers into the AWS Canonical Form. 
+/// +/// +fn canonicalize_headers(header_map: &HeaderMap) -> (String, String) { + let mut headers = BTreeMap::<&str, Vec<&str>>::new(); + let mut value_count = 0; + let mut value_bytes = 0; + let mut key_bytes = 0; + + for (key, value) in header_map { + let key = key.as_str(); + if ["authorization", "content-length", "user-agent"].contains(&key) { + continue; + } + + let value = std::str::from_utf8(value.as_bytes()).unwrap(); + key_bytes += key.len(); + value_bytes += value.len(); + value_count += 1; + headers.entry(key).or_default().push(value); + } + + let mut signed_headers = String::with_capacity(key_bytes + headers.len()); + let mut canonical_headers = + String::with_capacity(key_bytes + value_bytes + headers.len() + value_count); + + for (header_idx, (name, values)) in headers.into_iter().enumerate() { + if header_idx != 0 { + signed_headers.push(';'); + } + + signed_headers.push_str(name); + canonical_headers.push_str(name); + canonical_headers.push(':'); + for (value_idx, value) in values.into_iter().enumerate() { + if value_idx != 0 { + canonical_headers.push(','); + } + canonical_headers.push_str(value.trim()); + } + canonical_headers.push('\n'); + } + + (signed_headers, canonical_headers) +} + +/// Credentials sourced from the instance metadata service +/// +/// +#[derive(Debug)] +pub(crate) struct InstanceCredentialProvider { + pub imdsv1_fallback: bool, + pub metadata_endpoint: String, +} + +#[async_trait] +impl TokenProvider for InstanceCredentialProvider { + type Credential = AwsCredential; + + async fn fetch_token( + &self, + client: &HttpClient, + retry: &RetryConfig, + ) -> Result>> { + instance_creds(client, retry, &self.metadata_endpoint, self.imdsv1_fallback) + .await + .map_err(|source| crate::Error::Generic { + store: STORE, + source, + }) + } +} + +/// Credentials sourced using AssumeRoleWithWebIdentity +/// +/// +#[derive(Debug)] +pub(crate) struct WebIdentityProvider { + pub token_path: String, + pub role_arn: String, + pub session_name: String, + pub endpoint: String, +} + +#[async_trait] +impl TokenProvider for WebIdentityProvider { + type Credential = AwsCredential; + + async fn fetch_token( + &self, + client: &HttpClient, + retry: &RetryConfig, + ) -> Result>> { + web_identity( + client, + retry, + &self.token_path, + &self.role_arn, + &self.session_name, + &self.endpoint, + ) + .await + .map_err(|source| crate::Error::Generic { + store: STORE, + source, + }) + } +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct InstanceCredentials { + access_key_id: String, + secret_access_key: String, + token: String, + expiration: DateTime, +} + +impl From for AwsCredential { + fn from(s: InstanceCredentials) -> Self { + Self { + key_id: s.access_key_id, + secret_key: s.secret_access_key, + token: Some(s.token), + } + } +} + +/// +async fn instance_creds( + client: &HttpClient, + retry_config: &RetryConfig, + endpoint: &str, + imdsv1_fallback: bool, +) -> Result>, StdError> { + const CREDENTIALS_PATH: &str = "latest/meta-data/iam/security-credentials"; + const AWS_EC2_METADATA_TOKEN_HEADER: &str = "X-aws-ec2-metadata-token"; + + let token_url = format!("{endpoint}/latest/api/token"); + + let token_result = client + .request(Method::PUT, token_url) + .header("X-aws-ec2-metadata-token-ttl-seconds", "600") // 10 minute TTL + .retryable(retry_config) + .idempotent(true) + .send() + .await; + + let token = match token_result { + Ok(t) => Some(t.into_body().text().await?), + Err(e) if imdsv1_fallback && matches!(e.status(), 
Some(StatusCode::FORBIDDEN)) => { + warn!("received 403 from metadata endpoint, falling back to IMDSv1"); + None + } + Err(e) => return Err(e.into()), + }; + + let role_url = format!("{endpoint}/{CREDENTIALS_PATH}/"); + let mut role_request = client.request(Method::GET, role_url); + + if let Some(token) = &token { + role_request = role_request.header(AWS_EC2_METADATA_TOKEN_HEADER, token); + } + + let role = role_request + .send_retry(retry_config) + .await? + .into_body() + .text() + .await?; + + let creds_url = format!("{endpoint}/{CREDENTIALS_PATH}/{role}"); + let mut creds_request = client.request(Method::GET, creds_url); + if let Some(token) = &token { + creds_request = creds_request.header(AWS_EC2_METADATA_TOKEN_HEADER, token); + } + + let creds: InstanceCredentials = creds_request + .send_retry(retry_config) + .await? + .into_body() + .json() + .await?; + + let now = Utc::now(); + let ttl = (creds.expiration - now).to_std().unwrap_or_default(); + Ok(TemporaryToken { + token: Arc::new(creds.into()), + expiry: Some(Instant::now() + ttl), + }) +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct AssumeRoleResponse { + assume_role_with_web_identity_result: AssumeRoleResult, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct AssumeRoleResult { + credentials: SessionCredentials, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct SessionCredentials { + session_token: String, + secret_access_key: String, + access_key_id: String, + expiration: DateTime, +} + +impl From for AwsCredential { + fn from(s: SessionCredentials) -> Self { + Self { + key_id: s.access_key_id, + secret_key: s.secret_access_key, + token: Some(s.session_token), + } + } +} + +/// +async fn web_identity( + client: &HttpClient, + retry_config: &RetryConfig, + token_path: &str, + role_arn: &str, + session_name: &str, + endpoint: &str, +) -> Result>, StdError> { + let token = std::fs::read_to_string(token_path) + .map_err(|e| format!("Failed to read token file '{token_path}': {e}"))?; + + let bytes = client + .post(endpoint) + .query(&[ + ("Action", "AssumeRoleWithWebIdentity"), + ("DurationSeconds", "3600"), + ("RoleArn", role_arn), + ("RoleSessionName", session_name), + ("Version", "2011-06-15"), + ("WebIdentityToken", &token), + ]) + .retryable(retry_config) + .idempotent(true) + .sensitive(true) + .send() + .await? 
+ .into_body() + .bytes() + .await?; + + let resp: AssumeRoleResponse = quick_xml::de::from_reader(bytes.reader()) + .map_err(|e| format!("Invalid AssumeRoleWithWebIdentity response: {e}"))?; + + let creds = resp.assume_role_with_web_identity_result.credentials; + let now = Utc::now(); + let ttl = (creds.expiration - now).to_std().unwrap_or_default(); + + Ok(TemporaryToken { + token: Arc::new(creds.into()), + expiry: Some(Instant::now() + ttl), + }) +} + +/// Credentials sourced from a task IAM role +/// +/// +#[derive(Debug)] +pub(crate) struct TaskCredentialProvider { + pub url: String, + pub retry: RetryConfig, + pub client: HttpClient, + pub cache: TokenCache>, +} + +#[async_trait] +impl CredentialProvider for TaskCredentialProvider { + type Credential = AwsCredential; + + async fn get_credential(&self) -> Result> { + self.cache + .get_or_insert_with(|| task_credential(&self.client, &self.retry, &self.url)) + .await + .map_err(|source| crate::Error::Generic { + store: STORE, + source, + }) + } +} + +/// +async fn task_credential( + client: &HttpClient, + retry: &RetryConfig, + url: &str, +) -> Result>, StdError> { + let creds: InstanceCredentials = client + .get(url) + .send_retry(retry) + .await? + .into_body() + .json() + .await?; + + let now = Utc::now(); + let ttl = (creds.expiration - now).to_std().unwrap_or_default(); + Ok(TemporaryToken { + token: Arc::new(creds.into()), + expiry: Some(Instant::now() + ttl), + }) +} + +/// EKS Pod Identity credential provider. +/// +/// Uses the endpoint in `AWS_CONTAINER_CREDENTIALS_FULL_URI` +/// and the bearer token in `AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE` +/// to fetch ephemeral AWS credentials from an EKS pod. +#[derive(Debug)] +pub(crate) struct EKSPodCredentialProvider { + pub url: String, + pub token_file: String, + pub retry: RetryConfig, + pub client: HttpClient, + pub cache: TokenCache>, +} + +#[async_trait] +impl CredentialProvider for EKSPodCredentialProvider { + type Credential = AwsCredential; + + async fn get_credential(&self) -> Result> { + self.cache + .get_or_insert_with(|| { + eks_credential(&self.client, &self.retry, &self.url, &self.token_file) + }) + .await + .map_err(|source| crate::Error::Generic { + store: STORE, + source, + }) + } +} + +/// Performs the actual credential retrieval and parsing for `EKSPodCredentialProvider`. +/// +/// +async fn eks_credential( + client: &HttpClient, + retry: &RetryConfig, + url: &str, + token_file: &str, +) -> Result>, StdError> { + // Spawn IO to blocking tokio pool if running in tokio context + let token = match tokio::runtime::Handle::try_current() { + Ok(runtime) => { + let path = token_file.to_string(); + runtime + .spawn_blocking(move || std::fs::read_to_string(&path)) + .await? 
+ } + Err(_) => std::fs::read_to_string(token_file), + } + .map_err(|e| format!("Failed to read EKS token file '{token_file}': {e}"))?; + + let mut req = client.request(Method::GET, url); + req = req.header("Authorization", token); + + // The JSON from the EKS credential endpoint has the same shape as ECS task credentials + let creds: InstanceCredentials = req.send_retry(retry).await?.into_body().json().await?; + + let now = Utc::now(); + let ttl = (creds.expiration - now).to_std().unwrap_or_default(); + + Ok(TemporaryToken { + token: Arc::new(creds.into()), + expiry: Some(Instant::now() + ttl), + }) +} + +/// A session provider as used by S3 Express One Zone +/// +/// +#[derive(Debug)] +pub(crate) struct SessionProvider { + pub endpoint: String, + pub region: String, + pub credentials: AwsCredentialProvider, +} + +#[async_trait] +impl TokenProvider for SessionProvider { + type Credential = AwsCredential; + + async fn fetch_token( + &self, + client: &HttpClient, + retry: &RetryConfig, + ) -> Result>> { + let creds = self.credentials.get_credential().await?; + let authorizer = AwsAuthorizer::new(&creds, "s3", &self.region); + + let bytes = client + .get(format!("{}?session", self.endpoint)) + .with_aws_sigv4(Some(authorizer), None) + .send_retry(retry) + .await + .map_err(|source| Error::CreateSessionRequest { source })? + .into_body() + .bytes() + .await + .map_err(|source| Error::CreateSessionResponse { source })?; + + let resp: CreateSessionOutput = quick_xml::de::from_reader(bytes.reader()) + .map_err(|source| Error::CreateSessionOutput { source })?; + + let creds = resp.credentials; + Ok(TemporaryToken { + token: Arc::new(creds.into()), + // Credentials last 5 minutes - https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateSession.html + expiry: Some(Instant::now() + Duration::from_secs(5 * 60)), + }) + } +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct CreateSessionOutput { + credentials: SessionCredentials, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::aws::{AmazonS3Builder, AmazonS3ConfigKey}; + use crate::client::mock_server::MockServer; + use crate::client::HttpClient; + use http::Response; + use reqwest::{Client, Method}; + use std::env; + + // Test generated using https://docs.aws.amazon.com/general/latest/gr/sigv4-signed-request-examples.html + #[test] + fn test_sign_with_signed_payload() { + let client = HttpClient::new(Client::new()); + + // Test credentials from https://docs.aws.amazon.com/AmazonS3/latest/userguide/RESTAuthentication.html + let credential = AwsCredential { + key_id: "AKIAIOSFODNN7EXAMPLE".to_string(), + secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(), + token: None, + }; + + // method = 'GET' + // service = 'ec2' + // host = 'ec2.amazonaws.com' + // region = 'us-east-1' + // endpoint = 'https://ec2.amazonaws.com' + // request_parameters = '' + let date = DateTime::parse_from_rfc3339("2022-08-06T18:01:34Z") + .unwrap() + .with_timezone(&Utc); + + let mut request = client + .request(Method::GET, "https://ec2.amazon.com/") + .into_parts() + .1 + .unwrap(); + + let signer = AwsAuthorizer { + date: Some(date), + credential: &credential, + service: "ec2", + region: "us-east-1", + sign_payload: true, + token_header: None, + request_payer: false, + }; + + signer.authorize(&mut request, None); + assert_eq!(request.headers().get(&AUTHORIZATION).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, 
Signature=a3c787a7ed37f7fdfbfd2d7056a3d7c9d85e6d52a2bfbec73793c0be6e7862d4") + } + + #[test] + fn test_sign_with_signed_payload_request_payer() { + let client = HttpClient::new(Client::new()); + + // Test credentials from https://docs.aws.amazon.com/AmazonS3/latest/userguide/RESTAuthentication.html + let credential = AwsCredential { + key_id: "AKIAIOSFODNN7EXAMPLE".to_string(), + secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(), + token: None, + }; + + // method = 'GET' + // service = 'ec2' + // host = 'ec2.amazonaws.com' + // region = 'us-east-1' + // endpoint = 'https://ec2.amazonaws.com' + // request_parameters = '' + let date = DateTime::parse_from_rfc3339("2022-08-06T18:01:34Z") + .unwrap() + .with_timezone(&Utc); + + let mut request = client + .request(Method::GET, "https://ec2.amazon.com/") + .into_parts() + .1 + .unwrap(); + + let signer = AwsAuthorizer { + date: Some(date), + credential: &credential, + service: "ec2", + region: "us-east-1", + sign_payload: true, + token_header: None, + request_payer: true, + }; + + signer.authorize(&mut request, None); + assert_eq!(request.headers().get(&AUTHORIZATION).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-request-payer, Signature=7030625a9e9b57ed2a40e63d749f4a4b7714b6e15004cab026152f870dd8565d") + } + + #[test] + fn test_sign_with_unsigned_payload() { + let client = HttpClient::new(Client::new()); + + // Test credentials from https://docs.aws.amazon.com/AmazonS3/latest/userguide/RESTAuthentication.html + let credential = AwsCredential { + key_id: "AKIAIOSFODNN7EXAMPLE".to_string(), + secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(), + token: None, + }; + + // method = 'GET' + // service = 'ec2' + // host = 'ec2.amazonaws.com' + // region = 'us-east-1' + // endpoint = 'https://ec2.amazonaws.com' + // request_parameters = '' + let date = DateTime::parse_from_rfc3339("2022-08-06T18:01:34Z") + .unwrap() + .with_timezone(&Utc); + + let mut request = client + .request(Method::GET, "https://ec2.amazon.com/") + .into_parts() + .1 + .unwrap(); + + let authorizer = AwsAuthorizer { + date: Some(date), + credential: &credential, + service: "ec2", + region: "us-east-1", + token_header: None, + sign_payload: false, + request_payer: false, + }; + + authorizer.authorize(&mut request, None); + assert_eq!(request.headers().get(&AUTHORIZATION).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=653c3d8ea261fd826207df58bc2bb69fbb5003e9eb3c0ef06e4a51f2a81d8699"); + } + + #[test] + fn signed_get_url() { + // Values from https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html + let credential = AwsCredential { + key_id: "AKIAIOSFODNN7EXAMPLE".to_string(), + secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(), + token: None, + }; + + let date = DateTime::parse_from_rfc3339("2013-05-24T00:00:00Z") + .unwrap() + .with_timezone(&Utc); + + let authorizer = AwsAuthorizer { + date: Some(date), + credential: &credential, + service: "s3", + region: "us-east-1", + token_header: None, + sign_payload: false, + request_payer: false, + }; + + let mut url = Url::parse("https://examplebucket.s3.amazonaws.com/test.txt").unwrap(); + authorizer.sign(Method::GET, &mut url, Duration::from_secs(86400)); + + assert_eq!( + url, + Url::parse( + "https://examplebucket.s3.amazonaws.com/test.txt?\ + 
X-Amz-Algorithm=AWS4-HMAC-SHA256&\ + X-Amz-Credential=AKIAIOSFODNN7EXAMPLE%2F20130524%2Fus-east-1%2Fs3%2Faws4_request&\ + X-Amz-Date=20130524T000000Z&\ + X-Amz-Expires=86400&\ + X-Amz-SignedHeaders=host&\ + X-Amz-Signature=aeeed9bbccd4d02ee5c0109b86d86835f995330da4c265957d157751f604d404" + ) + .unwrap() + ); + } + + #[test] + fn signed_get_url_request_payer() { + // Values from https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html + let credential = AwsCredential { + key_id: "AKIAIOSFODNN7EXAMPLE".to_string(), + secret_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY".to_string(), + token: None, + }; + + let date = DateTime::parse_from_rfc3339("2013-05-24T00:00:00Z") + .unwrap() + .with_timezone(&Utc); + + let authorizer = AwsAuthorizer { + date: Some(date), + credential: &credential, + service: "s3", + region: "us-east-1", + token_header: None, + sign_payload: false, + request_payer: true, + }; + + let mut url = Url::parse("https://examplebucket.s3.amazonaws.com/test.txt").unwrap(); + authorizer.sign(Method::GET, &mut url, Duration::from_secs(86400)); + + assert_eq!( + url, + Url::parse( + "https://examplebucket.s3.amazonaws.com/test.txt?\ + X-Amz-Algorithm=AWS4-HMAC-SHA256&\ + X-Amz-Credential=AKIAIOSFODNN7EXAMPLE%2F20130524%2Fus-east-1%2Fs3%2Faws4_request&\ + X-Amz-Date=20130524T000000Z&\ + X-Amz-Expires=86400&\ + X-Amz-SignedHeaders=host&\ + x-amz-request-payer=requester&\ + X-Amz-Signature=9ad7c781cc30121f199b47d35ed3528473e4375b63c5d91cd87c927803e4e00a" + ) + .unwrap() + ); + } + + #[test] + fn test_sign_port() { + let client = HttpClient::new(Client::new()); + + let credential = AwsCredential { + key_id: "H20ABqCkLZID4rLe".to_string(), + secret_key: "jMqRDgxSsBqqznfmddGdu1TmmZOJQxdM".to_string(), + token: None, + }; + + let date = DateTime::parse_from_rfc3339("2022-08-09T13:05:25Z") + .unwrap() + .with_timezone(&Utc); + + let mut request = client + .request(Method::GET, "http://localhost:9000/tsm-schemas") + .query(&[ + ("delimiter", "/"), + ("encoding-type", "url"), + ("list-type", "2"), + ("prefix", ""), + ]) + .into_parts() + .1 + .unwrap(); + + let authorizer = AwsAuthorizer { + date: Some(date), + credential: &credential, + service: "s3", + region: "us-east-1", + token_header: None, + sign_payload: true, + request_payer: false, + }; + + authorizer.authorize(&mut request, None); + assert_eq!(request.headers().get(&AUTHORIZATION).unwrap(), "AWS4-HMAC-SHA256 Credential=H20ABqCkLZID4rLe/20220809/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=9ebf2f92872066c99ac94e573b4e1b80f4dbb8a32b1e8e23178318746e7d1b4d") + } + + #[tokio::test] + async fn test_instance_metadata() { + if env::var("TEST_INTEGRATION").is_err() { + eprintln!("skipping AWS integration test"); + return; + } + + // For example https://github.com/aws/amazon-ec2-metadata-mock + let endpoint = env::var("EC2_METADATA_ENDPOINT").unwrap(); + let client = HttpClient::new(Client::new()); + let retry_config = RetryConfig::default(); + + // Verify only allows IMDSv2 + let (client, req) = client + .request(Method::GET, format!("{endpoint}/latest/meta-data/ami-id")) + .into_parts(); + + let resp = client.execute(req.unwrap()).await.unwrap(); + + assert_eq!( + resp.status(), + StatusCode::UNAUTHORIZED, + "Ensure metadata endpoint is set to only allow IMDSv2" + ); + + let creds = instance_creds(&client, &retry_config, &endpoint, false) + .await + .unwrap(); + + let id = &creds.token.key_id; + let secret = &creds.token.secret_key; + let token = 
creds.token.token.as_ref().unwrap(); + + assert!(!id.is_empty()); + assert!(!secret.is_empty()); + assert!(!token.is_empty()) + } + + #[tokio::test] + async fn test_mock() { + let server = MockServer::new().await; + + const IMDSV2_HEADER: &str = "X-aws-ec2-metadata-token"; + + let secret_access_key = "SECRET"; + let access_key_id = "KEYID"; + let token = "TOKEN"; + + let endpoint = server.url(); + let client = HttpClient::new(Client::new()); + let retry_config = RetryConfig::default(); + + // Test IMDSv2 + server.push_fn(|req| { + assert_eq!(req.uri().path(), "/latest/api/token"); + assert_eq!(req.method(), &Method::PUT); + Response::new("cupcakes".to_string()) + }); + server.push_fn(|req| { + assert_eq!( + req.uri().path(), + "/latest/meta-data/iam/security-credentials/" + ); + assert_eq!(req.method(), &Method::GET); + let t = req.headers().get(IMDSV2_HEADER).unwrap().to_str().unwrap(); + assert_eq!(t, "cupcakes"); + Response::new("myrole".to_string()) + }); + server.push_fn(|req| { + assert_eq!(req.uri().path(), "/latest/meta-data/iam/security-credentials/myrole"); + assert_eq!(req.method(), &Method::GET); + let t = req.headers().get(IMDSV2_HEADER).unwrap().to_str().unwrap(); + assert_eq!(t, "cupcakes"); + Response::new(r#"{"AccessKeyId":"KEYID","Code":"Success","Expiration":"2022-08-30T10:51:04Z","LastUpdated":"2022-08-30T10:21:04Z","SecretAccessKey":"SECRET","Token":"TOKEN","Type":"AWS-HMAC"}"#.to_string()) + }); + + let creds = instance_creds(&client, &retry_config, endpoint, true) + .await + .unwrap(); + + assert_eq!(creds.token.token.as_deref().unwrap(), token); + assert_eq!(&creds.token.key_id, access_key_id); + assert_eq!(&creds.token.secret_key, secret_access_key); + + // Test IMDSv1 fallback + server.push_fn(|req| { + assert_eq!(req.uri().path(), "/latest/api/token"); + assert_eq!(req.method(), &Method::PUT); + Response::builder() + .status(StatusCode::FORBIDDEN) + .body(String::new()) + .unwrap() + }); + server.push_fn(|req| { + assert_eq!( + req.uri().path(), + "/latest/meta-data/iam/security-credentials/" + ); + assert_eq!(req.method(), &Method::GET); + assert!(req.headers().get(IMDSV2_HEADER).is_none()); + Response::new("myrole".to_string()) + }); + server.push_fn(|req| { + assert_eq!(req.uri().path(), "/latest/meta-data/iam/security-credentials/myrole"); + assert_eq!(req.method(), &Method::GET); + assert!(req.headers().get(IMDSV2_HEADER).is_none()); + Response::new(r#"{"AccessKeyId":"KEYID","Code":"Success","Expiration":"2022-08-30T10:51:04Z","LastUpdated":"2022-08-30T10:21:04Z","SecretAccessKey":"SECRET","Token":"TOKEN","Type":"AWS-HMAC"}"#.to_string()) + }); + + let creds = instance_creds(&client, &retry_config, endpoint, true) + .await + .unwrap(); + + assert_eq!(creds.token.token.as_deref().unwrap(), token); + assert_eq!(&creds.token.key_id, access_key_id); + assert_eq!(&creds.token.secret_key, secret_access_key); + + // Test IMDSv1 fallback disabled + server.push( + Response::builder() + .status(StatusCode::FORBIDDEN) + .body(String::new()) + .unwrap(), + ); + + // Should fail + instance_creds(&client, &retry_config, endpoint, false) + .await + .unwrap_err(); + } + + #[tokio::test] + async fn test_eks_pod_credential_provider() { + use crate::client::mock_server::MockServer; + use http::Response; + use std::fs::File; + use std::io::Write; + + let mock_server = MockServer::new().await; + + mock_server.push(Response::new( + r#"{ + "AccessKeyId": "TEST_KEY", + "SecretAccessKey": "TEST_SECRET", + "Token": "TEST_SESSION_TOKEN", + "Expiration": "2100-01-01T00:00:00Z" + }"# + 
.to_string(), + )); + + let token_file = tempfile::NamedTempFile::new().expect("cannot create temp file"); + let path = token_file.path().to_string_lossy().into_owned(); + let mut f = File::create(token_file.path()).unwrap(); + write!(f, "TEST_BEARER_TOKEN").unwrap(); + + let builder = AmazonS3Builder::new() + .with_bucket_name("some-bucket") + .with_config( + AmazonS3ConfigKey::ContainerCredentialsFullUri, + mock_server.url(), + ) + .with_config(AmazonS3ConfigKey::ContainerAuthorizationTokenFile, &path); + + let s3 = builder.build().unwrap(); + + let cred = s3.client.config.credentials.get_credential().await.unwrap(); + + assert_eq!(cred.key_id, "TEST_KEY"); + assert_eq!(cred.secret_key, "TEST_SECRET"); + assert_eq!(cred.token.as_deref(), Some("TEST_SESSION_TOKEN")); + } + + #[test] + fn test_output_masks_all_fields() { + let cred = AwsCredential { + key_id: "AKIAXXX".to_string(), + secret_key: "super_secret".to_string(), + token: Some("temp_token".to_string()), + }; + + let debug_output = format!("{cred:?}"); + + assert!(debug_output.contains("key_id: \"AKIAXXX\"")); + assert!(debug_output.contains("secret_key: \"******\"")); + assert!(debug_output.contains("token: Some(\"******\")")); + + assert!(!debug_output.contains("super_secret")); + assert!(!debug_output.contains("temp_token")); + } +} diff --git a/rust/object_store/src/aws/dynamo.rs b/rust/object_store/src/aws/dynamo.rs new file mode 100644 index 0000000000..a6775efab8 --- /dev/null +++ b/rust/object_store/src/aws/dynamo.rs @@ -0,0 +1,595 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! A DynamoDB based lock system + +use std::borrow::Cow; +use std::collections::HashMap; +use std::future::Future; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use chrono::Utc; +use http::{Method, StatusCode}; +use serde::ser::SerializeMap; +use serde::{Deserialize, Serialize, Serializer}; + +use crate::aws::client::S3Client; +use crate::aws::credential::CredentialExt; +use crate::aws::{AwsAuthorizer, AwsCredential}; +use crate::client::get::GetClientExt; +use crate::client::retry::RetryExt; +use crate::client::retry::{RequestError, RetryError}; +use crate::path::Path; +use crate::{Error, GetOptions, Result}; + +/// The exception returned by DynamoDB on conflict +const CONFLICT: &str = "ConditionalCheckFailedException"; + +const STORE: &str = "DynamoDB"; + +/// A DynamoDB-based commit protocol, used to provide conditional write support for S3 +/// +/// ## Limitations +/// +/// Only conditional operations, e.g. `copy_if_not_exists` will be synchronized, and can +/// therefore race with non-conditional operations, e.g. `put`, `copy`, `delete`, or +/// conditional operations performed by writers not configured to synchronize with DynamoDB. 
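+///
+/// In other words, the DynamoDB record is purely advisory: it only excludes
+/// writers that also consult the table before mutating the object at the
+/// corresponding path.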
+/// +/// Workloads making use of this mechanism **must** ensure: +/// +/// * Conditional and non-conditional operations are not performed on the same paths +/// * Conditional operations are only performed via similarly configured clients +/// +/// Additionally as the locking mechanism relies on timeouts to detect stale locks, +/// performance will be poor for systems that frequently delete and then create +/// objects at the same path, instead being optimised for systems that primarily create +/// files with paths never used before, or perform conditional updates to existing files +/// +/// ## Commit Protocol +/// +/// The DynamoDB schema is as follows: +/// +/// * A string partition key named `"path"` +/// * A string sort key named `"etag"` +/// * A numeric [TTL] attribute named `"ttl"` +/// * A numeric attribute named `"generation"` +/// * A numeric attribute named `"timeout"` +/// +/// An appropriate DynamoDB table can be created with the CLI as follows: +/// +/// ```bash +/// $ aws dynamodb create-table --table-name --key-schema AttributeName=path,KeyType=HASH AttributeName=etag,KeyType=RANGE --attribute-definitions AttributeName=path,AttributeType=S AttributeName=etag,AttributeType=S +/// $ aws dynamodb update-time-to-live --table-name --time-to-live-specification Enabled=true,AttributeName=ttl +/// ``` +/// +/// To perform a conditional operation on an object with a given `path` and `etag` (`*` if creating), +/// the commit protocol is as follows: +/// +/// 1. Perform HEAD request on `path` and error on precondition mismatch +/// 2. Create record in DynamoDB with given `path` and `etag` with the configured timeout +/// 1. On Success: Perform operation with the configured timeout +/// 2. On Conflict: +/// 1. Periodically re-perform HEAD request on `path` and error on precondition mismatch +/// 2. If `timeout * max_skew_rate` passed, replace the record incrementing the `"generation"` +/// 1. On Success: GOTO 2.1 +/// 2. On Conflict: GOTO 2.2 +/// +/// Provided no writer modifies an object with a given `path` and `etag` without first adding a +/// corresponding record to DynamoDB, we are guaranteed that only one writer will ever commit. +/// +/// This is inspired by the [DynamoDB Lock Client] but simplified for the more limited +/// requirements of synchronizing object storage. The major changes are: +/// +/// * Uses a monotonic generation count instead of a UUID rvn, as this is: +/// * Cheaper to generate, serialize and compare +/// * Cannot collide +/// * More human readable / interpretable +/// * Relies on [TTL] to eventually clean up old locks +/// +/// It also draws inspiration from the DeltaLake [S3 Multi-Cluster] commit protocol, but +/// generalised to not make assumptions about the workload and not rely on first writing +/// to a temporary path. 
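+///
+/// For example, coordination could be configured through a table named
+/// `my-lock-table` with a 30 second lease timeout (an illustrative sketch;
+/// the table name is hypothetical):
+///
+/// ```ignore
+/// use object_store::aws::DynamoCommit;
+///
+/// let commit = DynamoCommit::new("my-lock-table".to_string()).with_timeout(30_000);
+/// ```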
+///
+/// [TTL]: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/howitworks-ttl.html
+/// [DynamoDB Lock Client]: https://aws.amazon.com/blogs/database/building-distributed-locks-with-the-dynamodb-lock-client/
+/// [S3 Multi-Cluster]: https://docs.google.com/document/d/1Gs4ZsTH19lMxth4BSdwlWjUNR-XhKHicDvBjd2RqNd8/edit#heading=h.mjjuxw9mcz9h
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct DynamoCommit {
+    table_name: String,
+    /// The number of milliseconds a lease is valid for
+    timeout: u64,
+    /// The maximum clock skew rate tolerated by the system
+    max_clock_skew_rate: u32,
+    /// The length of time a record will be retained in DynamoDB before being cleaned up
+    ///
+    /// This is purely an optimisation to avoid indefinite growth of the DynamoDB table
+    /// and does not impact how long clients may wait to acquire a lock
+    ttl: Duration,
+    /// The backoff duration before retesting a condition
+    test_interval: Duration,
+}
+
+impl DynamoCommit {
+    /// Create a new [`DynamoCommit`] with a given table name
+    pub fn new(table_name: String) -> Self {
+        Self {
+            table_name,
+            timeout: 20_000,
+            max_clock_skew_rate: 3,
+            ttl: Duration::from_secs(60 * 60),
+            test_interval: Duration::from_millis(100),
+        }
+    }
+
+    /// Overrides the lock timeout.
+    ///
+    /// A longer lock timeout reduces the probability of spurious commit failures and multi-writer
+    /// races, but will increase the time that writers must wait to reclaim a lost lock. The
+    /// default value of 20 seconds should be appropriate for most use-cases.
+    pub fn with_timeout(mut self, millis: u64) -> Self {
+        self.timeout = millis;
+        self
+    }
+
+    /// The maximum clock skew rate tolerated by the system.
+    ///
+    /// An environment in which the clock on the fastest node ticks twice as fast as the slowest
+    /// node, would have a clock skew rate of 2. The default value of 3 should be appropriate
+    /// for most environments.
+    pub fn with_max_clock_skew_rate(mut self, rate: u32) -> Self {
+        self.max_clock_skew_rate = rate;
+        self
+    }
+
+    /// The length of time a record should be retained in DynamoDB before being cleaned up
+    ///
+    /// This should be significantly larger than the configured lock timeout, with the default
+    /// value of 1 hour appropriate for most use-cases.
+    pub fn with_ttl(mut self, ttl: Duration) -> Self {
+        self.ttl = ttl;
+        self
+    }
+
+    /// Parse [`DynamoCommit`] from a string
+    pub(crate) fn from_str(value: &str) -> Option<Self> {
+        Some(match value.split_once(':') {
+            Some((table_name, timeout)) => {
+                Self::new(table_name.trim().to_string()).with_timeout(timeout.parse().ok()?)
+            }
+            None => Self::new(value.trim().to_string()),
+        })
+    }
+
+    /// Returns the name of the DynamoDB table.
+    pub(crate) fn table_name(&self) -> &str {
+        &self.table_name
+    }
+
+    pub(crate) async fn copy_if_not_exists(
+        &self,
+        client: &Arc<S3Client>,
+        from: &Path,
+        to: &Path,
+    ) -> Result<()> {
+        self.conditional_op(client, to, None, || async {
+            client.copy_request(from, to).send().await?;
+            Ok(())
+        })
+        .await
+    }
+
+    #[allow(clippy::future_not_send)] // Generics confound this lint
+    pub(crate) async fn conditional_op<F, Fut, T>(
+        &self,
+        client: &Arc<S3Client>,
+        to: &Path,
+        etag: Option<&str>,
+        op: F,
+    ) -> Result<T>
+    where
+        F: FnOnce() -> Fut,
+        Fut: Future<Output = Result<T, Error>>,
+    {
+        check_precondition(client, to, etag).await?;
+
+        let mut previous_lease = None;
+
+        loop {
+            let existing = previous_lease.as_ref();
+            match self.try_lock(client, to.as_ref(), etag, existing).await?
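+            // On `Ok` the operation runs below under the lease deadline; on
+            // `Conflict` the precondition is re-checked until the holder's lease
+            // (scaled by `max_clock_skew_rate`) has expired and can be reclaimed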
+            {
+                TryLockResult::Ok(lease) => {
+                    let expiry = lease.acquire + lease.timeout;
+                    return match tokio::time::timeout_at(expiry.into(), op()).await {
+                        Ok(Ok(v)) => Ok(v),
+                        Ok(Err(e)) => Err(e),
+                        Err(_) => Err(Error::Generic {
+                            store: "DynamoDB",
+                            source: format!(
+                                "Failed to perform conditional operation in {} milliseconds",
+                                self.timeout
+                            )
+                            .into(),
+                        }),
+                    };
+                }
+                TryLockResult::Conflict(conflict) => {
+                    let mut interval = tokio::time::interval(self.test_interval);
+                    let expiry = conflict.timeout * self.max_clock_skew_rate;
+                    loop {
+                        interval.tick().await;
+                        check_precondition(client, to, etag).await?;
+                        if conflict.acquire.elapsed() > expiry {
+                            previous_lease = Some(conflict);
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Attempt to acquire a lock, reclaiming an existing lease if provided
+    async fn try_lock(
+        &self,
+        s3: &S3Client,
+        path: &str,
+        etag: Option<&str>,
+        existing: Option<&Lease>,
+    ) -> Result<TryLockResult> {
+        let attributes;
+        let (next_gen, condition_expression, expression_attribute_values) = match existing {
+            None => (0_u64, "attribute_not_exists(#pk)", Map(&[])),
+            Some(existing) => {
+                attributes = [(":g", AttributeValue::Number(existing.generation))];
+                (
+                    existing.generation.checked_add(1).unwrap(),
+                    "attribute_exists(#pk) AND generation = :g",
+                    Map(attributes.as_slice()),
+                )
+            }
+        };
+
+        let ttl = (Utc::now() + self.ttl).timestamp();
+        let items = [
+            ("path", AttributeValue::from(path)),
+            ("etag", AttributeValue::from(etag.unwrap_or("*"))),
+            ("generation", AttributeValue::Number(next_gen)),
+            ("timeout", AttributeValue::Number(self.timeout)),
+            ("ttl", AttributeValue::Number(ttl as _)),
+        ];
+        let names = [("#pk", "path")];
+
+        let req = PutItem {
+            table_name: &self.table_name,
+            condition_expression,
+            expression_attribute_values,
+            expression_attribute_names: Map(&names),
+            item: Map(&items),
+            return_values: None,
+            return_values_on_condition_check_failure: Some(ReturnValues::AllOld),
+        };
+
+        let credential = s3.config.get_credential().await?;
+
+        let acquire = Instant::now();
+        match self
+            .request(s3, credential.as_deref(), "DynamoDB_20120810.PutItem", req)
+            .await
+        {
+            Ok(_) => Ok(TryLockResult::Ok(Lease {
+                acquire,
+                generation: next_gen,
+                timeout: Duration::from_millis(self.timeout),
+            })),
+            Err(e) => match parse_error_response(&e) {
+                Some(e) if e.error.ends_with(CONFLICT) => match extract_lease(&e.item) {
+                    Some(lease) => Ok(TryLockResult::Conflict(lease)),
+                    None => Err(Error::Generic {
+                        store: STORE,
+                        source: "Failed to extract lease from conflict ReturnValuesOnConditionCheckFailure response".into()
+                    }),
+                },
+                _ => Err(Error::Generic {
+                    store: STORE,
+                    source: Box::new(e),
+                }),
+            },
+        }
+    }
+
+    async fn request<R: Serialize>(
+        &self,
+        s3: &S3Client,
+        cred: Option<&AwsCredential>,
+        target: &str,
+        req: R,
+    ) -> Result<HttpResponse, RetryError> {
+        let region = &s3.config.region;
+        let authorizer = cred.map(|x| AwsAuthorizer::new(x, "dynamodb", region));
+
+        let builder = match &s3.config.endpoint {
+            Some(e) => s3.client.request(Method::POST, e),
+            None => {
+                let url = format!("https://dynamodb.{region}.amazonaws.com");
+                s3.client.request(Method::POST, url)
+            }
+        };
+
+        // TODO: Timeout
+        builder
+            .json(&req)
+            .header("X-Amz-Target", target)
+            .with_aws_sigv4(authorizer, None)
+            .send_retry(&s3.config.retry_config)
+            .await
+    }
+}
+
+#[derive(Debug)]
+enum TryLockResult {
+    /// Successfully acquired a lease
+    Ok(Lease),
+    /// An existing lease was found
+    Conflict(Lease),
+}
+
+/// Validates that `path` has the given `etag` or doesn't exist if `None`
+async fn check_precondition(client: &Arc<S3Client>, path: &Path, etag: Option<&str>) -> Result<()> {
+    let options = GetOptions {
+        head: true,
+        ..Default::default()
+    };
+
+    match etag {
+        Some(expected) => match client.get_opts(path, options).await {
+            Ok(r) => match r.meta.e_tag {
+                Some(actual) if expected == actual => Ok(()),
+                actual => Err(Error::Precondition {
+                    path: path.to_string(),
+                    source: format!("{} does not match {expected}", actual.unwrap_or_default())
+                        .into(),
+                }),
+            },
+            Err(Error::NotFound { .. }) => Err(Error::Precondition {
+                path: path.to_string(),
+                source: format!("Object at location {path} not found").into(),
+            }),
+            Err(e) => Err(e),
+        },
+        None => match client.get_opts(path, options).await {
+            Ok(_) => Err(Error::AlreadyExists {
+                path: path.to_string(),
+                source: "Already Exists".to_string().into(),
+            }),
+            Err(Error::NotFound { .. }) => Ok(()),
+            Err(e) => Err(e),
+        },
+    }
+}
+
+/// Parses the error response if any
+fn parse_error_response(e: &RetryError) -> Option<ErrorResponse<'_>> {
+    match e.inner() {
+        RequestError::Status {
+            status: StatusCode::BAD_REQUEST,
+            body: Some(b),
+        } => serde_json::from_str(b).ok(),
+        _ => None,
+    }
+}
+
+/// Extracts a lease from `item`, returning `None` on error
+fn extract_lease(item: &HashMap<&str, AttributeValue<'_>>) -> Option<Lease> {
+    let generation = match item.get("generation") {
+        Some(AttributeValue::Number(generation)) => generation,
+        _ => return None,
+    };
+
+    let timeout = match item.get("timeout") {
+        Some(AttributeValue::Number(timeout)) => *timeout,
+        _ => return None,
+    };
+
+    Some(Lease {
+        acquire: Instant::now(),
+        generation: *generation,
+        timeout: Duration::from_millis(timeout),
+    })
+}
+
+/// A lock lease
+#[derive(Debug, Clone)]
+struct Lease {
+    acquire: Instant,
+    generation: u64,
+    timeout: Duration,
+}
+
+/// A DynamoDB [PutItem] payload
+///
+/// [PutItem]: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_PutItem.html
+#[derive(Serialize)]
+#[serde(rename_all = "PascalCase")]
+struct PutItem<'a> {
+    /// The table name
+    table_name: &'a str,
+
+    /// A condition that must be satisfied in order for a conditional PutItem operation to succeed.
+    condition_expression: &'a str,
+
+    /// One or more substitution tokens for attribute names in an expression
+    expression_attribute_names: Map<'a, &'a str, &'a str>,
+
+    /// One or more values that can be substituted in an expression
+    expression_attribute_values: Map<'a, &'a str, AttributeValue<'a>>,
+
+    /// A map of attribute name/value pairs, one for each attribute
+    item: Map<'a, &'a str, AttributeValue<'a>>,
+
+    /// Use ReturnValues if you want to get the item attributes as they appeared
+    /// before they were updated with the PutItem request.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    return_values: Option<ReturnValues>,
+
+    /// An optional parameter that returns the item attributes for a PutItem operation
+    /// that failed a condition check.
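+    ///
+    /// Requesting `ALL_OLD` here lets a conflicting writer recover the current
+    /// lease (its `generation` and `timeout`) straight from the error response,
+    /// avoiding an extra `GetItem` round trip.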
+ #[serde(skip_serializing_if = "Option::is_none")] + return_values_on_condition_check_failure: Option, +} + +#[derive(Deserialize)] +struct ErrorResponse<'a> { + #[serde(rename = "__type")] + error: &'a str, + + #[serde(borrow, default, rename = "Item")] + item: HashMap<&'a str, AttributeValue<'a>>, +} + +#[derive(Serialize)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +enum ReturnValues { + AllOld, +} + +/// A collection of key value pairs +/// +/// This provides cheap, ordered serialization of maps +struct Map<'a, K, V>(&'a [(K, V)]); + +impl Serialize for Map<'_, K, V> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + if self.0.is_empty() { + return serializer.serialize_none(); + } + let mut map = serializer.serialize_map(Some(self.0.len()))?; + for (k, v) in self.0 { + map.serialize_entry(k, v)? + } + map.end() + } +} + +/// A DynamoDB [AttributeValue] +/// +/// [AttributeValue]: https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_AttributeValue.html +#[derive(Debug, Serialize, Deserialize)] +enum AttributeValue<'a> { + #[serde(rename = "S")] + String(Cow<'a, str>), + #[serde(rename = "N", with = "number")] + Number(u64), +} + +impl<'a> From<&'a str> for AttributeValue<'a> { + fn from(value: &'a str) -> Self { + Self::String(Cow::Borrowed(value)) + } +} + +/// Numbers are serialized as strings +mod number { + use serde::{Deserialize, Deserializer, Serializer}; + + pub(crate) fn serialize(v: &u64, s: S) -> Result { + s.serialize_str(&v.to_string()) + } + + pub(crate) fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result { + let v: &str = Deserialize::deserialize(d)?; + v.parse().map_err(serde::de::Error::custom) + } +} + +use crate::client::HttpResponse; +/// Re-export integration_test to be called by s3_test +#[cfg(test)] +pub(crate) use tests::integration_test; + +#[cfg(test)] +mod tests { + use super::*; + use crate::aws::AmazonS3; + use crate::ObjectStore; + use rand::distr::Alphanumeric; + use rand::{rng, Rng}; + + #[test] + fn test_attribute_serde() { + let serde = serde_json::to_string(&AttributeValue::Number(23)).unwrap(); + assert_eq!(serde, "{\"N\":\"23\"}"); + let back: AttributeValue<'_> = serde_json::from_str(&serde).unwrap(); + assert!(matches!(back, AttributeValue::Number(23))); + } + + /// An integration test for DynamoDB + /// + /// This is a function called by s3_test to avoid test concurrency issues + pub(crate) async fn integration_test(integration: &AmazonS3, d: &DynamoCommit) { + let client = &integration.client; + + let src = Path::from("dynamo_path_src"); + integration.put(&src, "asd".into()).await.unwrap(); + + let dst = Path::from("dynamo_path"); + let _ = integration.delete(&dst).await; // Delete if present + + // Create a lock if not already exists + let existing = match d.try_lock(client, dst.as_ref(), None, None).await.unwrap() { + TryLockResult::Conflict(l) => l, + TryLockResult::Ok(l) => l, + }; + + // Should not be able to acquire a lock again + let r = d.try_lock(client, dst.as_ref(), None, None).await; + assert!(matches!(r, Ok(TryLockResult::Conflict(_)))); + + // But should still be able to reclaim lock and perform copy + d.copy_if_not_exists(client, &src, &dst).await.unwrap(); + + match d.try_lock(client, dst.as_ref(), None, None).await.unwrap() { + TryLockResult::Conflict(new) => { + // Should have incremented generation to do so + assert_eq!(new.generation, existing.generation + 1); + } + _ => panic!("Should conflict"), + } + + let rng = rng(); + let etag = 
String::from_utf8(rng.sample_iter(Alphanumeric).take(32).collect()).unwrap(); + let t = Some(etag.as_str()); + + let l = match d.try_lock(client, dst.as_ref(), t, None).await.unwrap() { + TryLockResult::Ok(l) => l, + _ => panic!("should not conflict"), + }; + + match d.try_lock(client, dst.as_ref(), t, None).await.unwrap() { + TryLockResult::Conflict(c) => assert_eq!(l.generation, c.generation), + _ => panic!("should conflict"), + } + + match d.try_lock(client, dst.as_ref(), t, Some(&l)).await.unwrap() { + TryLockResult::Ok(new) => assert_eq!(new.generation, l.generation + 1), + _ => panic!("should not conflict"), + } + } +} diff --git a/rust/object_store/src/aws/mod.rs b/rust/object_store/src/aws/mod.rs new file mode 100644 index 0000000000..4abf374867 --- /dev/null +++ b/rust/object_store/src/aws/mod.rs @@ -0,0 +1,903 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! An object store implementation for S3 +//! +//! ## Multipart uploads +//! +//! Multipart uploads can be initiated with the [ObjectStore::put_multipart] method. +//! +//! If the writer fails for any reason, you may have parts uploaded to AWS but not +//! used that you will be charged for. [`MultipartUpload::abort`] may be invoked to drop +//! these unneeded parts, however, it is recommended that you consider implementing +//! [automatic cleanup] of unused parts that are older than some threshold. +//! +//! 
[automatic cleanup]: https://aws.amazon.com/blogs/aws/s3-lifecycle-management-update-support-for-multipart-uploads-and-delete-markers/ + +use async_trait::async_trait; +use futures::stream::BoxStream; +use futures::{StreamExt, TryStreamExt}; +use reqwest::header::{HeaderName, IF_MATCH, IF_NONE_MATCH}; +use reqwest::{Method, StatusCode}; +use std::{sync::Arc, time::Duration}; +use url::Url; + +use crate::aws::client::{CompleteMultipartMode, PutPartPayload, RequestError, S3Client}; +use crate::client::get::GetClientExt; +use crate::client::list::{ListClient, ListClientExt}; +use crate::client::CredentialProvider; +use crate::multipart::{MultipartStore, PartId}; +use crate::signer::Signer; +use crate::util::STRICT_ENCODE_SET; +use crate::{ + Error, GetOptions, GetResult, ListResult, MultipartId, MultipartUpload, ObjectMeta, + ObjectStore, Path, PutMode, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, + UploadPart, +}; + +static TAGS_HEADER: HeaderName = HeaderName::from_static("x-amz-tagging"); +static COPY_SOURCE_HEADER: HeaderName = HeaderName::from_static("x-amz-copy-source"); + +mod builder; +mod checksum; +mod client; +mod credential; +mod dynamo; +mod precondition; + +#[cfg(not(target_arch = "wasm32"))] +mod resolve; + +pub use builder::{AmazonS3Builder, AmazonS3ConfigKey}; +pub use checksum::Checksum; +pub use dynamo::DynamoCommit; +pub use precondition::{S3ConditionalPut, S3CopyIfNotExists}; + +#[cfg(not(target_arch = "wasm32"))] +pub use resolve::resolve_bucket_region; + +/// This struct is used to maintain the URI path encoding +const STRICT_PATH_ENCODE_SET: percent_encoding::AsciiSet = STRICT_ENCODE_SET.remove(b'/'); + +const STORE: &str = "S3"; + +/// [`CredentialProvider`] for [`AmazonS3`] +pub type AwsCredentialProvider = Arc>; +use crate::client::parts::Parts; +use crate::list::{PaginatedListOptions, PaginatedListResult, PaginatedListStore}; +pub use credential::{AwsAuthorizer, AwsCredential}; + +/// Interface for [Amazon S3](https://aws.amazon.com/s3/). +#[derive(Debug, Clone)] +pub struct AmazonS3 { + client: Arc, +} + +impl std::fmt::Display for AmazonS3 { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "AmazonS3({})", self.client.config.bucket) + } +} + +impl AmazonS3 { + /// Returns the [`AwsCredentialProvider`] used by [`AmazonS3`] + pub fn credentials(&self) -> &AwsCredentialProvider { + &self.client.config.credentials + } + + /// Create a full URL to the resource specified by `path` with this instance's configuration. + fn path_url(&self, path: &Path) -> String { + self.client.config.path_url(path) + } +} + +#[async_trait] +impl Signer for AmazonS3 { + /// Create a URL containing the relevant [AWS SigV4] query parameters that authorize a request + /// via `method` to the resource at `path` valid for the duration specified in `expires_in`. + /// + /// [AWS SigV4]: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html + /// + /// # Example + /// + /// This example returns a URL that will enable a user to upload a file to + /// "some-folder/some-file.txt" in the next hour. 
+ /// + /// ``` + /// # async fn example() -> Result<(), Box> { + /// # use object_store::{aws::AmazonS3Builder, path::Path, signer::Signer}; + /// # use reqwest::Method; + /// # use std::time::Duration; + /// # + /// let region = "us-east-1"; + /// let s3 = AmazonS3Builder::new() + /// .with_region(region) + /// .with_bucket_name("my-bucket") + /// .with_access_key_id("my-access-key-id") + /// .with_secret_access_key("my-secret-access-key") + /// .build()?; + /// + /// let url = s3.signed_url( + /// Method::PUT, + /// &Path::from("some-folder/some-file.txt"), + /// Duration::from_secs(60 * 60) + /// ).await?; + /// # Ok(()) + /// # } + /// ``` + async fn signed_url(&self, method: Method, path: &Path, expires_in: Duration) -> Result { + let credential = self.credentials().get_credential().await?; + let authorizer = AwsAuthorizer::new(&credential, "s3", &self.client.config.region) + .with_request_payer(self.client.config.request_payer); + + let path_url = self.path_url(path); + let mut url = path_url.parse().map_err(|e| Error::Generic { + store: STORE, + source: format!("Unable to parse url {path_url}: {e}").into(), + })?; + + authorizer.sign(method, &mut url, expires_in); + + Ok(url) + } +} + +#[async_trait] +impl ObjectStore for AmazonS3 { + async fn put_opts( + &self, + location: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result { + let PutOptions { + mode, + tags, + attributes, + extensions, + } = opts; + + let request = self + .client + .request(Method::PUT, location) + .with_payload(payload) + .with_attributes(attributes) + .with_tags(tags) + .with_extensions(extensions) + .with_encryption_headers(); + + match (mode, &self.client.config.conditional_put) { + (PutMode::Overwrite, _) => request.idempotent(true).do_put().await, + (PutMode::Create, S3ConditionalPut::Disabled) => Err(Error::NotImplemented), + (PutMode::Create, S3ConditionalPut::ETagMatch) => { + match request.header(&IF_NONE_MATCH, "*").do_put().await { + // Technically If-None-Match should return NotModified but some stores, + // such as R2, instead return PreconditionFailed + // https://developers.cloudflare.com/r2/api/s3/extensions/#conditional-operations-in-putobject + Err(e @ Error::NotModified { .. } | e @ Error::Precondition { .. }) => { + Err(Error::AlreadyExists { + path: location.to_string(), + source: Box::new(e), + }) + } + r => r, + } + } + #[allow(deprecated)] + (PutMode::Create, S3ConditionalPut::Dynamo(d)) => { + d.conditional_op(&self.client, location, None, move || request.do_put()) + .await + } + (PutMode::Update(v), put) => { + let etag = v.e_tag.ok_or_else(|| Error::Generic { + store: STORE, + source: "ETag required for conditional put".to_string().into(), + })?; + match put { + S3ConditionalPut::ETagMatch => { + match request + .header(&IF_MATCH, etag.as_str()) + // Real S3 will occasionally report 409 Conflict + // if there are concurrent `If-Match` requests + // in flight, so we need to be prepared to retry + // 409 responses. + .retry_on_conflict(true) + .do_put() + .await + { + // Real S3 reports NotFound rather than PreconditionFailed when the + // object doesn't exist. Convert to PreconditionFailed for + // consistency with R2. This also matches what the HTTP spec + // says the behavior should be. 
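+                            // (per RFC 9110 §13.1.1, `If-Match` cannot match a
+                            // missing representation, so 412 is the right signal)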
+ Err(Error::NotFound { path, source }) => { + Err(Error::Precondition { path, source }) + } + r => r, + } + } + #[allow(deprecated)] + S3ConditionalPut::Dynamo(d) => { + d.conditional_op(&self.client, location, Some(&etag), move || { + request.do_put() + }) + .await + } + S3ConditionalPut::Disabled => Err(Error::NotImplemented), + } + } + } + } + + async fn put_multipart_opts( + &self, + location: &Path, + opts: PutMultipartOptions, + ) -> Result> { + let upload_id = self.client.create_multipart(location, opts).await?; + + Ok(Box::new(S3MultiPartUpload { + part_idx: 0, + state: Arc::new(UploadState { + client: Arc::clone(&self.client), + location: location.clone(), + upload_id: upload_id.clone(), + parts: Default::default(), + }), + })) + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { + self.client.get_opts(location, options).await + } + + async fn delete(&self, location: &Path) -> Result<()> { + self.client.request(Method::DELETE, location).send().await?; + Ok(()) + } + + fn delete_stream<'a>( + &'a self, + locations: BoxStream<'a, Result>, + ) -> BoxStream<'a, Result> { + locations + .try_chunks(1_000) + .map(move |locations| async { + // Early return the error. We ignore the paths that have already been + // collected into the chunk. + let locations = locations.map_err(|e| e.1)?; + self.client + .bulk_delete_request(locations) + .await + .map(futures::stream::iter) + }) + .buffered(20) + .try_flatten() + .boxed() + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { + self.client.list(prefix) + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'static, Result> { + if self.client.config.is_s3_express() { + let offset = offset.clone(); + // S3 Express does not support start-after + return self + .client + .list(prefix) + .try_filter(move |f| futures::future::ready(f.location > offset)) + .boxed(); + } + + self.client.list_with_offset(prefix, offset) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { + self.client.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + self.client + .copy_request(from, to) + .idempotent(true) + .send() + .await?; + Ok(()) + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + let (k, v, status) = match &self.client.config.copy_if_not_exists { + Some(S3CopyIfNotExists::Header(k, v)) => (k, v, StatusCode::PRECONDITION_FAILED), + Some(S3CopyIfNotExists::HeaderWithStatus(k, v, status)) => (k, v, *status), + Some(S3CopyIfNotExists::Multipart) => { + let upload_id = self + .client + .create_multipart(to, PutMultipartOptions::default()) + .await?; + + let res = async { + let part_id = self + .client + .put_part(to, &upload_id, 0, PutPartPayload::Copy(from)) + .await?; + match self + .client + .complete_multipart( + to, + &upload_id, + vec![part_id], + CompleteMultipartMode::Create, + ) + .await + { + Err(e @ Error::Precondition { .. }) => Err(Error::AlreadyExists { + path: to.to_string(), + source: Box::new(e), + }), + Ok(_) => Ok(()), + Err(e) => Err(e), + } + } + .await; + + // If the multipart upload failed, make a best effort attempt to + // clean it up. It's the caller's responsibility to add a + // lifecycle rule if guaranteed cleanup is required, as we + // cannot protect against an ill-timed process crash. 
+ if res.is_err() { + let _ = self.client.abort_multipart(to, &upload_id).await; + } + + return res; + } + #[allow(deprecated)] + Some(S3CopyIfNotExists::Dynamo(lock)) => { + return lock.copy_if_not_exists(&self.client, from, to).await + } + None => { + return Err(Error::NotSupported { + source: "S3 does not support copy-if-not-exists".to_string().into(), + }) + } + }; + + let req = self.client.copy_request(from, to); + match req.header(k, v).send().await { + Err(RequestError::Retry { source, path }) if source.status() == Some(status) => { + Err(Error::AlreadyExists { + source: Box::new(source), + path, + }) + } + Err(e) => Err(e.into()), + Ok(_) => Ok(()), + } + } +} + +#[derive(Debug)] +struct S3MultiPartUpload { + part_idx: usize, + state: Arc, +} + +#[derive(Debug)] +struct UploadState { + parts: Parts, + location: Path, + upload_id: String, + client: Arc, +} + +#[async_trait] +impl MultipartUpload for S3MultiPartUpload { + fn put_part(&mut self, data: PutPayload) -> UploadPart { + let idx = self.part_idx; + self.part_idx += 1; + let state = Arc::clone(&self.state); + Box::pin(async move { + let part = state + .client + .put_part( + &state.location, + &state.upload_id, + idx, + PutPartPayload::Part(data), + ) + .await?; + state.parts.put(idx, part); + Ok(()) + }) + } + + async fn complete(&mut self) -> Result { + let parts = self.state.parts.finish(self.part_idx)?; + + self.state + .client + .complete_multipart( + &self.state.location, + &self.state.upload_id, + parts, + CompleteMultipartMode::Overwrite, + ) + .await + } + + async fn abort(&mut self) -> Result<()> { + self.state + .client + .request(Method::DELETE, &self.state.location) + .query(&[("uploadId", &self.state.upload_id)]) + .idempotent(true) + .send() + .await?; + + Ok(()) + } +} + +#[async_trait] +impl MultipartStore for AmazonS3 { + async fn create_multipart(&self, path: &Path) -> Result { + self.client + .create_multipart(path, PutMultipartOptions::default()) + .await + } + + async fn put_part( + &self, + path: &Path, + id: &MultipartId, + part_idx: usize, + data: PutPayload, + ) -> Result { + self.client + .put_part(path, id, part_idx, PutPartPayload::Part(data)) + .await + } + + async fn complete_multipart( + &self, + path: &Path, + id: &MultipartId, + parts: Vec, + ) -> Result { + self.client + .complete_multipart(path, id, parts, CompleteMultipartMode::Overwrite) + .await + } + + async fn abort_multipart(&self, path: &Path, id: &MultipartId) -> Result<()> { + self.client + .request(Method::DELETE, path) + .query(&[("uploadId", id)]) + .send() + .await?; + Ok(()) + } +} + +#[async_trait] +impl PaginatedListStore for AmazonS3 { + async fn list_paginated( + &self, + prefix: Option<&str>, + opts: PaginatedListOptions, + ) -> Result { + self.client.list_request(prefix, opts).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::client::get::GetClient; + use crate::client::retry::RetryContext; + use crate::client::SpawnedReqwestConnector; + use crate::integration::*; + use crate::tests::*; + use crate::ClientOptions; + use base64::prelude::BASE64_STANDARD; + use base64::Engine; + use http::HeaderMap; + + const NON_EXISTENT_NAME: &str = "nonexistentname"; + + #[tokio::test] + async fn write_multipart_file_with_signature() { + maybe_skip_integration!(); + + let bucket = "test-bucket-for-checksum"; + let store = AmazonS3Builder::from_env() + .with_bucket_name(bucket) + .with_checksum_algorithm(Checksum::SHA256) + .build() + .unwrap(); + + let str = "test.bin"; + let path = Path::parse(str).unwrap(); + let 
opts = PutMultipartOptions::default(); + let mut upload = store.put_multipart_opts(&path, opts).await.unwrap(); + + upload + .put_part(PutPayload::from(vec![0u8; 10_000_000])) + .await + .unwrap(); + upload + .put_part(PutPayload::from(vec![0u8; 5_000_000])) + .await + .unwrap(); + + let res = upload.complete().await.unwrap(); + assert!(res.e_tag.is_some(), "Should have valid etag"); + + store.delete(&path).await.unwrap(); + } + + #[tokio::test] + async fn write_multipart_file_with_signature_object_lock() { + maybe_skip_integration!(); + + let bucket = "test-object-lock"; + let store = AmazonS3Builder::from_env() + .with_bucket_name(bucket) + .with_checksum_algorithm(Checksum::SHA256) + .build() + .unwrap(); + + let str = "test.bin"; + let path = Path::parse(str).unwrap(); + let opts = PutMultipartOptions::default(); + let mut upload = store.put_multipart_opts(&path, opts).await.unwrap(); + + upload + .put_part(PutPayload::from(vec![0u8; 10_000_000])) + .await + .unwrap(); + upload + .put_part(PutPayload::from(vec![0u8; 5_000_000])) + .await + .unwrap(); + + let res = upload.complete().await.unwrap(); + assert!(res.e_tag.is_some(), "Should have valid etag"); + + store.delete(&path).await.unwrap(); + } + + #[tokio::test] + async fn s3_test() { + maybe_skip_integration!(); + let config = AmazonS3Builder::from_env(); + + let integration = config.build().unwrap(); + let config = &integration.client.config; + let test_not_exists = config.copy_if_not_exists.is_some(); + let test_conditional_put = config.conditional_put != S3ConditionalPut::Disabled; + + put_get_delete_list(&integration).await; + get_opts(&integration).await; + list_uses_directories_correctly(&integration).await; + list_with_delimiter(&integration).await; + rename_and_copy(&integration).await; + stream_get(&integration).await; + multipart(&integration, &integration).await; + multipart_race_condition(&integration, true).await; + multipart_out_of_order(&integration).await; + signing(&integration).await; + s3_encryption(&integration).await; + put_get_attributes(&integration).await; + list_paginated(&integration, &integration).await; + + // Object tagging is not supported by S3 Express One Zone + if config.session_provider.is_none() { + tagging( + Arc::new(AmazonS3 { + client: Arc::clone(&integration.client), + }), + !config.disable_tagging, + |p| { + let client = Arc::clone(&integration.client); + async move { client.get_object_tagging(&p).await } + }, + ) + .await; + } + + if test_not_exists { + copy_if_not_exists(&integration).await; + } + if test_conditional_put { + put_opts(&integration, true).await; + } + + // run integration test with unsigned payload enabled + let builder = AmazonS3Builder::from_env().with_unsigned_payload(true); + let integration = builder.build().unwrap(); + put_get_delete_list(&integration).await; + + // run integration test with checksum set to sha256 + let builder = AmazonS3Builder::from_env().with_checksum_algorithm(Checksum::SHA256); + let integration = builder.build().unwrap(); + put_get_delete_list(&integration).await; + + match &integration.client.config.copy_if_not_exists { + #[allow(deprecated)] + Some(S3CopyIfNotExists::Dynamo(d)) => dynamo::integration_test(&integration, d).await, + _ => eprintln!("Skipping dynamo integration test - dynamo not configured"), + }; + } + + #[tokio::test] + async fn s3_test_get_nonexistent_location() { + maybe_skip_integration!(); + let integration = AmazonS3Builder::from_env().build().unwrap(); + + let location = Path::from_iter([NON_EXISTENT_NAME]); + + let err = 
get_nonexistent_object(&integration, Some(location)) + .await + .unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err); + } + + #[tokio::test] + async fn s3_test_get_nonexistent_bucket() { + maybe_skip_integration!(); + let config = AmazonS3Builder::from_env().with_bucket_name(NON_EXISTENT_NAME); + let integration = config.build().unwrap(); + + let location = Path::from_iter([NON_EXISTENT_NAME]); + + let err = integration.get(&location).await.unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err); + } + + #[tokio::test] + async fn s3_test_put_nonexistent_bucket() { + maybe_skip_integration!(); + let config = AmazonS3Builder::from_env().with_bucket_name(NON_EXISTENT_NAME); + let integration = config.build().unwrap(); + + let location = Path::from_iter([NON_EXISTENT_NAME]); + let data = PutPayload::from("arbitrary data"); + + let err = integration.put(&location, data).await.unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err); + } + + #[tokio::test] + async fn s3_test_delete_nonexistent_location() { + maybe_skip_integration!(); + let integration = AmazonS3Builder::from_env().build().unwrap(); + + let location = Path::from_iter([NON_EXISTENT_NAME]); + + integration.delete(&location).await.unwrap(); + } + + #[tokio::test] + async fn s3_test_delete_nonexistent_bucket() { + maybe_skip_integration!(); + let config = AmazonS3Builder::from_env().with_bucket_name(NON_EXISTENT_NAME); + let integration = config.build().unwrap(); + + let location = Path::from_iter([NON_EXISTENT_NAME]); + + let err = integration.delete(&location).await.unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err); + } + + #[tokio::test] + #[ignore = "Tests shouldn't call use remote services by default"] + async fn test_disable_creds() { + // https://registry.opendata.aws/daylight-osm/ + let v1 = AmazonS3Builder::new() + .with_bucket_name("daylight-map-distribution") + .with_region("us-west-1") + .with_access_key_id("local") + .with_secret_access_key("development") + .build() + .unwrap(); + + let prefix = Path::from("release"); + + v1.list_with_delimiter(Some(&prefix)).await.unwrap_err(); + + let v2 = AmazonS3Builder::new() + .with_bucket_name("daylight-map-distribution") + .with_region("us-west-1") + .with_skip_signature(true) + .build() + .unwrap(); + + v2.list_with_delimiter(Some(&prefix)).await.unwrap(); + } + + async fn s3_encryption(store: &AmazonS3) { + maybe_skip_integration!(); + + let data = PutPayload::from(vec![3u8; 1024]); + + let encryption_headers: HeaderMap = store.client.config.encryption_headers.clone().into(); + let expected_encryption = + if let Some(encryption_type) = encryption_headers.get("x-amz-server-side-encryption") { + encryption_type + } else { + eprintln!("Skipping S3 encryption test - encryption not configured"); + return; + }; + + let locations = [ + Path::from("test-encryption-1"), + Path::from("test-encryption-2"), + Path::from("test-encryption-3"), + ]; + + store.put(&locations[0], data.clone()).await.unwrap(); + store.copy(&locations[0], &locations[1]).await.unwrap(); + + let mut upload = store.put_multipart(&locations[2]).await.unwrap(); + upload.put_part(data.clone()).await.unwrap(); + upload.complete().await.unwrap(); + + for location in &locations { + let mut context = RetryContext::new(&store.client.config.retry_config); + + let res = store + .client + .get_request(&mut context, location, GetOptions::default()) + .await + .unwrap(); + + let headers = res.headers(); + assert_eq!( + 
headers + .get("x-amz-server-side-encryption") + .expect("object is not encrypted"), + expected_encryption + ); + + store.delete(location).await.unwrap(); + } + } + + /// See CONTRIBUTING.md for the MinIO setup for this test. + #[tokio::test] + async fn test_s3_ssec_encryption_with_minio() { + if std::env::var("TEST_S3_SSEC_ENCRYPTION").is_err() { + eprintln!("Skipping S3 SSE-C encryption test"); + return; + } + eprintln!("Running S3 SSE-C encryption test"); + + let customer_key = "1234567890abcdef1234567890abcdef"; + let expected_md5 = "JMwgiexXqwuPqIPjYFmIZQ=="; + + let store = AmazonS3Builder::from_env() + .with_ssec_encryption(BASE64_STANDARD.encode(customer_key)) + .with_client_options(ClientOptions::default().with_allow_invalid_certificates(true)) + .build() + .unwrap(); + + let data = PutPayload::from(vec![3u8; 1024]); + + let locations = [ + Path::from("test-encryption-1"), + Path::from("test-encryption-2"), + Path::from("test-encryption-3"), + ]; + + // Test put with sse-c. + store.put(&locations[0], data.clone()).await.unwrap(); + + // Test copy with sse-c. + store.copy(&locations[0], &locations[1]).await.unwrap(); + + // Test multipart upload with sse-c. + let mut upload = store.put_multipart(&locations[2]).await.unwrap(); + upload.put_part(data.clone()).await.unwrap(); + upload.complete().await.unwrap(); + + // Test get with sse-c. + for location in &locations { + let mut context = RetryContext::new(&store.client.config.retry_config); + + let res = store + .client + .get_request(&mut context, location, GetOptions::default()) + .await + .unwrap(); + + let headers = res.headers(); + assert_eq!( + headers + .get("x-amz-server-side-encryption-customer-algorithm") + .expect("object is not encrypted with SSE-C"), + "AES256" + ); + + assert_eq!( + headers + .get("x-amz-server-side-encryption-customer-key-MD5") + .expect("object is not encrypted with SSE-C"), + expected_md5 + ); + + store.delete(location).await.unwrap(); + } + } + + /// Integration test that ensures I/O is done on an alternate threadpool + /// when using the `SpawnedReqwestConnector`. 
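+    ///
+    /// The store's requests are driven from a runtime built without I/O
+    /// enabled, so the test only passes if the connector actually hands the
+    /// network I/O off to the spawned runtime.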
+ #[test] + fn s3_alternate_threadpool_spawned_request_connector() { + maybe_skip_integration!(); + let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + + // Runtime with I/O enabled + let io_runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() // <-- turns on IO + .build() + .unwrap(); + + // Runtime without I/O enabled + let non_io_runtime = tokio::runtime::Builder::new_current_thread() + // note: no call to enable_all + .build() + .unwrap(); + + // run the io runtime in a different thread + let io_handle = io_runtime.handle().clone(); + let thread_handle = std::thread::spawn(move || { + io_runtime.block_on(async move { + shutdown_rx.await.unwrap(); + }); + }); + + let store = AmazonS3Builder::from_env() + // use different bucket to avoid collisions with other tests + .with_bucket_name("test-bucket-for-spawn") + .with_http_connector(SpawnedReqwestConnector::new(io_handle)) + .build() + .unwrap(); + + // run a request on the non io runtime -- will fail if the connector + // does not spawn the request to the io runtime + non_io_runtime + .block_on(async move { + let path = Path::from("alternate_threadpool/test.txt"); + store.delete(&path).await.ok(); // remove the file if it exists from prior runs + store.put(&path, "foo".into()).await?; + let res = store.get(&path).await?.bytes().await?; + assert_eq!(res.as_ref(), b"foo"); + store.delete(&path).await?; // cleanup + Ok(()) as Result<()> + }) + .expect("failed to run request on non io runtime"); + + // shutdown the io runtime and thread + shutdown_tx.send(()).ok(); + thread_handle.join().expect("runtime thread panicked"); + } +} diff --git a/rust/object_store/src/aws/precondition.rs b/rust/object_store/src/aws/precondition.rs new file mode 100644 index 0000000000..2f11e4f92e --- /dev/null +++ b/rust/object_store/src/aws/precondition.rs @@ -0,0 +1,286 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::aws::dynamo::DynamoCommit; +use crate::config::Parse; + +use itertools::Itertools; + +/// Configure how to provide [`ObjectStore::copy_if_not_exists`] for [`AmazonS3`]. +/// +/// [`ObjectStore::copy_if_not_exists`]: crate::ObjectStore::copy_if_not_exists +/// [`AmazonS3`]: super::AmazonS3 +#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum S3CopyIfNotExists { + /// Some S3-compatible stores, such as Cloudflare R2, support copy if not exists + /// semantics through custom headers. + /// + /// If set, [`ObjectStore::copy_if_not_exists`] will perform a normal copy operation + /// with the provided header pair, and expect the store to fail with `412 Precondition Failed` + /// if the destination file already exists. 
+ /// + /// Encoded as `header:<HEADER_NAME>:<HEADER_VALUE>` ignoring whitespace + /// + /// For example `header: cf-copy-destination-if-none-match: *`, would set + /// the header `cf-copy-destination-if-none-match` to `*` + /// + /// [`ObjectStore::copy_if_not_exists`]: crate::ObjectStore::copy_if_not_exists + Header(String, String), + /// The same as [`S3CopyIfNotExists::Header`] but allows custom status code checking, for object stores that return values + /// other than 412. + /// + /// Encoded as `header-with-status:<HEADER_NAME>:<HEADER_VALUE>:<STATUS>` ignoring whitespace + HeaderWithStatus(String, String, reqwest::StatusCode), + /// Native Amazon S3 supports copy if not exists through a multipart upload + /// where the upload copies an existing object and is completed only if the + /// new object does not already exist. + /// + /// WARNING: When using this mode, `copy_if_not_exists` does not copy tags + /// or attributes from the source object. + /// + /// WARNING: When using this mode, `copy_if_not_exists` makes only a best + /// effort attempt to clean up the multipart upload if the copy operation + /// fails. Consider using a lifecycle rule to automatically clean up + /// abandoned multipart uploads. See [the module + /// docs](super#multipart-uploads) for details. + /// + /// Encoded as `multipart` ignoring whitespace. + Multipart, + /// The name of a DynamoDB table to use for coordination + /// + /// Encoded as either `dynamo:<TABLE_NAME>` or `dynamo:<TABLE_NAME>:<TIMEOUT_MS>` + /// ignoring whitespace. The default timeout is used if not specified + /// + /// See [`DynamoCommit`] for more information + /// + /// This will use the same region, credentials and endpoint as configured for S3 + #[deprecated(note = "Use S3CopyIfNotExists::Multipart")] + Dynamo(DynamoCommit), +} + +impl std::fmt::Display for S3CopyIfNotExists { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Header(k, v) => write!(f, "header: {k}: {v}"), + Self::HeaderWithStatus(k, v, code) => { + write!(f, "header-with-status: {k}: {v}: {}", code.as_u16()) + } + Self::Multipart => f.write_str("multipart"), + #[allow(deprecated)] + Self::Dynamo(lock) => write!(f, "dynamo: {}", lock.table_name()), + } + } +} + +impl S3CopyIfNotExists { + fn from_str(s: &str) -> Option<Self> { + if s.trim() == "multipart" { + return Some(Self::Multipart); + }; + + let (variant, value) = s.split_once(':')?; + match variant.trim() { + "header" => { + let (k, v) = value.split_once(':')?; + Some(Self::Header(k.trim().to_string(), v.trim().to_string())) + } + "header-with-status" => { + let (k, v, status) = value.split(':').collect_tuple()?; + + let code = status.trim().parse().ok()?; + + Some(Self::HeaderWithStatus( + k.trim().to_string(), + v.trim().to_string(), + code, + )) + } + #[allow(deprecated)] + "dynamo" => Some(Self::Dynamo(DynamoCommit::from_str(value)?)), + _ => None, + } + } +} + +impl Parse for S3CopyIfNotExists { + fn parse(v: &str) -> crate::Result<Self> { + Self::from_str(v).ok_or_else(|| crate::Error::Generic { + store: "Config", + source: format!("Failed to parse \"{v}\" as S3CopyIfNotExists").into(), + }) + } +} + +/// Configure how to provide conditional put support for [`AmazonS3`].
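+ /// + /// A hedged sketch of configuring this from its string encoding; the + /// `"aws_conditional_put"` key name is assumed here, not part of this diff: + /// + /// ```ignore + /// use object_store::aws::AmazonS3Builder; + /// // "etag" parses to S3ConditionalPut::ETagMatch via the Parse impl below + /// let s3 = AmazonS3Builder::new() + ///     .with_bucket_name("my-bucket") + ///     .with_config("aws_conditional_put".parse().unwrap(), "etag") + ///     .build(); + /// ```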
+/// + /// [`AmazonS3`]: super::AmazonS3 +#[derive(Debug, Clone, Eq, PartialEq, Default)] +#[allow(missing_copy_implementations)] +#[non_exhaustive] +pub enum S3ConditionalPut { + /// Some S3-compatible stores, such as Cloudflare R2 and MinIO, support conditional + /// put using the standard [HTTP precondition] headers If-Match and If-None-Match + /// + /// Encoded as `etag` ignoring whitespace + /// + /// [HTTP precondition]: https://datatracker.ietf.org/doc/html/rfc9110#name-preconditions + #[default] + ETagMatch, + + /// The name of a DynamoDB table to use for coordination + /// + /// Encoded as either `dynamo:<TABLE_NAME>` or `dynamo:<TABLE_NAME>:<TIMEOUT_MS>` + /// ignoring whitespace. The default timeout is used if not specified + /// + /// See [`DynamoCommit`] for more information + /// + /// This will use the same region, credentials and endpoint as configured for S3 + #[deprecated(note = "Use S3ConditionalPut::ETagMatch")] + Dynamo(DynamoCommit), + + /// Disable `conditional put` + Disabled, +} + +impl std::fmt::Display for S3ConditionalPut { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::ETagMatch => write!(f, "etag"), + #[allow(deprecated)] + Self::Dynamo(lock) => write!(f, "dynamo: {}", lock.table_name()), + Self::Disabled => write!(f, "disabled"), + } + } +} + +impl S3ConditionalPut { + fn from_str(s: &str) -> Option<Self> { + match s.trim() { + "etag" => Some(Self::ETagMatch), + "disabled" => Some(Self::Disabled), + trimmed => match trimmed.split_once(':')? { + #[allow(deprecated)] + ("dynamo", s) => Some(Self::Dynamo(DynamoCommit::from_str(s)?)), + _ => None, + }, + } + } +} + +impl Parse for S3ConditionalPut { + fn parse(v: &str) -> crate::Result<Self> { + Self::from_str(v).ok_or_else(|| crate::Error::Generic { + store: "Config", + source: format!("Failed to parse \"{v}\" as S3ConditionalPut").into(), + }) + } +} + +#[cfg(test)] +mod tests { + use super::S3CopyIfNotExists; + use crate::aws::{DynamoCommit, S3ConditionalPut}; + + #[test] + fn parse_s3_copy_if_not_exists_header() { + let input = "header: cf-copy-destination-if-none-match: *"; + let expected = Some(S3CopyIfNotExists::Header( + "cf-copy-destination-if-none-match".to_owned(), + "*".to_owned(), + )); + + assert_eq!(expected, S3CopyIfNotExists::from_str(input)); + } + + #[test] + fn parse_s3_copy_if_not_exists_header_with_status() { + let input = "header-with-status:key:value:403"; + let expected = Some(S3CopyIfNotExists::HeaderWithStatus( + "key".to_owned(), + "value".to_owned(), + reqwest::StatusCode::FORBIDDEN, + )); + + assert_eq!(expected, S3CopyIfNotExists::from_str(input)); + } + + #[test] + #[allow(deprecated)] + fn parse_s3_copy_if_not_exists_dynamo() { + let input = "dynamo: table:100"; + let expected = Some(S3CopyIfNotExists::Dynamo( + DynamoCommit::new("table".into()).with_timeout(100), + )); + assert_eq!(expected, S3CopyIfNotExists::from_str(input)); + } + + #[test] + #[allow(deprecated)] + fn parse_s3_condition_put_dynamo() { + let input = "dynamo: table:1300"; + let expected = Some(S3ConditionalPut::Dynamo( + DynamoCommit::new("table".into()).with_timeout(1300), + )); + assert_eq!(expected, S3ConditionalPut::from_str(input)); + } + + #[test] + fn parse_s3_copy_if_not_exists_header_whitespace_invariant() { + let expected = Some(S3CopyIfNotExists::Header( + "cf-copy-destination-if-none-match".to_owned(), + "*".to_owned(), + )); + + const INPUTS: &[&str] = &[ + "header:cf-copy-destination-if-none-match:*", + "header: cf-copy-destination-if-none-match:*", + "header: cf-copy-destination-if-none-match: *", + 
"header : cf-copy-destination-if-none-match: *", + "header : cf-copy-destination-if-none-match : *", + "header : cf-copy-destination-if-none-match : * ", + ]; + + for input in INPUTS { + assert_eq!(expected, S3CopyIfNotExists::from_str(input)); + } + } + + #[test] + fn parse_s3_copy_if_not_exists_header_with_status_whitespace_invariant() { + let expected = Some(S3CopyIfNotExists::HeaderWithStatus( + "key".to_owned(), + "value".to_owned(), + reqwest::StatusCode::FORBIDDEN, + )); + + const INPUTS: &[&str] = &[ + "header-with-status:key:value:403", + "header-with-status: key:value:403", + "header-with-status: key: value:403", + "header-with-status: key: value: 403", + "header-with-status : key: value: 403", + "header-with-status : key : value: 403", + "header-with-status : key : value : 403", + "header-with-status : key : value : 403 ", + ]; + + for input in INPUTS { + assert_eq!(expected, S3CopyIfNotExists::from_str(input)); + } + } +} diff --git a/rust/object_store/src/aws/resolve.rs b/rust/object_store/src/aws/resolve.rs new file mode 100644 index 0000000000..66d1511c5c --- /dev/null +++ b/rust/object_store/src/aws/resolve.rs @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::aws::STORE; +use crate::{ClientOptions, Result}; + +/// A specialized `Error` for object store-related errors +#[derive(Debug, thiserror::Error)] +enum Error { + #[error("Bucket '{}' not found", bucket)] + BucketNotFound { bucket: String }, + + #[error("Failed to resolve region for bucket '{}'", bucket)] + ResolveRegion { + bucket: String, + source: reqwest::Error, + }, + + #[error("Failed to parse the region for bucket '{}'", bucket)] + RegionParse { bucket: String }, +} + +impl From for crate::Error { + fn from(source: Error) -> Self { + Self::Generic { + store: STORE, + source: Box::new(source), + } + } +} + +/// Get the bucket region using the [HeadBucket API]. This will fail if the bucket does not exist. 
+/// + /// [HeadBucket API]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadBucket.html +pub async fn resolve_bucket_region(bucket: &str, client_options: &ClientOptions) -> Result<String> { + use reqwest::StatusCode; + + let endpoint = format!("https://{bucket}.s3.amazonaws.com"); + + let client = client_options.client()?; + + let response = client.head(&endpoint).send().await.map_err(|source| { + let bucket = bucket.into(); + Error::ResolveRegion { bucket, source } + })?; + + if response.status() == StatusCode::NOT_FOUND { + let bucket = bucket.into(); + return Err(Error::BucketNotFound { bucket }.into()); + } + + let region = response + .headers() + .get("x-amz-bucket-region") + .and_then(|x| x.to_str().ok()) + .ok_or_else(|| Error::RegionParse { + bucket: bucket.into(), + })?; + + Ok(region.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_bucket_does_not_exist() { + let bucket = "please-dont-exist"; + + let result = resolve_bucket_region(bucket, &ClientOptions::new()).await; + + assert!(result.is_err()); + } +} diff --git a/rust/object_store/src/azure/builder.rs b/rust/object_store/src/azure/builder.rs new file mode 100644 index 0000000000..182bdf04e8 --- /dev/null +++ b/rust/object_store/src/azure/builder.rs @@ -0,0 +1,1259 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::azure::client::{AzureClient, AzureConfig}; +use crate::azure::credential::{ + AzureAccessKey, AzureCliCredential, ClientSecretOAuthProvider, FabricTokenOAuthProvider, + ImdsManagedIdentityProvider, WorkloadIdentityOAuthProvider, +}; +use crate::azure::{AzureCredential, AzureCredentialProvider, MicrosoftAzure, STORE}; +use crate::client::{http_connector, HttpConnector, TokenCredentialProvider}; +use crate::config::ConfigValue; +use crate::{ClientConfigKey, ClientOptions, Result, RetryConfig, StaticCredentialProvider}; +use percent_encoding::percent_decode_str; +use serde::{Deserialize, Serialize}; +use std::str::FromStr; +use std::sync::Arc; +use url::Url; + +/// The well-known account used by Azurite and the legacy Azure Storage Emulator. +/// +const EMULATOR_ACCOUNT: &str = "devstoreaccount1"; + +/// The well-known account key used by Azurite and the legacy Azure Storage Emulator. +/// +const EMULATOR_ACCOUNT_KEY: &str = + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="; + +const MSI_ENDPOINT_ENV_KEY: &str = "IDENTITY_ENDPOINT"; + +/// A specialized `Error` for Azure builder-related errors +#[derive(Debug, thiserror::Error)] +enum Error { + #[error("Unable to parse source url. 
Url: {}, Error: {}", url, source)] + UnableToParseUrl { + source: url::ParseError, + url: String, + }, + + #[error( + "Unable to parse emulator url {}={}, Error: {}", + env_name, + env_value, + source + )] + UnableToParseEmulatorUrl { + env_name: String, + env_value: String, + source: url::ParseError, + }, + + #[error("Account must be specified")] + MissingAccount {}, + + #[error("Container name must be specified")] + MissingContainerName {}, + + #[error( + "Unknown url scheme cannot be parsed into storage location: {}", + scheme + )] + UnknownUrlScheme { scheme: String }, + + #[error("URL did not match any known pattern for scheme: {}", url)] + UrlNotRecognised { url: String }, + + #[error("Failed parsing an SAS key")] + DecodeSasKey { source: std::str::Utf8Error }, + + #[error("Missing component in SAS query pair")] + MissingSasComponent {}, + + #[error("Configuration key: '{}' is not known.", key)] + UnknownConfigurationKey { key: String }, +} + +impl From<Error> for crate::Error { + fn from(source: Error) -> Self { + match source { + Error::UnknownConfigurationKey { key } => { + Self::UnknownConfigurationKey { store: STORE, key } + } + _ => Self::Generic { + store: STORE, + source: Box::new(source), + }, + } + } +} + +/// Configure a connection to Microsoft Azure Blob Storage container using +/// the specified credentials. +/// +/// # Example +/// ``` +/// # let ACCOUNT = "foo"; +/// # let BUCKET_NAME = "foo"; +/// # let ACCESS_KEY = "foo"; +/// # use object_store::azure::MicrosoftAzureBuilder; +/// let azure = MicrosoftAzureBuilder::new() +/// .with_account(ACCOUNT) +/// .with_access_key(ACCESS_KEY) +/// .with_container_name(BUCKET_NAME) +/// .build(); +/// ``` +#[derive(Default, Clone)] +pub struct MicrosoftAzureBuilder { + /// Account name + account_name: Option<String>, + /// Access key + access_key: Option<String>, + /// Container name + container_name: Option<String>, + /// Bearer token + bearer_token: Option<String>, + /// Client id + client_id: Option<String>, + /// Client secret + client_secret: Option<String>, + /// Tenant id + tenant_id: Option<String>, + /// Query pairs for shared access signature authorization + sas_query_pairs: Option<Vec<(String, String)>>, + /// Shared access signature + sas_key: Option<String>, + /// Authority host + authority_host: Option<String>, + /// Url + url: Option<String>, + /// When set to true, azurite storage emulator has to be used + use_emulator: ConfigValue<bool>, + /// Storage endpoint + endpoint: Option<String>, + /// Msi endpoint for acquiring managed identity token + msi_endpoint: Option<String>, + /// Object id for use with managed identity authentication + object_id: Option<String>, + /// Msi resource id for use with managed identity authentication + msi_resource_id: Option<String>, + /// File containing token for Azure AD workload identity federation + federated_token_file: Option<String>, + /// When set to true, azure cli has to be used for acquiring access token + use_azure_cli: ConfigValue<bool>, + /// Retry config + retry_config: RetryConfig, + /// Client options + client_options: ClientOptions, + /// Credentials + credentials: Option<AzureCredentialProvider>, + /// Skip signing requests + skip_signature: ConfigValue<bool>, + /// When set to true, fabric url scheme will be used + /// + /// i.e. 
https://{account_name}.dfs.fabric.microsoft.com + use_fabric_endpoint: ConfigValue<bool>, + /// When set to true, skips tagging objects + disable_tagging: ConfigValue<bool>, + /// Fabric token service url + fabric_token_service_url: Option<String>, + /// Fabric workload host + fabric_workload_host: Option<String>, + /// Fabric session token + fabric_session_token: Option<String>, + /// Fabric cluster identifier + fabric_cluster_identifier: Option<String>, + /// The [`HttpConnector`] to use + http_connector: Option<Arc<dyn HttpConnector>>, +} + +/// Configuration keys for [`MicrosoftAzureBuilder`] +/// +/// Configuration via keys can be done via [`MicrosoftAzureBuilder::with_config`] +/// +/// # Example +/// ``` +/// # use object_store::azure::{MicrosoftAzureBuilder, AzureConfigKey}; +/// let builder = MicrosoftAzureBuilder::new() +/// .with_config("azure_client_id".parse().unwrap(), "my-client-id") +/// .with_config(AzureConfigKey::AuthorityId, "my-tenant-id"); +/// ``` +#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, Deserialize, Serialize)] +#[non_exhaustive] +pub enum AzureConfigKey { + /// The name of the azure storage account + /// + /// Supported keys: + /// - `azure_storage_account_name` + /// - `account_name` + AccountName, + + /// Master key for accessing storage account + /// + /// Supported keys: + /// - `azure_storage_account_key` + /// - `azure_storage_access_key` + /// - `azure_storage_master_key` + /// - `access_key` + /// - `account_key` + /// - `master_key` + AccessKey, + + /// Service principal client id for authorizing requests + /// + /// Supported keys: + /// - `azure_storage_client_id` + /// - `azure_client_id` + /// - `client_id` + ClientId, + + /// Service principal client secret for authorizing requests + /// + /// Supported keys: + /// - `azure_storage_client_secret` + /// - `azure_client_secret` + /// - `client_secret` + ClientSecret, + + /// Tenant id used in oauth flows + /// + /// Supported keys: + /// - `azure_storage_tenant_id` + /// - `azure_storage_authority_id` + /// - `azure_tenant_id` + /// - `azure_authority_id` + /// - `tenant_id` + /// - `authority_id` + AuthorityId, + + /// Authority host used in oauth flows + /// + /// Supported keys: + /// - `azure_storage_authority_host` + /// - `azure_authority_host` + /// - `authority_host` + AuthorityHost, + + /// Shared access signature. + /// + /// The signature is expected to be percent-encoded, much as it is provided + /// in the Azure Storage Explorer or the Azure portal.
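+ /// + /// For example (an illustrative, made-up token): a value such as + /// `sv=2021-10-04&sr=c&sp=rcwl&sig=abc%2Fdef%3D` is percent-decoded and + /// split into query pairs that are appended to each request.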
+ /// + /// Supported keys: + /// - `azure_storage_sas_key` + /// - `azure_storage_sas_token` + /// - `sas_key` + /// - `sas_token` + SasKey, + + /// Bearer token + /// + /// Supported keys: + /// - `azure_storage_token` + /// - `bearer_token` + /// - `token` + Token, + + /// Use object store with azurite storage emulator + /// + /// Supported keys: + /// - `azure_storage_use_emulator` + /// - `object_store_use_emulator` + /// - `use_emulator` + UseEmulator, + + /// Override the endpoint used to communicate with blob storage + /// + /// Supported keys: + /// - `azure_storage_endpoint` + /// - `azure_endpoint` + /// - `endpoint` + Endpoint, + + /// Use object store with url scheme account.dfs.fabric.microsoft.com + /// + /// Supported keys: + /// - `azure_use_fabric_endpoint` + /// - `use_fabric_endpoint` + UseFabricEndpoint, + + /// Endpoint to request an IMDS managed identity token + /// + /// Supported keys: + /// - `azure_msi_endpoint` + /// - `azure_identity_endpoint` + /// - `identity_endpoint` + /// - `msi_endpoint` + MsiEndpoint, + + /// Object id for use with managed identity authentication + /// + /// Supported keys: + /// - `azure_object_id` + /// - `object_id` + ObjectId, + + /// Msi resource id for use with managed identity authentication + /// + /// Supported keys: + /// - `azure_msi_resource_id` + /// - `msi_resource_id` + MsiResourceId, + + /// File containing token for Azure AD workload identity federation + /// + /// Supported keys: + /// - `azure_federated_token_file` + /// - `federated_token_file` + FederatedTokenFile, + + /// Use azure cli for acquiring access token + /// + /// Supported keys: + /// - `azure_use_azure_cli` + /// - `use_azure_cli` + UseAzureCli, + + /// Skip signing requests + /// + /// Supported keys: + /// - `azure_skip_signature` + /// - `skip_signature` + SkipSignature, + + /// Container name + /// + /// Supported keys: + /// - `azure_container_name` + /// - `container_name` + ContainerName, + + /// Disables tagging objects + /// + /// This can be desirable if not supported by the backing store + /// + /// Supported keys: + /// - `azure_disable_tagging` + /// - `disable_tagging` + DisableTagging, + + /// Fabric token service url + /// + /// Supported keys: + /// - `azure_fabric_token_service_url` + /// - `fabric_token_service_url` + FabricTokenServiceUrl, + + /// Fabric workload host + /// + /// Supported keys: + /// - `azure_fabric_workload_host` + /// - `fabric_workload_host` + FabricWorkloadHost, + + /// Fabric session token + /// + /// Supported keys: + /// - `azure_fabric_session_token` + /// - `fabric_session_token` + FabricSessionToken, + + /// Fabric cluster identifier + /// + /// Supported keys: + /// - `azure_fabric_cluster_identifier` + /// - `fabric_cluster_identifier` + FabricClusterIdentifier, + + /// Client options + Client(ClientConfigKey), +} + +impl AsRef<str> for AzureConfigKey { + fn as_ref(&self) -> &str { + match self { + Self::AccountName => "azure_storage_account_name", + Self::AccessKey => "azure_storage_account_key", + Self::ClientId => "azure_storage_client_id", + Self::ClientSecret => "azure_storage_client_secret", + Self::AuthorityId => "azure_storage_tenant_id", + Self::AuthorityHost => "azure_storage_authority_host", + Self::SasKey => "azure_storage_sas_key", + Self::Token => "azure_storage_token", + Self::UseEmulator => "azure_storage_use_emulator", + Self::UseFabricEndpoint => "azure_use_fabric_endpoint", + Self::Endpoint => "azure_storage_endpoint", + Self::MsiEndpoint => "azure_msi_endpoint", + Self::ObjectId => 
"azure_object_id", + Self::MsiResourceId => "azure_msi_resource_id", + Self::FederatedTokenFile => "azure_federated_token_file", + Self::UseAzureCli => "azure_use_azure_cli", + Self::SkipSignature => "azure_skip_signature", + Self::ContainerName => "azure_container_name", + Self::DisableTagging => "azure_disable_tagging", + Self::FabricTokenServiceUrl => "azure_fabric_token_service_url", + Self::FabricWorkloadHost => "azure_fabric_workload_host", + Self::FabricSessionToken => "azure_fabric_session_token", + Self::FabricClusterIdentifier => "azure_fabric_cluster_identifier", + Self::Client(key) => key.as_ref(), + } + } +} + +impl FromStr for AzureConfigKey { + type Err = crate::Error; + + fn from_str(s: &str) -> Result { + match s { + "azure_storage_account_key" + | "azure_storage_access_key" + | "azure_storage_master_key" + | "master_key" + | "account_key" + | "access_key" => Ok(Self::AccessKey), + "azure_storage_account_name" | "account_name" => Ok(Self::AccountName), + "azure_storage_client_id" | "azure_client_id" | "client_id" => Ok(Self::ClientId), + "azure_storage_client_secret" | "azure_client_secret" | "client_secret" => { + Ok(Self::ClientSecret) + } + "azure_storage_tenant_id" + | "azure_storage_authority_id" + | "azure_tenant_id" + | "azure_authority_id" + | "tenant_id" + | "authority_id" => Ok(Self::AuthorityId), + "azure_storage_authority_host" | "azure_authority_host" | "authority_host" => { + Ok(Self::AuthorityHost) + } + "azure_storage_sas_key" | "azure_storage_sas_token" | "sas_key" | "sas_token" => { + Ok(Self::SasKey) + } + "azure_storage_token" | "bearer_token" | "token" => Ok(Self::Token), + "azure_storage_use_emulator" | "use_emulator" => Ok(Self::UseEmulator), + "azure_storage_endpoint" | "azure_endpoint" | "endpoint" => Ok(Self::Endpoint), + "azure_msi_endpoint" + | "azure_identity_endpoint" + | "identity_endpoint" + | "msi_endpoint" => Ok(Self::MsiEndpoint), + "azure_object_id" | "object_id" => Ok(Self::ObjectId), + "azure_msi_resource_id" | "msi_resource_id" => Ok(Self::MsiResourceId), + "azure_federated_token_file" | "federated_token_file" => Ok(Self::FederatedTokenFile), + "azure_use_fabric_endpoint" | "use_fabric_endpoint" => Ok(Self::UseFabricEndpoint), + "azure_use_azure_cli" | "use_azure_cli" => Ok(Self::UseAzureCli), + "azure_skip_signature" | "skip_signature" => Ok(Self::SkipSignature), + "azure_container_name" | "container_name" => Ok(Self::ContainerName), + "azure_disable_tagging" | "disable_tagging" => Ok(Self::DisableTagging), + "azure_fabric_token_service_url" | "fabric_token_service_url" => { + Ok(Self::FabricTokenServiceUrl) + } + "azure_fabric_workload_host" | "fabric_workload_host" => Ok(Self::FabricWorkloadHost), + "azure_fabric_session_token" | "fabric_session_token" => Ok(Self::FabricSessionToken), + "azure_fabric_cluster_identifier" | "fabric_cluster_identifier" => { + Ok(Self::FabricClusterIdentifier) + } + // Backwards compatibility + "azure_allow_http" => Ok(Self::Client(ClientConfigKey::AllowHttp)), + _ => match s.strip_prefix("azure_").unwrap_or(s).parse() { + Ok(key) => Ok(Self::Client(key)), + Err(_) => Err(Error::UnknownConfigurationKey { key: s.into() }.into()), + }, + } + } +} + +impl std::fmt::Debug for MicrosoftAzureBuilder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "MicrosoftAzureBuilder {{ account: {:?}, container_name: {:?} }}", + self.account_name, self.container_name + ) + } +} + +impl MicrosoftAzureBuilder { + /// Create a new [`MicrosoftAzureBuilder`] with default values. 
+ pub fn new() -> Self { + Default::default() + } + + /// Create an instance of [`MicrosoftAzureBuilder`] with values pre-populated from environment variables. + /// + /// Variables extracted from environment: + /// * AZURE_STORAGE_ACCOUNT_NAME: storage account name + /// * AZURE_STORAGE_ACCOUNT_KEY: storage account master key + /// * AZURE_STORAGE_ACCESS_KEY: alias for AZURE_STORAGE_ACCOUNT_KEY + /// * AZURE_STORAGE_CLIENT_ID -> client id for service principal authorization + /// * AZURE_STORAGE_CLIENT_SECRET -> client secret for service principal authorization + /// * AZURE_STORAGE_TENANT_ID -> tenant id used in oauth flows + /// # Example + /// ``` + /// use object_store::azure::MicrosoftAzureBuilder; + /// + /// let azure = MicrosoftAzureBuilder::from_env() + /// .with_container_name("foo") + /// .build(); + /// ``` + pub fn from_env() -> Self { + let mut builder = Self::default(); + for (os_key, os_value) in std::env::vars_os() { + if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) { + if key.starts_with("AZURE_") { + if let Ok(config_key) = key.to_ascii_lowercase().parse() { + builder = builder.with_config(config_key, value); + } + } + } + } + + if let Ok(text) = std::env::var(MSI_ENDPOINT_ENV_KEY) { + builder = builder.with_msi_endpoint(text); + } + + builder + } + + /// Parse available connection info from a well-known storage URL. + /// + /// The supported url schemes are: + /// + /// - `abfs[s]://<container>/<path>` (according to [fsspec](https://github.com/fsspec/adlfs)) + /// - `abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path>` + /// - `abfs[s]://<file_system>@<account_name>.dfs.fabric.microsoft.com/<path>` + /// - `az://<container>/<path>` (according to [fsspec](https://github.com/fsspec/adlfs)) + /// - `adl://<container>/<path>` (according to [fsspec](https://github.com/fsspec/adlfs)) + /// - `azure://<container>/<path>` (custom) + /// - `https://<account>.dfs.core.windows.net` + /// - `https://<account>.blob.core.windows.net` + /// - `https://<account>.blob.core.windows.net/<container>` + /// - `https://<account>.dfs.fabric.microsoft.com` + /// - `https://<account>.dfs.fabric.microsoft.com/<container>` + /// - `https://<account>.blob.fabric.microsoft.com` + /// - `https://<account>.blob.fabric.microsoft.com/<container>` + /// + /// Note: Settings derived from the URL will override any others set on this builder + /// + /// # Example + /// ``` + /// use object_store::azure::MicrosoftAzureBuilder; + /// + /// let azure = MicrosoftAzureBuilder::from_env() + /// .with_url("abfss://file_system@account.dfs.core.windows.net/") + /// .build(); + /// ``` + pub fn with_url(mut self, url: impl Into<String>) -> Self { + self.url = Some(url.into()); + self + } + + /// Set an option on the builder via a key-value pair.
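+ /// + /// A hedged sketch; keys may also be parsed from their string aliases + /// (see [`AzureConfigKey`]): + /// + /// ```ignore + /// use object_store::azure::{AzureConfigKey, MicrosoftAzureBuilder}; + /// let builder = MicrosoftAzureBuilder::new() + ///     .with_config(AzureConfigKey::AccountName, "my-account") + ///     .with_config("container_name".parse().unwrap(), "my-container"); + /// ```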
+ pub fn with_config(mut self, key: AzureConfigKey, value: impl Into<String>) -> Self { + match key { + AzureConfigKey::AccessKey => self.access_key = Some(value.into()), + AzureConfigKey::AccountName => self.account_name = Some(value.into()), + AzureConfigKey::ClientId => self.client_id = Some(value.into()), + AzureConfigKey::ClientSecret => self.client_secret = Some(value.into()), + AzureConfigKey::AuthorityId => self.tenant_id = Some(value.into()), + AzureConfigKey::AuthorityHost => self.authority_host = Some(value.into()), + AzureConfigKey::SasKey => self.sas_key = Some(value.into()), + AzureConfigKey::Token => self.bearer_token = Some(value.into()), + AzureConfigKey::MsiEndpoint => self.msi_endpoint = Some(value.into()), + AzureConfigKey::ObjectId => self.object_id = Some(value.into()), + AzureConfigKey::MsiResourceId => self.msi_resource_id = Some(value.into()), + AzureConfigKey::FederatedTokenFile => self.federated_token_file = Some(value.into()), + AzureConfigKey::UseAzureCli => self.use_azure_cli.parse(value), + AzureConfigKey::SkipSignature => self.skip_signature.parse(value), + AzureConfigKey::UseEmulator => self.use_emulator.parse(value), + AzureConfigKey::Endpoint => self.endpoint = Some(value.into()), + AzureConfigKey::UseFabricEndpoint => self.use_fabric_endpoint.parse(value), + AzureConfigKey::Client(key) => { + self.client_options = self.client_options.with_config(key, value) + } + AzureConfigKey::ContainerName => self.container_name = Some(value.into()), + AzureConfigKey::DisableTagging => self.disable_tagging.parse(value), + AzureConfigKey::FabricTokenServiceUrl => { + self.fabric_token_service_url = Some(value.into()) + } + AzureConfigKey::FabricWorkloadHost => self.fabric_workload_host = Some(value.into()), + AzureConfigKey::FabricSessionToken => self.fabric_session_token = Some(value.into()), + AzureConfigKey::FabricClusterIdentifier => { + self.fabric_cluster_identifier = Some(value.into()) + } + }; + self + } + + /// Get config value via an [`AzureConfigKey`]. 
+ /// + /// # Example + /// ``` + /// use object_store::azure::{MicrosoftAzureBuilder, AzureConfigKey}; + /// + /// let builder = MicrosoftAzureBuilder::from_env() + /// .with_account("foo"); + /// let account_name = builder.get_config_value(&AzureConfigKey::AccountName).unwrap_or_default(); + /// assert_eq!("foo", &account_name); + /// ``` + pub fn get_config_value(&self, key: &AzureConfigKey) -> Option<String> { + match key { + AzureConfigKey::AccountName => self.account_name.clone(), + AzureConfigKey::AccessKey => self.access_key.clone(), + AzureConfigKey::ClientId => self.client_id.clone(), + AzureConfigKey::ClientSecret => self.client_secret.clone(), + AzureConfigKey::AuthorityId => self.tenant_id.clone(), + AzureConfigKey::AuthorityHost => self.authority_host.clone(), + AzureConfigKey::SasKey => self.sas_key.clone(), + AzureConfigKey::Token => self.bearer_token.clone(), + AzureConfigKey::UseEmulator => Some(self.use_emulator.to_string()), + AzureConfigKey::UseFabricEndpoint => Some(self.use_fabric_endpoint.to_string()), + AzureConfigKey::Endpoint => self.endpoint.clone(), + AzureConfigKey::MsiEndpoint => self.msi_endpoint.clone(), + AzureConfigKey::ObjectId => self.object_id.clone(), + AzureConfigKey::MsiResourceId => self.msi_resource_id.clone(), + AzureConfigKey::FederatedTokenFile => self.federated_token_file.clone(), + AzureConfigKey::UseAzureCli => Some(self.use_azure_cli.to_string()), + AzureConfigKey::SkipSignature => Some(self.skip_signature.to_string()), + AzureConfigKey::Client(key) => self.client_options.get_config_value(key), + AzureConfigKey::ContainerName => self.container_name.clone(), + AzureConfigKey::DisableTagging => Some(self.disable_tagging.to_string()), + AzureConfigKey::FabricTokenServiceUrl => self.fabric_token_service_url.clone(), + AzureConfigKey::FabricWorkloadHost => self.fabric_workload_host.clone(), + AzureConfigKey::FabricSessionToken => self.fabric_session_token.clone(), + AzureConfigKey::FabricClusterIdentifier => self.fabric_cluster_identifier.clone(), + } + } + + /// Sets properties on this builder based on a URL + /// + /// This is a separate member function to allow fallible computation to + /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`] + fn parse_url(&mut self, url: &str) -> Result<()> { + let parsed = Url::parse(url).map_err(|source| { + let url = url.into(); + Error::UnableToParseUrl { url, source } + })?; + + let host = parsed + .host_str() + .ok_or_else(|| Error::UrlNotRecognised { url: url.into() })?; + + let validate = |s: &str| match s.contains('.') { + true => Err(Error::UrlNotRecognised { url: url.into() }), + false => Ok(s.to_string()), + }; + + match parsed.scheme() { + "adl" | "azure" => self.container_name = Some(validate(host)?), + "az" | "abfs" | "abfss" => { + // abfs(s) might refer to the fsspec convention abfs://<container>/<path> + // or the convention for the hadoop driver abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path> + if parsed.username().is_empty() { + self.container_name = Some(validate(host)?); + } else if let Some(a) = host.strip_suffix(".dfs.core.windows.net") { + self.container_name = Some(validate(parsed.username())?); + self.account_name = Some(validate(a)?); + } else if let Some(a) = host.strip_suffix(".dfs.fabric.microsoft.com") { + self.container_name = Some(validate(parsed.username())?); + self.account_name = Some(validate(a)?); + self.use_fabric_endpoint = true.into(); + } else { + return Err(Error::UrlNotRecognised { url: url.into() }.into()); + } + } + "https" => match host.split_once('.') { + Some((a, 
"dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => { + self.account_name = Some(validate(a)?); + let container = parsed.path_segments().unwrap().next().expect( + "iterator always contains at least one string (which may be empty)", + ); + if !container.is_empty() { + self.container_name = Some(validate(container)?); + } + } + Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => { + self.account_name = Some(validate(a)?); + // Attempt to infer the container name from the URL + // - https://onelake.dfs.fabric.microsoft.com///Files/test.csv + // - https://onelake.dfs.fabric.microsoft.com//.// + // + // See + let workspace = parsed.path_segments().unwrap().next().expect( + "iterator always contains at least one string (which may be empty)", + ); + if !workspace.is_empty() { + self.container_name = Some(workspace.to_string()) + } + self.use_fabric_endpoint = true.into(); + } + _ => return Err(Error::UrlNotRecognised { url: url.into() }.into()), + }, + scheme => { + let scheme = scheme.into(); + return Err(Error::UnknownUrlScheme { scheme }.into()); + } + } + Ok(()) + } + + /// Set the Azure Account (required) + pub fn with_account(mut self, account: impl Into) -> Self { + self.account_name = Some(account.into()); + self + } + + /// Set the Azure Container Name (required) + pub fn with_container_name(mut self, container_name: impl Into) -> Self { + self.container_name = Some(container_name.into()); + self + } + + /// Set the Azure Access Key (required - one of access key, bearer token, or client credentials) + pub fn with_access_key(mut self, access_key: impl Into) -> Self { + self.access_key = Some(access_key.into()); + self + } + + /// Set a static bearer token to be used for authorizing requests + pub fn with_bearer_token_authorization(mut self, bearer_token: impl Into) -> Self { + self.bearer_token = Some(bearer_token.into()); + self + } + + /// Set a client secret used for client secret authorization + pub fn with_client_secret_authorization( + mut self, + client_id: impl Into, + client_secret: impl Into, + tenant_id: impl Into, + ) -> Self { + self.client_id = Some(client_id.into()); + self.client_secret = Some(client_secret.into()); + self.tenant_id = Some(tenant_id.into()); + self + } + + /// Sets the client id for use in client secret or k8s federated credential flow + pub fn with_client_id(mut self, client_id: impl Into) -> Self { + self.client_id = Some(client_id.into()); + self + } + + /// Sets the client secret for use in client secret flow + pub fn with_client_secret(mut self, client_secret: impl Into) -> Self { + self.client_secret = Some(client_secret.into()); + self + } + + /// Sets the tenant id for use in client secret or k8s federated credential flow + pub fn with_tenant_id(mut self, tenant_id: impl Into) -> Self { + self.tenant_id = Some(tenant_id.into()); + self + } + + /// Set query pairs appended to the url for shared access signature authorization + pub fn with_sas_authorization(mut self, query_pairs: impl Into>) -> Self { + self.sas_query_pairs = Some(query_pairs.into()); + self + } + + /// Set the credential provider overriding any other options + pub fn with_credentials(mut self, credentials: AzureCredentialProvider) -> Self { + self.credentials = Some(credentials); + self + } + + /// Set if the Azure emulator should be used (defaults to false) + pub fn with_use_emulator(mut self, use_emulator: bool) -> Self { + self.use_emulator = use_emulator.into(); + self + } + + /// Override the endpoint used to communicate with blob 
storage + /// + /// Defaults to `https://{account}.blob.core.windows.net` + /// + /// By default, only HTTPS schemes are enabled. To connect to an HTTP endpoint, enable + /// [`Self::with_allow_http`]. + pub fn with_endpoint(mut self, endpoint: String) -> Self { + self.endpoint = Some(endpoint); + self + } + + /// Set if Microsoft Fabric url scheme should be used (defaults to false) + /// + /// When disabled the url scheme used is `https://{account}.blob.core.windows.net` + /// When enabled the url scheme used is `https://{account}.dfs.fabric.microsoft.com` + /// + /// Note: [`Self::with_endpoint`] will take precedence over this option + pub fn with_use_fabric_endpoint(mut self, use_fabric_endpoint: bool) -> Self { + self.use_fabric_endpoint = use_fabric_endpoint.into(); + self + } + + /// Sets what protocol is allowed + /// + /// If `allow_http` is: + /// * false (default): only HTTPS is allowed + /// * true: HTTP and HTTPS are allowed + pub fn with_allow_http(mut self, allow_http: bool) -> Self { + self.client_options = self.client_options.with_allow_http(allow_http); + self + } + + /// Sets an alternative authority host for OAuth based authorization + /// + /// Common hosts for azure clouds are defined in [authority_hosts](crate::azure::authority_hosts). + /// + /// Defaults to <https://login.microsoftonline.com> + pub fn with_authority_host(mut self, authority_host: impl Into<String>) -> Self { + self.authority_host = Some(authority_host.into()); + self + } + + /// Set the retry configuration + pub fn with_retry(mut self, retry_config: RetryConfig) -> Self { + self.retry_config = retry_config; + self + } + + /// Set the proxy_url to be used by the underlying client + pub fn with_proxy_url(mut self, proxy_url: impl Into<String>) -> Self { + self.client_options = self.client_options.with_proxy_url(proxy_url); + self + } + + /// Set a trusted proxy CA certificate + pub fn with_proxy_ca_certificate(mut self, proxy_ca_certificate: impl Into<String>) -> Self { + self.client_options = self + .client_options + .with_proxy_ca_certificate(proxy_ca_certificate); + self + } + + /// Set a list of hosts to exclude from proxy connections + pub fn with_proxy_excludes(mut self, proxy_excludes: impl Into<String>) -> Self { + self.client_options = self.client_options.with_proxy_excludes(proxy_excludes); + self + } + + /// Sets the client options, overriding any already set + pub fn with_client_options(mut self, options: ClientOptions) -> Self { + self.client_options = options; + self + } + + /// Sets the endpoint for acquiring managed identity token + pub fn with_msi_endpoint(mut self, msi_endpoint: impl Into<String>) -> Self { + self.msi_endpoint = Some(msi_endpoint.into()); + self + } + + /// Sets a file path for acquiring azure federated identity token in k8s + /// + /// requires `client_id` and `tenant_id` to be set + pub fn with_federated_token_file(mut self, federated_token_file: impl Into<String>) -> Self { + self.federated_token_file = Some(federated_token_file.into()); + self + } + + /// Set if the Azure Cli should be used for acquiring access token + /// + /// + pub fn with_use_azure_cli(mut self, use_azure_cli: bool) -> Self { + self.use_azure_cli = use_azure_cli.into(); + self + } + + /// If enabled, [`MicrosoftAzure`] will not fetch credentials and will not sign requests + /// + /// This can be useful when interacting with public containers + pub fn with_skip_signature(mut self, skip_signature: bool) -> Self { + self.skip_signature = skip_signature.into(); + self + } + + /// If set to `true` will ignore any tags provided to put_opts + pub fn with_disable_tagging(mut self, 
ignore: bool) -> Self { + self.disable_tagging = ignore.into(); + self + } + + /// The [`HttpConnector`] to use + /// + /// On non-WASM32 platforms uses [`reqwest`] by default, on WASM32 platforms must be provided + pub fn with_http_connector<C: HttpConnector>(mut self, connector: C) -> Self { + self.http_connector = Some(Arc::new(connector)); + self + } + + /// Configure a connection to container with given name on Microsoft Azure Blob store. + pub fn build(mut self) -> Result<MicrosoftAzure> { + if let Some(url) = self.url.take() { + self.parse_url(&url)?; + } + + let container = self.container_name.ok_or(Error::MissingContainerName {})?; + + let static_creds = |credential: AzureCredential| -> AzureCredentialProvider { + Arc::new(StaticCredentialProvider::new(credential)) + }; + + let http = http_connector(self.http_connector)?; + + let (is_emulator, storage_url, auth, account) = if self.use_emulator.get()? { + let account_name = self + .account_name + .unwrap_or_else(|| EMULATOR_ACCOUNT.to_string()); + // Allow overriding defaults. Values taken from + // https://docs.rs/azure_storage/0.2.0/src/azure_storage/core/clients/storage_account_client.rs.html#129-141 + let url = url_from_env("AZURITE_BLOB_STORAGE_URL", "http://127.0.0.1:10000")?; + let credential = if let Some(k) = self.access_key { + AzureCredential::AccessKey(AzureAccessKey::try_new(&k)?) + } else if let Some(bearer_token) = self.bearer_token { + AzureCredential::BearerToken(bearer_token) + } else if let Some(query_pairs) = self.sas_query_pairs { + AzureCredential::SASToken(query_pairs) + } else if let Some(sas) = self.sas_key { + AzureCredential::SASToken(split_sas(&sas)?) + } else { + AzureCredential::AccessKey(AzureAccessKey::try_new(EMULATOR_ACCOUNT_KEY)?) + }; + + self.client_options = self.client_options.with_allow_http(true); + (true, url, static_creds(credential), account_name) + } else { + let account_name = self.account_name.ok_or(Error::MissingAccount {})?; + let account_url = match self.endpoint { + Some(account_url) => account_url, + None => match self.use_fabric_endpoint.get()? { + true => { + format!("https://{}.blob.fabric.microsoft.com", &account_name) + } + false => format!("https://{}.blob.core.windows.net", &account_name), + }, + }; + + let url = Url::parse(&account_url).map_err(|source| { + let url = account_url.clone(); + Error::UnableToParseUrl { url, source } + })?; + + let credential = if let Some(credential) = self.credentials { + credential + } else if let ( + Some(fabric_token_service_url), + Some(fabric_workload_host), + Some(fabric_session_token), + Some(fabric_cluster_identifier), + ) = ( + &self.fabric_token_service_url, + &self.fabric_workload_host, + &self.fabric_session_token, + &self.fabric_cluster_identifier, + ) { + // This case should precede the bearer token case because it is more specific and will utilize the bearer token. 
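+ // Credential precedence in the chain below: explicitly provided + // credentials, Fabric OAuth, bearer token, access key, workload + // identity (client id + tenant id + federated token file), client + // secret, SAS query pairs, SAS key, Azure CLI, and finally the IMDS + // managed identity provider as the fallback.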
+ let fabric_credential = FabricTokenOAuthProvider::new( + fabric_token_service_url, + fabric_workload_host, + fabric_session_token, + fabric_cluster_identifier, + self.bearer_token.clone(), + ); + Arc::new(TokenCredentialProvider::new( + fabric_credential, + http.connect(&self.client_options)?, + self.retry_config.clone(), + )) as _ + } else if let Some(bearer_token) = self.bearer_token { + static_creds(AzureCredential::BearerToken(bearer_token)) + } else if let Some(access_key) = self.access_key { + let key = AzureAccessKey::try_new(&access_key)?; + static_creds(AzureCredential::AccessKey(key)) + } else if let (Some(client_id), Some(tenant_id), Some(federated_token_file)) = + (&self.client_id, &self.tenant_id, self.federated_token_file) + { + let client_credential = WorkloadIdentityOAuthProvider::new( + client_id, + federated_token_file, + tenant_id, + self.authority_host, + ); + Arc::new(TokenCredentialProvider::new( + client_credential, + http.connect(&self.client_options)?, + self.retry_config.clone(), + )) as _ + } else if let (Some(client_id), Some(client_secret), Some(tenant_id)) = + (&self.client_id, self.client_secret, &self.tenant_id) + { + let client_credential = ClientSecretOAuthProvider::new( + client_id.clone(), + client_secret, + tenant_id, + self.authority_host, + ); + Arc::new(TokenCredentialProvider::new( + client_credential, + http.connect(&self.client_options)?, + self.retry_config.clone(), + )) as _ + } else if let Some(query_pairs) = self.sas_query_pairs { + static_creds(AzureCredential::SASToken(query_pairs)) + } else if let Some(sas) = self.sas_key { + static_creds(AzureCredential::SASToken(split_sas(&sas)?)) + } else if self.use_azure_cli.get()? { + Arc::new(AzureCliCredential::new()) as _ + } else { + let msi_credential = ImdsManagedIdentityProvider::new( + self.client_id, + self.object_id, + self.msi_resource_id, + self.msi_endpoint, + ); + Arc::new(TokenCredentialProvider::new( + msi_credential, + http.connect(&self.client_options.metadata_options())?, + self.retry_config.clone(), + )) as _ + }; + (false, url, credential, account_name) + }; + + let config = AzureConfig { + account, + is_emulator, + skip_signature: self.skip_signature.get()?, + container, + disable_tagging: self.disable_tagging.get()?, + retry_config: self.retry_config, + client_options: self.client_options, + service: storage_url, + credentials: auth, + }; + + let http_client = http.connect(&config.client_options)?; + let client = Arc::new(AzureClient::new(config, http_client)); + + Ok(MicrosoftAzure { client }) + } +} + +/// Parses the contents of the environment variable `env_name` as a URL +/// if present, otherwise falls back to default_url +fn url_from_env(env_name: &str, default_url: &str) -> Result<Url> { + let url = match std::env::var(env_name) { + Ok(env_value) => { + Url::parse(&env_value).map_err(|source| Error::UnableToParseEmulatorUrl { + env_name: env_name.into(), + env_value, + source, + })? 
+ } + Err(_) => Url::parse(default_url).expect("Failed to parse default URL"), + }; + Ok(url) +} + +fn split_sas(sas: &str) -> Result<Vec<(String, String)>, Error> { + let sas = percent_decode_str(sas) + .decode_utf8() + .map_err(|source| Error::DecodeSasKey { source })?; + let kv_str_pairs = sas + .trim_start_matches('?') + .split('&') + .filter(|s| !s.chars().all(char::is_whitespace)); + let mut pairs = Vec::new(); + for kv_pair_str in kv_str_pairs { + let (k, v) = kv_pair_str + .trim() + .split_once('=') + .ok_or(Error::MissingSasComponent {})?; + pairs.push((k.into(), v.into())) + } + Ok(pairs) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + #[test] + fn azure_blob_test_urls() { + let mut builder = MicrosoftAzureBuilder::new(); + builder + .parse_url("abfss://file_system@account.dfs.core.windows.net/") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert_eq!(builder.container_name, Some("file_system".to_string())); + assert!(!builder.use_fabric_endpoint.get().unwrap()); + + let mut builder = MicrosoftAzureBuilder::new(); + builder + .parse_url("az://container@account.dfs.core.windows.net/path-part/file") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert_eq!(builder.container_name, Some("container".to_string())); + assert!(!builder.use_fabric_endpoint.get().unwrap()); + + let mut builder = MicrosoftAzureBuilder::new(); + builder + .parse_url("abfss://file_system@account.dfs.fabric.microsoft.com/") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert_eq!(builder.container_name, Some("file_system".to_string())); + assert!(builder.use_fabric_endpoint.get().unwrap()); + + let mut builder = MicrosoftAzureBuilder::new(); + builder.parse_url("abfs://container/path").unwrap(); + assert_eq!(builder.container_name, Some("container".to_string())); + + let mut builder = MicrosoftAzureBuilder::new(); + builder.parse_url("az://container").unwrap(); + assert_eq!(builder.container_name, Some("container".to_string())); + + let mut builder = MicrosoftAzureBuilder::new(); + builder.parse_url("az://container/path").unwrap(); + assert_eq!(builder.container_name, Some("container".to_string())); + + let mut builder = MicrosoftAzureBuilder::new(); + builder + .parse_url("https://account.dfs.core.windows.net/") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert!(!builder.use_fabric_endpoint.get().unwrap()); + + let mut builder = + MicrosoftAzureBuilder::new().with_container_name("explicit_container_name"); + builder + .parse_url("https://account.blob.core.windows.net/") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert_eq!( + builder.container_name, + Some("explicit_container_name".to_string()) + ); + assert!(!builder.use_fabric_endpoint.get().unwrap()); + + let mut builder = MicrosoftAzureBuilder::new(); + builder + .parse_url("https://account.blob.core.windows.net/container") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert_eq!(builder.container_name, Some("container".to_string())); + assert!(!builder.use_fabric_endpoint.get().unwrap()); + + let mut builder = MicrosoftAzureBuilder::new(); + builder + .parse_url("https://account.dfs.fabric.microsoft.com/") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert_eq!(builder.container_name, None); + assert!(builder.use_fabric_endpoint.get().unwrap()); + + let mut builder = MicrosoftAzureBuilder::new(); + 
builder + .parse_url("https://account.dfs.fabric.microsoft.com/container") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert_eq!(builder.container_name.as_deref(), Some("container")); + assert!(builder.use_fabric_endpoint.get().unwrap()); + + let mut builder = MicrosoftAzureBuilder::new(); + builder + .parse_url("https://account.blob.fabric.microsoft.com/") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert_eq!(builder.container_name, None); + assert!(builder.use_fabric_endpoint.get().unwrap()); + + let mut builder = MicrosoftAzureBuilder::new(); + builder + .parse_url("https://account.blob.fabric.microsoft.com/container") + .unwrap(); + assert_eq!(builder.account_name, Some("account".to_string())); + assert_eq!(builder.container_name.as_deref(), Some("container")); + assert!(builder.use_fabric_endpoint.get().unwrap()); + + let err_cases = [ + "mailto://account.blob.core.windows.net/", + "az://blob.mydomain/", + "abfs://container.foo/path", + "abfss://file_system@account.foo.dfs.core.windows.net/", + "abfss://file_system.bar@account.dfs.core.windows.net/", + "https://blob.mydomain/", + "https://blob.foo.dfs.core.windows.net/", + ]; + let mut builder = MicrosoftAzureBuilder::new(); + for case in err_cases { + builder.parse_url(case).unwrap_err(); + } + } + + #[test] + fn azure_test_config_from_map() { + let azure_client_id = "object_store:fake_access_key_id"; + let azure_storage_account_name = "object_store:fake_secret_key"; + let azure_storage_token = "object_store:fake_default_region"; + let options = HashMap::from([ + ("azure_client_id", azure_client_id), + ("azure_storage_account_name", azure_storage_account_name), + ("azure_storage_token", azure_storage_token), + ]); + + let builder = options + .into_iter() + .fold(MicrosoftAzureBuilder::new(), |builder, (key, value)| { + builder.with_config(key.parse().unwrap(), value) + }); + assert_eq!(builder.client_id.unwrap(), azure_client_id); + assert_eq!(builder.account_name.unwrap(), azure_storage_account_name); + assert_eq!(builder.bearer_token.unwrap(), azure_storage_token); + } + + #[test] + fn azure_test_split_sas() { + let raw_sas = "?sv=2021-10-04&st=2023-01-04T17%3A48%3A57Z&se=2023-01-04T18%3A15%3A00Z&sr=c&sp=rcwl&sig=C7%2BZeEOWbrxPA3R0Cw%2Fw1EZz0%2B4KBvQexeKZKe%2BB6h0%3D"; + let expected = vec![ + ("sv".to_string(), "2021-10-04".to_string()), + ("st".to_string(), "2023-01-04T17:48:57Z".to_string()), + ("se".to_string(), "2023-01-04T18:15:00Z".to_string()), + ("sr".to_string(), "c".to_string()), + ("sp".to_string(), "rcwl".to_string()), + ( + "sig".to_string(), + "C7+ZeEOWbrxPA3R0Cw/w1EZz0+4KBvQexeKZKe+B6h0=".to_string(), + ), + ]; + let pairs = split_sas(raw_sas).unwrap(); + assert_eq!(expected, pairs); + } + + #[test] + fn azure_test_client_opts() { + let key = "AZURE_PROXY_URL"; + if let Ok(config_key) = key.to_ascii_lowercase().parse() { + assert_eq!( + AzureConfigKey::Client(ClientConfigKey::ProxyUrl), + config_key + ); + } else { + panic!("{key} not propagated as ClientConfigKey"); + } + } +} diff --git a/rust/object_store/src/azure/client.rs b/rust/object_store/src/azure/client.rs new file mode 100644 index 0000000000..1e96aac008 --- /dev/null +++ b/rust/object_store/src/azure/client.rs @@ -0,0 +1,1555 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::credential::AzureCredential; +use crate::azure::credential::*; +use crate::azure::{AzureCredentialProvider, STORE}; +use crate::client::builder::HttpRequestBuilder; +use crate::client::get::GetClient; +use crate::client::header::{get_put_result, HeaderConfig}; +use crate::client::list::ListClient; +use crate::client::retry::{RetryContext, RetryExt}; +use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpRequest, HttpResponse}; +use crate::list::{PaginatedListOptions, PaginatedListResult}; +use crate::multipart::PartId; +use crate::util::{deserialize_rfc1123, GetRange}; +use crate::{ + Attribute, Attributes, ClientOptions, GetOptions, ListResult, ObjectMeta, Path, PutMode, + PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, RetryConfig, TagSet, +}; +use async_trait::async_trait; +use base64::prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD}; +use base64::Engine; +use bytes::{Buf, Bytes}; +use chrono::{DateTime, Utc}; +use http::{ + header::{HeaderMap, HeaderValue, CONTENT_LENGTH, CONTENT_TYPE, IF_MATCH, IF_NONE_MATCH}, + HeaderName, Method, +}; +use rand::Rng as _; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; +use url::Url; + +const VERSION_HEADER: &str = "x-ms-version-id"; +const ACCESS_TIER_HEADER: &str = "x-ms-access-tier"; +const USER_DEFINED_METADATA_HEADER_PREFIX: &str = "x-ms-meta-"; +static MS_CACHE_CONTROL: HeaderName = HeaderName::from_static("x-ms-blob-cache-control"); +static MS_CONTENT_TYPE: HeaderName = HeaderName::from_static("x-ms-blob-content-type"); +static MS_CONTENT_DISPOSITION: HeaderName = + HeaderName::from_static("x-ms-blob-content-disposition"); +static MS_CONTENT_ENCODING: HeaderName = HeaderName::from_static("x-ms-blob-content-encoding"); +static MS_CONTENT_LANGUAGE: HeaderName = HeaderName::from_static("x-ms-blob-content-language"); + +static TAGS_HEADER: HeaderName = HeaderName::from_static("x-ms-tags"); + +/// A specialized `Error` for object store-related errors +#[derive(Debug, thiserror::Error)] +pub(crate) enum Error { + #[error("Error performing get request {}: {}", path, source)] + GetRequest { + source: crate::client::retry::RetryError, + path: String, + }, + + #[error("Error performing put request {}: {}", path, source)] + PutRequest { + source: crate::client::retry::RetryError, + path: String, + }, + + #[error("Error performing delete request {}: {}", path, source)] + DeleteRequest { + source: crate::client::retry::RetryError, + path: String, + }, + + #[error("Error performing bulk delete request: {}", source)] + BulkDeleteRequest { + source: crate::client::retry::RetryError, + }, + + #[error("Error receiving bulk delete request body: {}", source)] + BulkDeleteRequestBody { source: HttpError }, + + #[error( + "Bulk delete request failed due to invalid input: {} (code: {})", + reason, + code + )] + BulkDeleteRequestInvalidInput { code: String, reason: String }, + + 
#[error("Got invalid bulk delete response: {}", reason)] + InvalidBulkDeleteResponse { reason: String }, + + #[error( + "Bulk delete request failed for key {}: {} (code: {})", + path, + reason, + code + )] + DeleteFailed { + path: String, + code: String, + reason: String, + }, + + #[error("Error performing list request: {}", source)] + ListRequest { + source: crate::client::retry::RetryError, + }, + + #[error("Error getting list response body: {}", source)] + ListResponseBody { source: HttpError }, + + #[error("Got invalid list response: {}", source)] + InvalidListResponse { source: quick_xml::de::DeError }, + + #[error("Unable to extract metadata from headers: {}", source)] + Metadata { + source: crate::client::header::Error, + }, + + #[error("ETag required for conditional update")] + MissingETag, + + #[error("Error requesting user delegation key: {}", source)] + DelegationKeyRequest { + source: crate::client::retry::RetryError, + }, + + #[error("Error getting user delegation key response body: {}", source)] + DelegationKeyResponseBody { source: HttpError }, + + #[error("Got invalid user delegation key response: {}", source)] + DelegationKeyResponse { source: quick_xml::de::DeError }, + + #[error("Generating SAS keys with SAS tokens auth is not supported")] + SASforSASNotSupported, + + #[error("Generating SAS keys while skipping signatures is not supported")] + SASwithSkipSignature, +} + +impl From for crate::Error { + fn from(err: Error) -> Self { + match err { + Error::GetRequest { source, path } + | Error::DeleteRequest { source, path } + | Error::PutRequest { source, path } => source.error(STORE, path), + _ => Self::Generic { + store: STORE, + source: Box::new(err), + }, + } + } +} + +/// Configuration for [AzureClient] +#[derive(Debug)] +pub(crate) struct AzureConfig { + pub account: String, + pub container: String, + pub credentials: AzureCredentialProvider, + pub retry_config: RetryConfig, + pub service: Url, + pub is_emulator: bool, + pub skip_signature: bool, + pub disable_tagging: bool, + pub client_options: ClientOptions, +} + +impl AzureConfig { + pub(crate) fn path_url(&self, path: &Path) -> Url { + let mut url = self.service.clone(); + { + let mut path_mut = url.path_segments_mut().unwrap(); + if self.is_emulator { + path_mut.push(&self.account); + } + path_mut.push(&self.container).extend(path.parts()); + } + url + } + async fn get_credential(&self) -> Result>> { + if self.skip_signature { + Ok(None) + } else { + Some(self.credentials.get_credential().await).transpose() + } + } +} + +/// A builder for a put request allowing customisation of the headers and query string +struct PutRequest<'a> { + path: &'a Path, + config: &'a AzureConfig, + payload: PutPayload, + builder: HttpRequestBuilder, + idempotent: bool, +} + +impl PutRequest<'_> { + fn header(self, k: &HeaderName, v: &str) -> Self { + let builder = self.builder.header(k, v); + Self { builder, ..self } + } + + fn query(self, query: &T) -> Self { + let builder = self.builder.query(query); + Self { builder, ..self } + } + + fn idempotent(self, idempotent: bool) -> Self { + Self { idempotent, ..self } + } + + fn with_tags(mut self, tags: TagSet) -> Self { + let tags = tags.encoded(); + if !tags.is_empty() && !self.config.disable_tagging { + self.builder = self.builder.header(&TAGS_HEADER, tags); + } + self + } + + fn with_attributes(self, attributes: Attributes) -> Self { + let mut builder = self.builder; + let mut has_content_type = false; + for (k, v) in &attributes { + builder = match k { + Attribute::CacheControl => 
+                    builder.header(&MS_CACHE_CONTROL, v.as_ref()),
+                Attribute::ContentDisposition => {
+                    builder.header(&MS_CONTENT_DISPOSITION, v.as_ref())
+                }
+                Attribute::ContentEncoding => builder.header(&MS_CONTENT_ENCODING, v.as_ref()),
+                Attribute::ContentLanguage => builder.header(&MS_CONTENT_LANGUAGE, v.as_ref()),
+                Attribute::ContentType => {
+                    has_content_type = true;
+                    builder.header(&MS_CONTENT_TYPE, v.as_ref())
+                }
+                Attribute::StorageClass => builder.header(ACCESS_TIER_HEADER, v.as_ref()),
+                Attribute::Metadata(k_suffix) => builder.header(
+                    &format!("{USER_DEFINED_METADATA_HEADER_PREFIX}{k_suffix}"),
+                    v.as_ref(),
+                ),
+            };
+        }
+
+        if !has_content_type {
+            if let Some(value) = self.config.client_options.get_content_type(self.path) {
+                builder = builder.header(&MS_CONTENT_TYPE, value);
+            }
+        }
+        Self { builder, ..self }
+    }
+
+    fn with_extensions(self, extensions: ::http::Extensions) -> Self {
+        let builder = self.builder.extensions(extensions);
+        Self { builder, ..self }
+    }
+
+    async fn send(self) -> Result<HttpResponse> {
+        let credential = self.config.get_credential().await?;
+        let sensitive = credential
+            .as_deref()
+            .map(|c| c.sensitive_request())
+            .unwrap_or_default();
+        let response = self
+            .builder
+            .header(CONTENT_LENGTH, self.payload.content_length())
+            .with_azure_authorization(&credential, &self.config.account)
+            .retryable(&self.config.retry_config)
+            .sensitive(sensitive)
+            .idempotent(self.idempotent)
+            .payload(Some(self.payload))
+            .send()
+            .await
+            .map_err(|source| {
+                let path = self.path.as_ref().into();
+                Error::PutRequest { path, source }
+            })?;
+
+        Ok(response)
+    }
+}
+
+#[inline]
+fn extend(dst: &mut Vec<u8>, data: &[u8]) {
+    dst.extend_from_slice(data);
+}
+
+// Write header names in title case. The header name is assumed to be ASCII.
+// This is needed because Azure does not always treat header names as
+// case-insensitive.
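+// For example, "x-ms-version" becomes "X-Ms-Version": the first byte and every
+// byte following a '-' are uppercased, and all other bytes pass through
+// unchanged.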
+fn title_case(dst: &mut Vec, name: &[u8]) { + dst.reserve(name.len()); + + // Ensure first character is uppercased + let mut prev = b'-'; + for &(mut c) in name { + if prev == b'-' { + c.make_ascii_uppercase(); + } + dst.push(c); + prev = c; + } +} + +fn write_headers(headers: &HeaderMap, dst: &mut Vec) { + for (name, value) in headers { + // We need special case handling here otherwise Azure returns 400 + // due to `Content-Id` instead of `Content-ID` + if name == "content-id" { + extend(dst, b"Content-ID"); + } else { + title_case(dst, name.as_str().as_bytes()); + } + extend(dst, b": "); + extend(dst, value.as_bytes()); + extend(dst, b"\r\n"); + } +} + +// https://docs.oasis-open.org/odata/odata/v4.0/errata02/os/complete/part1-protocol/odata-v4.0-errata02-os-part1-protocol-complete.html#_Toc406398359 +fn serialize_part_delete_request( + dst: &mut Vec, + boundary: &str, + idx: usize, + request: HttpRequest, + relative_url: String, +) { + // Encode start marker for part + extend(dst, b"--"); + extend(dst, boundary.as_bytes()); + extend(dst, b"\r\n"); + + // Encode part headers + let mut part_headers = HeaderMap::new(); + part_headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/http")); + part_headers.insert( + "Content-Transfer-Encoding", + HeaderValue::from_static("binary"), + ); + // Azure returns 400 if we send `Content-Id` instead of `Content-ID` + part_headers.insert("Content-ID", HeaderValue::from(idx)); + write_headers(&part_headers, dst); + extend(dst, b"\r\n"); + + // Encode the subrequest request-line + extend(dst, b"DELETE "); + extend(dst, format!("/{relative_url} ").as_bytes()); + extend(dst, b"HTTP/1.1"); + extend(dst, b"\r\n"); + + // Encode subrequest headers + write_headers(request.headers(), dst); + extend(dst, b"\r\n"); + extend(dst, b"\r\n"); +} + +fn parse_multipart_response_boundary(response: &HttpResponse) -> Result { + let invalid_response = |msg: &str| Error::InvalidBulkDeleteResponse { + reason: msg.to_string(), + }; + + let content_type = response + .headers() + .get(CONTENT_TYPE) + .ok_or_else(|| invalid_response("missing Content-Type"))?; + + let boundary = content_type + .as_ref() + .strip_prefix(b"multipart/mixed; boundary=") + .ok_or_else(|| invalid_response("invalid Content-Type value"))? 
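+        // e.g. `Content-Type: multipart/mixed; boundary=batchresponse_66925647-...`
+        // yields the boundary token used to split the body into per-blob parts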
+ .to_vec(); + + let boundary = + String::from_utf8(boundary).map_err(|_| invalid_response("invalid multipart boundary"))?; + + Ok(boundary) +} + +fn invalid_response(msg: &str) -> Error { + Error::InvalidBulkDeleteResponse { + reason: msg.to_string(), + } +} + +#[derive(Debug)] +struct MultipartField { + headers: HeaderMap, + content: Bytes, +} + +fn parse_multipart_body_fields(body: Bytes, boundary: &[u8]) -> Result> { + let start_marker = [b"--", boundary, b"\r\n"].concat(); + let next_marker = &start_marker[..start_marker.len() - 2]; + let end_marker = [b"--", boundary, b"--\r\n"].concat(); + + // There should be at most 256 responses per batch + let mut fields = Vec::with_capacity(256); + let mut remaining: &[u8] = body.as_ref(); + loop { + remaining = remaining + .strip_prefix(start_marker.as_slice()) + .ok_or_else(|| invalid_response("missing start marker for field"))?; + + // The documentation only mentions two headers for fields, we leave some extra margin + let mut scratch = [httparse::EMPTY_HEADER; 10]; + let mut headers = HeaderMap::new(); + match httparse::parse_headers(remaining, &mut scratch) { + Ok(httparse::Status::Complete((pos, headers_slice))) => { + remaining = &remaining[pos..]; + for header in headers_slice { + headers.insert( + HeaderName::from_bytes(header.name.as_bytes()).expect("valid"), + HeaderValue::from_bytes(header.value).expect("valid"), + ); + } + } + _ => return Err(invalid_response("unable to parse field headers").into()), + }; + + let next_pos = remaining + .windows(next_marker.len()) + .position(|window| window == next_marker) + .ok_or_else(|| invalid_response("early EOF while seeking to next boundary"))?; + + fields.push(MultipartField { + headers, + content: body.slice_ref(&remaining[..next_pos]), + }); + + remaining = &remaining[next_pos..]; + + // Support missing final CRLF + if remaining == end_marker || remaining == &end_marker[..end_marker.len() - 2] { + break; + } + } + Ok(fields) +} + +async fn parse_blob_batch_delete_body( + batch_body: Bytes, + boundary: String, + paths: &[Path], +) -> Result>> { + let mut results: Vec> = paths.iter().cloned().map(Ok).collect(); + + for field in parse_multipart_body_fields(batch_body, boundary.as_bytes())? 
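+    // Each multipart field carries one sub-response; its Content-ID header is
+    // the index of the corresponding path in the original batch, which is how
+    // per-blob outcomes are matched back into `results`.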
{ + let id = field + .headers + .get("content-id") + .and_then(|v| std::str::from_utf8(v.as_bytes()).ok()) + .and_then(|v| v.parse::().ok()); + + // Parse part response headers + // Documentation mentions 5 headers and states that other standard HTTP headers + // may be provided, in order to not incurr in more complexity to support an arbitrary + // amount of headers we chose a conservative amount and error otherwise + // https://learn.microsoft.com/en-us/rest/api/storageservices/delete-blob?tabs=microsoft-entra-id#response-headers + let mut headers = [httparse::EMPTY_HEADER; 48]; + let mut part_response = httparse::Response::new(&mut headers); + match part_response.parse(&field.content) { + Ok(httparse::Status::Complete(_)) => {} + _ => return Err(invalid_response("unable to parse response").into()), + }; + + match (id, part_response.code) { + (Some(_id), Some(code)) if (200..300).contains(&code) => {} + (Some(id), Some(404)) => { + results[id] = Err(crate::Error::NotFound { + path: paths[id].as_ref().to_string(), + source: Error::DeleteFailed { + path: paths[id].as_ref().to_string(), + code: 404.to_string(), + reason: part_response.reason.unwrap_or_default().to_string(), + } + .into(), + }); + } + (Some(id), Some(code)) => { + results[id] = Err(Error::DeleteFailed { + path: paths[id].as_ref().to_string(), + code: code.to_string(), + reason: part_response.reason.unwrap_or_default().to_string(), + } + .into()); + } + (None, Some(code)) => { + return Err(Error::BulkDeleteRequestInvalidInput { + code: code.to_string(), + reason: part_response.reason.unwrap_or_default().to_string(), + } + .into()) + } + _ => return Err(invalid_response("missing part response status code").into()), + } + } + + Ok(results) +} + +#[derive(Debug)] +pub(crate) struct AzureClient { + config: AzureConfig, + client: HttpClient, +} + +impl AzureClient { + /// create a new instance of [AzureClient] + pub(crate) fn new(config: AzureConfig, client: HttpClient) -> Self { + Self { config, client } + } + + /// Returns the config + pub(crate) fn config(&self) -> &AzureConfig { + &self.config + } + + async fn get_credential(&self) -> Result>> { + self.config.get_credential().await + } + + fn put_request<'a>(&'a self, path: &'a Path, payload: PutPayload) -> PutRequest<'a> { + let url = self.config.path_url(path); + let builder = self.client.request(Method::PUT, url.as_str()); + + PutRequest { + path, + builder, + payload, + config: &self.config, + idempotent: false, + } + } + + /// Make an Azure PUT request + pub(crate) async fn put_blob( + &self, + path: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result { + let PutOptions { + mode, + tags, + attributes, + extensions, + } = opts; + + let builder = self + .put_request(path, payload) + .with_attributes(attributes) + .with_extensions(extensions) + .with_tags(tags); + + let builder = match &mode { + PutMode::Overwrite => builder.idempotent(true), + PutMode::Create => builder.header(&IF_NONE_MATCH, "*"), + PutMode::Update(v) => { + let etag = v.e_tag.as_ref().ok_or(Error::MissingETag)?; + builder.header(&IF_MATCH, etag) + } + }; + + let response = builder.header(&BLOB_TYPE, "BlockBlob").send().await?; + Ok(get_put_result(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?) 
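+        // The returned PutResult carries the new blob's etag and, on accounts
+        // with versioning enabled, the version id from the `x-ms-version-id`
+        // header.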
+ } + + /// PUT a block + pub(crate) async fn put_block( + &self, + path: &Path, + _part_idx: usize, + payload: PutPayload, + ) -> Result { + let part_idx = u128::from_be_bytes(rand::rng().random()); + let content_id = format!("{part_idx:032x}"); + let block_id = BASE64_STANDARD.encode(&content_id); + + self.put_request(path, payload) + .query(&[("comp", "block"), ("blockid", &block_id)]) + .idempotent(true) + .send() + .await?; + + Ok(PartId { content_id }) + } + + /// PUT a block list + pub(crate) async fn put_block_list( + &self, + path: &Path, + parts: Vec, + opts: PutMultipartOptions, + ) -> Result { + let PutMultipartOptions { + tags, + attributes, + extensions, + } = opts; + + let blocks = parts + .into_iter() + .map(|part| BlockId::from(part.content_id)) + .collect(); + + let payload = BlockList { blocks }.to_xml().into(); + let response = self + .put_request(path, payload) + .with_attributes(attributes) + .with_tags(tags) + .with_extensions(extensions) + .query(&[("comp", "blocklist")]) + .idempotent(true) + .send() + .await?; + + Ok(get_put_result(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?) + } + + /// Make an Azure Delete request + pub(crate) async fn delete_request( + &self, + path: &Path, + query: &T, + ) -> Result<()> { + let credential = self.get_credential().await?; + let url = self.config.path_url(path); + + let sensitive = credential + .as_deref() + .map(|c| c.sensitive_request()) + .unwrap_or_default(); + self.client + .delete(url.as_str()) + .query(query) + .header(&DELETE_SNAPSHOTS, "include") + .with_azure_authorization(&credential, &self.config.account) + .retryable(&self.config.retry_config) + .sensitive(sensitive) + .send() + .await + .map_err(|source| { + let path = path.as_ref().into(); + Error::DeleteRequest { source, path } + })?; + + Ok(()) + } + + fn build_bulk_delete_body( + &self, + boundary: &str, + paths: &[Path], + credential: &Option>, + ) -> Vec { + let mut body_bytes = Vec::with_capacity(paths.len() * 2048); + + for (idx, path) in paths.iter().enumerate() { + let url = self.config.path_url(path); + + // Build subrequest with proper authorization + let request = self + .client + .delete(url.as_str()) + .header(CONTENT_LENGTH, HeaderValue::from(0)) + // Each subrequest must be authorized individually [1] and we use + // the CredentialExt for this. 
+ // [1]: https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch?tabs=microsoft-entra-id#request-body + .with_azure_authorization(credential, &self.config.account) + .into_parts() + .1 + .unwrap(); + + let url: Url = request.uri().to_string().parse().unwrap(); + + // Url for part requests must be relative and without base + let relative_url = self.config.service.make_relative(&url).unwrap(); + + serialize_part_delete_request(&mut body_bytes, boundary, idx, request, relative_url) + } + + // Encode end marker + extend(&mut body_bytes, b"--"); + extend(&mut body_bytes, boundary.as_bytes()); + extend(&mut body_bytes, b"--"); + extend(&mut body_bytes, b"\r\n"); + body_bytes + } + + pub(crate) async fn bulk_delete_request(&self, paths: Vec) -> Result>> { + if paths.is_empty() { + return Ok(Vec::new()); + } + + let credential = self.get_credential().await?; + + // https://www.ietf.org/rfc/rfc2046 + let random_bytes = rand::random::<[u8; 16]>(); // 128 bits + let boundary = format!("batch_{}", BASE64_STANDARD_NO_PAD.encode(random_bytes)); + + let body_bytes = self.build_bulk_delete_body(&boundary, &paths, &credential); + + // Send multipart request + let url = self.config.path_url(&Path::from("/")); + let batch_response = self + .client + .post(url.as_str()) + .query(&[("restype", "container"), ("comp", "batch")]) + .header( + CONTENT_TYPE, + HeaderValue::from_str(format!("multipart/mixed; boundary={boundary}").as_str()) + .unwrap(), + ) + .header(CONTENT_LENGTH, HeaderValue::from(body_bytes.len())) + .body(body_bytes) + .with_azure_authorization(&credential, &self.config.account) + .send_retry(&self.config.retry_config) + .await + .map_err(|source| Error::BulkDeleteRequest { source })?; + + let boundary = parse_multipart_response_boundary(&batch_response)?; + + let batch_body = batch_response + .into_body() + .bytes() + .await + .map_err(|source| Error::BulkDeleteRequestBody { source })?; + + let results = parse_blob_batch_delete_body(batch_body, boundary, &paths).await?; + + Ok(results) + } + + /// Make an Azure Copy request + pub(crate) async fn copy_request(&self, from: &Path, to: &Path, overwrite: bool) -> Result<()> { + let credential = self.get_credential().await?; + let url = self.config.path_url(to); + let mut source = self.config.path_url(from); + + // If using SAS authorization must include the headers in the URL + // + if let Some(AzureCredential::SASToken(pairs)) = credential.as_deref() { + source.query_pairs_mut().extend_pairs(pairs); + } + + let mut builder = self + .client + .request(Method::PUT, url.as_str()) + .header(©_SOURCE, source.to_string()) + .header(CONTENT_LENGTH, HeaderValue::from_static("0")); + + if !overwrite { + builder = builder.header(IF_NONE_MATCH, "*"); + } + + let sensitive = credential + .as_deref() + .map(|c| c.sensitive_request()) + .unwrap_or_default(); + builder + .with_azure_authorization(&credential, &self.config.account) + .retryable(&self.config.retry_config) + .sensitive(sensitive) + .idempotent(overwrite) + .send() + .await + .map_err(|err| err.error(STORE, from.to_string()))?; + + Ok(()) + } + + /// Make a Get User Delegation Key request + /// + async fn get_user_delegation_key( + &self, + start: &DateTime, + end: &DateTime, + ) -> Result { + let credential = self.get_credential().await?; + let url = self.config.service.clone(); + + let start = start.to_rfc3339_opts(chrono::SecondsFormat::Secs, true); + let expiry = end.to_rfc3339_opts(chrono::SecondsFormat::Secs, true); + + let mut body = String::new(); + body.push_str("\n\n"); + 
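+        // A sketch of the request body assembled here: the Get User Delegation
+        // Key API expects a KeyInfo XML document of the form
+        //
+        //   <?xml version="1.0" encoding="utf-8"?>
+        //   <KeyInfo>
+        //       <Start>{start}</Start>
+        //       <Expiry>{expiry}</Expiry>
+        //   </KeyInfo>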
+        body.push_str(&format!(
+            "\t<Start>{start}</Start>\n\t<Expiry>{expiry}</Expiry>\n"
+        ));
+        body.push_str("</KeyInfo>");
+
+        let sensitive = credential
+            .as_deref()
+            .map(|c| c.sensitive_request())
+            .unwrap_or_default();
+
+        let response = self
+            .client
+            .post(url.as_str())
+            .body(body)
+            .query(&[("restype", "service"), ("comp", "userdelegationkey")])
+            .with_azure_authorization(&credential, &self.config.account)
+            .retryable(&self.config.retry_config)
+            .sensitive(sensitive)
+            .idempotent(true)
+            .send()
+            .await
+            .map_err(|source| Error::DelegationKeyRequest { source })?
+            .into_body()
+            .bytes()
+            .await
+            .map_err(|source| Error::DelegationKeyResponseBody { source })?;
+
+        let response: UserDelegationKey = quick_xml::de::from_reader(response.reader())
+            .map_err(|source| Error::DelegationKeyResponse { source })?;
+
+        Ok(response)
+    }
+
+    /// Create an AzureSigner for generating SAS tokens (pre-signed urls).
+    ///
+    /// Depending on the type of credential, this will either use the account key or a user delegation key.
+    /// Since delegation keys are acquired ad-hoc, the signer allows for signing multiple urls with the same key.
+    pub(crate) async fn signer(&self, expires_in: Duration) -> Result<AzureSigner> {
+        let credential = self.get_credential().await?;
+        let signed_start = chrono::Utc::now();
+        let signed_expiry = signed_start + expires_in;
+        match credential.as_deref() {
+            Some(AzureCredential::BearerToken(_)) => {
+                let key = self
+                    .get_user_delegation_key(&signed_start, &signed_expiry)
+                    .await?;
+                let signing_key = AzureAccessKey::try_new(&key.value)?;
+                Ok(AzureSigner::new(
+                    signing_key,
+                    self.config.account.clone(),
+                    signed_start,
+                    signed_expiry,
+                    Some(key),
+                ))
+            }
+            Some(AzureCredential::AccessKey(key)) => Ok(AzureSigner::new(
+                key.to_owned(),
+                self.config.account.clone(),
+                signed_start,
+                signed_expiry,
+                None,
+            )),
+            None => Err(Error::SASwithSkipSignature.into()),
+            _ => Err(Error::SASforSASNotSupported.into()),
+        }
+    }
+
+    #[cfg(test)]
+    pub(crate) async fn get_blob_tagging(&self, path: &Path) -> Result<HttpResponse> {
+        let credential = self.get_credential().await?;
+        let url = self.config.path_url(path);
+        let sensitive = credential
+            .as_deref()
+            .map(|c| c.sensitive_request())
+            .unwrap_or_default();
+        let response = self
+            .client
+            .get(url.as_str())
+            .query(&[("comp", "tags")])
+            .with_azure_authorization(&credential, &self.config.account)
+            .retryable(&self.config.retry_config)
+            .sensitive(sensitive)
+            .send()
+            .await
+            .map_err(|source| {
+                let path = path.as_ref().into();
+                Error::GetRequest { source, path }
+            })?;
+
+        Ok(response)
+    }
+}
+
+#[async_trait]
+impl GetClient for AzureClient {
+    const STORE: &'static str = STORE;
+
+    const HEADER_CONFIG: HeaderConfig = HeaderConfig {
+        etag_required: true,
+        last_modified_required: true,
+        version_header: Some(VERSION_HEADER),
+        user_defined_metadata_prefix: Some(USER_DEFINED_METADATA_HEADER_PREFIX),
+    };
+
+    fn retry_config(&self) -> &RetryConfig {
+        &self.config.retry_config
+    }
+
+    /// Make an Azure GET request
+    ///
+    ///
+    async fn get_request(
+        &self,
+        ctx: &mut RetryContext,
+        path: &Path,
+        options: GetOptions,
+    ) -> Result<HttpResponse> {
+        // As of 2024-01-02, Azure does not support suffix requests,
+        // so we should fail fast here rather than sending one
+        if let Some(GetRange::Suffix(_)) = options.range.as_ref() {
+            return Err(crate::Error::NotSupported {
+                source: "Azure does not support suffix range requests".into(),
+            });
+        }
+
+        let credential = self.get_credential().await?;
+        let url = self.config.path_url(path);
+        let method = match options.head {
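+            // HEAD fetches only the object's headers (metadata); GET also
+            // returns the payload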
true => Method::HEAD, + false => Method::GET, + }; + + let mut builder = self + .client + .request(method, url.as_str()) + .header(CONTENT_LENGTH, HeaderValue::from_static("0")) + .body(Bytes::new()); + + if let Some(v) = &options.version { + builder = builder.query(&[("versionid", v)]) + } + + let sensitive = credential + .as_deref() + .map(|c| c.sensitive_request()) + .unwrap_or_default(); + + let response = builder + .with_get_options(options) + .with_azure_authorization(&credential, &self.config.account) + .retryable_request() + .sensitive(sensitive) + .send(ctx) + .await + .map_err(|source| { + let path = path.as_ref().into(); + Error::GetRequest { source, path } + })?; + + match response.headers().get("x-ms-resource-type") { + Some(resource) if resource.as_ref() != b"file" => Err(crate::Error::NotFound { + path: path.to_string(), + source: format!( + "Not a file, got x-ms-resource-type: {}", + String::from_utf8_lossy(resource.as_ref()) + ) + .into(), + }), + _ => Ok(response), + } + } +} + +#[async_trait] +impl ListClient for Arc { + /// Make an Azure List request + async fn list_request( + &self, + prefix: Option<&str>, + opts: PaginatedListOptions, + ) -> Result { + if opts.offset.is_some() { + return Err(crate::Error::NotSupported { + source: "Azure does not support listing with offsets".into(), + }); + } + + let credential = self.get_credential().await?; + let url = self.config.path_url(&Path::default()); + + let mut query = Vec::with_capacity(5); + query.push(("restype", "container")); + query.push(("comp", "list")); + + if let Some(prefix) = prefix { + query.push(("prefix", prefix)) + } + + if let Some(delimiter) = &opts.delimiter { + query.push(("delimiter", delimiter.as_ref())) + } + + if let Some(token) = &opts.page_token { + query.push(("marker", token.as_ref())) + } + + let max_keys_str; + if let Some(max_keys) = &opts.max_keys { + max_keys_str = max_keys.to_string(); + query.push(("maxresults", max_keys_str.as_ref())) + } + + let sensitive = credential + .as_deref() + .map(|c| c.sensitive_request()) + .unwrap_or_default(); + + let response = self + .client + .get(url.as_str()) + .extensions(opts.extensions) + .query(&query) + .with_azure_authorization(&credential, &self.config.account) + .retryable(&self.config.retry_config) + .sensitive(sensitive) + .send() + .await + .map_err(|source| Error::ListRequest { source })? + .into_body() + .bytes() + .await + .map_err(|source| Error::ListResponseBody { source })?; + + let mut response: ListResultInternal = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidListResponse { source })?; + + let token = response.next_marker.take().filter(|x| !x.is_empty()); + + Ok(PaginatedListResult { + result: to_list_result(response, prefix)?, + page_token: token, + }) + } +} + +/// Raw / internal response from list requests +#[derive(Debug, Clone, PartialEq, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct ListResultInternal { + pub prefix: Option, + pub max_results: Option, + pub delimiter: Option, + pub next_marker: Option, + pub blobs: Blobs, +} + +fn to_list_result(value: ListResultInternal, prefix: Option<&str>) -> Result { + let prefix = prefix.unwrap_or_default(); + let common_prefixes = value + .blobs + .blob_prefix + .into_iter() + .map(|x| Ok(Path::parse(x.name)?)) + .collect::>()?; + + let objects = value + .blobs + .blobs + .into_iter() + // Note: Filters out directories from list results when hierarchical namespaces are + // enabled. 
When we want directories, its always via the BlobPrefix mechanics, + // and during lists we state that prefixes are evaluated on path segment basis. + .filter(|blob| { + !matches!(blob.properties.resource_type.as_ref(), Some(typ) if typ == "directory") + && blob.name.len() > prefix.len() + }) + .map(ObjectMeta::try_from) + .collect::>()?; + + Ok(ListResult { + common_prefixes, + objects, + }) +} + +/// Collection of blobs and potentially shared prefixes returned from list requests. +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct Blobs { + #[serde(default)] + pub blob_prefix: Vec, + #[serde(rename = "Blob", default)] + pub blobs: Vec, +} + +/// Common prefix in list blobs response +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct BlobPrefix { + pub name: String, +} + +/// Details for a specific blob +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct Blob { + pub name: String, + pub version_id: Option, + pub is_current_version: Option, + pub deleted: Option, + pub properties: BlobProperties, + pub metadata: Option>, +} + +impl TryFrom for ObjectMeta { + type Error = crate::Error; + + fn try_from(value: Blob) -> Result { + Ok(Self { + location: Path::parse(value.name)?, + last_modified: value.properties.last_modified, + size: value.properties.content_length, + e_tag: value.properties.e_tag, + version: None, // For consistency with S3 and GCP which don't include this + }) + } +} + +/// Properties associated with individual blobs. The actual list +/// of returned properties is much more exhaustive, but we limit +/// the parsed fields to the ones relevant in this crate. +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +#[serde(rename_all = "PascalCase")] +struct BlobProperties { + #[serde(deserialize_with = "deserialize_rfc1123", rename = "Last-Modified")] + pub last_modified: DateTime, + #[serde(rename = "Content-Length")] + pub content_length: u64, + #[serde(rename = "Content-Type")] + pub content_type: String, + #[serde(rename = "Content-Encoding")] + pub content_encoding: Option, + #[serde(rename = "Content-Language")] + pub content_language: Option, + #[serde(rename = "Etag")] + pub e_tag: Option, + #[serde(rename = "ResourceType")] + pub resource_type: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct BlockId(Bytes); + +impl BlockId { + pub(crate) fn new(block_id: impl Into) -> Self { + Self(block_id.into()) + } +} + +impl From for BlockId +where + B: Into, +{ + fn from(v: B) -> Self { + Self::new(v) + } +} + +impl AsRef<[u8]> for BlockId { + fn as_ref(&self) -> &[u8] { + self.0.as_ref() + } +} + +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub(crate) struct BlockList { + pub blocks: Vec, +} + +impl BlockList { + pub(crate) fn to_xml(&self) -> String { + let mut s = String::new(); + s.push_str("\n\n"); + for block_id in &self.blocks { + let node = format!( + "\t{}\n", + BASE64_STANDARD.encode(block_id) + ); + s.push_str(&node); + } + + s.push_str(""); + s + } +} + +#[derive(Debug, Clone, PartialEq, Deserialize)] +#[serde(rename_all = "PascalCase")] +pub(crate) struct UserDelegationKey { + pub signed_oid: String, + pub signed_tid: String, + pub signed_start: String, + pub signed_expiry: String, + pub signed_service: String, + pub signed_version: String, + pub value: String, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::StaticCredentialProvider; + use bytes::Bytes; + use regex::bytes::Regex; + use 
reqwest::Client; + + #[test] + fn deserde_azure() { + const S: &str = " + + + + blob0.txt + + Thu, 01 Jul 2021 10:44:59 GMT + Thu, 01 Jul 2021 10:44:59 GMT + Thu, 07 Jul 2022 14:38:48 GMT + 0x8D93C7D4629C227 + 8 + text/plain + + + + rvr3UC1SmUw7AZV2NqPN0g== + + + BlockBlob + Hot + true + unlocked + available + true + + uservalue + + + + blob1.txt + + Thu, 01 Jul 2021 10:44:59 GMT + Thu, 01 Jul 2021 10:44:59 GMT + 0x8D93C7D463004D6 + 8 + text/plain + + + + rvr3UC1SmUw7AZV2NqPN0g== + + + BlockBlob + Hot + true + unlocked + available + true + + + + + blob2.txt + + Thu, 01 Jul 2021 10:44:59 GMT + Thu, 01 Jul 2021 10:44:59 GMT + 0x8D93C7D4636478A + 8 + text/plain + + + + rvr3UC1SmUw7AZV2NqPN0g== + + + BlockBlob + Hot + true + unlocked + available + true + + + + + +"; + + let mut _list_blobs_response_internal: ListResultInternal = + quick_xml::de::from_str(S).unwrap(); + } + + #[test] + fn deserde_azurite() { + const S: &str = " + + + + 5000 + + + + blob0.txt + + Thu, 01 Jul 2021 10:45:02 GMT + Thu, 01 Jul 2021 10:45:02 GMT + 0x228281B5D517B20 + 8 + text/plain + rvr3UC1SmUw7AZV2NqPN0g== + BlockBlob + unlocked + available + true + Hot + true + Thu, 01 Jul 2021 10:45:02 GMT + + + + blob1.txt + + Thu, 01 Jul 2021 10:45:02 GMT + Thu, 01 Jul 2021 10:45:02 GMT + 0x1DD959381A8A860 + 8 + text/plain + rvr3UC1SmUw7AZV2NqPN0g== + BlockBlob + unlocked + available + true + Hot + true + Thu, 01 Jul 2021 10:45:02 GMT + + + + blob2.txt + + Thu, 01 Jul 2021 10:45:02 GMT + Thu, 01 Jul 2021 10:45:02 GMT + 0x1FBE9C9B0C7B650 + 8 + text/plain + rvr3UC1SmUw7AZV2NqPN0g== + BlockBlob + unlocked + available + true + Hot + true + Thu, 01 Jul 2021 10:45:02 GMT + + + + +"; + + let _list_blobs_response_internal: ListResultInternal = quick_xml::de::from_str(S).unwrap(); + } + + #[test] + fn to_xml() { + const S: &str = " + +\tbnVtZXJvMQ== +\tbnVtZXJvMg== +\tbnVtZXJvMw== +"; + let mut blocks = BlockList { blocks: Vec::new() }; + blocks.blocks.push(Bytes::from_static(b"numero1").into()); + blocks.blocks.push("numero2".into()); + blocks.blocks.push("numero3".into()); + + let res: &str = &blocks.to_xml(); + + assert_eq!(res, S) + } + + #[test] + fn test_delegated_key_response() { + const S: &str = r#" + + String containing a GUID value + String containing a GUID value + String formatted as ISO date + String formatted as ISO date + b + String specifying REST api version to use to create the user delegation key + String containing the user delegation key +"#; + + let _delegated_key_response_internal: UserDelegationKey = + quick_xml::de::from_str(S).unwrap(); + } + + #[tokio::test] + async fn test_build_bulk_delete_body() { + let credential_provider = Arc::new(StaticCredentialProvider::new( + AzureCredential::BearerToken("static-token".to_string()), + )); + + let config = AzureConfig { + account: "testaccount".to_string(), + container: "testcontainer".to_string(), + credentials: credential_provider, + service: "http://example.com".try_into().unwrap(), + retry_config: Default::default(), + is_emulator: false, + skip_signature: false, + disable_tagging: false, + client_options: Default::default(), + }; + + let client = AzureClient::new(config, HttpClient::new(Client::new())); + + let credential = client.get_credential().await.unwrap(); + let paths = &[Path::from("a"), Path::from("b"), Path::from("c")]; + + let boundary = "batch_statictestboundary".to_string(); + + let body_bytes = client.build_bulk_delete_body(&boundary, paths, &credential); + + // Replace Date header value with a static date + let re = 
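+        // The Date header is set during request signing and changes on every
+        // run, so it is normalized to a fixed value before comparing against
+        // the expected body.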
Regex::new("Date:[^\r]+").unwrap(); + let body_bytes = re + .replace_all(&body_bytes, b"Date: Tue, 05 Nov 2024 15:01:15 GMT") + .to_vec(); + + let expected_body = b"--batch_statictestboundary\r +Content-Type: application/http\r +Content-Transfer-Encoding: binary\r +Content-ID: 0\r +\r +DELETE /testcontainer/a HTTP/1.1\r +Content-Length: 0\r +Date: Tue, 05 Nov 2024 15:01:15 GMT\r +X-Ms-Version: 2023-11-03\r +Authorization: Bearer static-token\r +\r +\r +--batch_statictestboundary\r +Content-Type: application/http\r +Content-Transfer-Encoding: binary\r +Content-ID: 1\r +\r +DELETE /testcontainer/b HTTP/1.1\r +Content-Length: 0\r +Date: Tue, 05 Nov 2024 15:01:15 GMT\r +X-Ms-Version: 2023-11-03\r +Authorization: Bearer static-token\r +\r +\r +--batch_statictestboundary\r +Content-Type: application/http\r +Content-Transfer-Encoding: binary\r +Content-ID: 2\r +\r +DELETE /testcontainer/c HTTP/1.1\r +Content-Length: 0\r +Date: Tue, 05 Nov 2024 15:01:15 GMT\r +X-Ms-Version: 2023-11-03\r +Authorization: Bearer static-token\r +\r +\r +--batch_statictestboundary--\r\n" + .to_vec(); + + assert_eq!(expected_body, body_bytes); + } + + #[tokio::test] + async fn test_parse_blob_batch_delete_body() { + let response_body = b"--batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed\r +Content-Type: application/http\r +Content-ID: 0\r +\r +HTTP/1.1 202 Accepted\r +x-ms-delete-type-permanent: true\r +x-ms-request-id: 778fdc83-801e-0000-62ff-0334671e284f\r +x-ms-version: 2018-11-09\r +\r +--batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed\r +Content-Type: application/http\r +Content-ID: 1\r +\r +HTTP/1.1 202 Accepted\r +x-ms-delete-type-permanent: true\r +x-ms-request-id: 778fdc83-801e-0000-62ff-0334671e2851\r +x-ms-version: 2018-11-09\r +\r +--batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed\r +Content-Type: application/http\r +Content-ID: 2\r +\r +HTTP/1.1 404 The specified blob does not exist.\r +x-ms-error-code: BlobNotFound\r +x-ms-request-id: 778fdc83-801e-0000-62ff-0334671e2852\r +x-ms-version: 2018-11-09\r +Content-Length: 216\r +Content-Type: application/xml\r +\r + +BlobNotFoundThe specified blob does not exist. +RequestId:778fdc83-801e-0000-62ff-0334671e2852 +Time:2018-06-14T16:46:54.6040685Z\r +--batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed--\r\n"; + + let response: HttpResponse = http::Response::builder() + .status(202) + .header("Transfer-Encoding", "chunked") + .header( + "Content-Type", + "multipart/mixed; boundary=batchresponse_66925647-d0cb-4109-b6d3-28efe3e1e5ed", + ) + .header("x-ms-request-id", "778fdc83-801e-0000-62ff-033467000000") + .header("x-ms-version", "2018-11-09") + .body(Bytes::from(response_body.as_slice()).into()) + .unwrap(); + + let boundary = parse_multipart_response_boundary(&response).unwrap(); + let body = response.into_body().bytes().await.unwrap(); + + let paths = &[Path::from("a"), Path::from("b"), Path::from("c")]; + + let results = parse_blob_batch_delete_body(body, boundary, paths) + .await + .unwrap(); + + assert!(results[0].is_ok()); + assert_eq!(&paths[0], results[0].as_ref().unwrap()); + + assert!(results[1].is_ok()); + assert_eq!(&paths[1], results[1].as_ref().unwrap()); + + assert!(results[2].is_err()); + let err = results[2].as_ref().unwrap_err(); + let crate::Error::NotFound { source, .. 
} = err else { + unreachable!("must be not found") + }; + let Some(Error::DeleteFailed { path, code, reason }) = source.downcast_ref::() + else { + unreachable!("must be client error") + }; + + assert_eq!(paths[2].as_ref(), path); + assert_eq!("404", code); + assert_eq!("The specified blob does not exist.", reason); + } +} diff --git a/rust/object_store/src/azure/credential.rs b/rust/object_store/src/azure/credential.rs new file mode 100644 index 0000000000..372088428c --- /dev/null +++ b/rust/object_store/src/azure/credential.rs @@ -0,0 +1,1220 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::client::UserDelegationKey; +use crate::azure::STORE; +use crate::client::builder::{add_query_pairs, HttpRequestBuilder}; +use crate::client::retry::RetryExt; +use crate::client::token::{TemporaryToken, TokenCache}; +use crate::client::{CredentialProvider, HttpClient, HttpError, HttpRequest, TokenProvider}; +use crate::util::hmac_sha256; +use crate::RetryConfig; +use async_trait::async_trait; +use base64::prelude::{BASE64_STANDARD, BASE64_URL_SAFE_NO_PAD}; +use base64::Engine; +use chrono::{DateTime, SecondsFormat, Utc}; +use http::header::{ + HeaderMap, HeaderName, HeaderValue, ACCEPT, AUTHORIZATION, CONTENT_ENCODING, CONTENT_LANGUAGE, + CONTENT_LENGTH, CONTENT_TYPE, DATE, IF_MATCH, IF_MODIFIED_SINCE, IF_NONE_MATCH, + IF_UNMODIFIED_SINCE, RANGE, +}; +use http::Method; +use serde::Deserialize; +use std::borrow::Cow; +use std::collections::HashMap; +use std::fmt::Debug; +use std::ops::Deref; +use std::process::Command; +use std::str; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime}; +use url::Url; + +static AZURE_VERSION: HeaderValue = HeaderValue::from_static("2023-11-03"); +static VERSION: HeaderName = HeaderName::from_static("x-ms-version"); +pub(crate) static BLOB_TYPE: HeaderName = HeaderName::from_static("x-ms-blob-type"); +pub(crate) static DELETE_SNAPSHOTS: HeaderName = HeaderName::from_static("x-ms-delete-snapshots"); +pub(crate) static COPY_SOURCE: HeaderName = HeaderName::from_static("x-ms-copy-source"); +static CONTENT_MD5: HeaderName = HeaderName::from_static("content-md5"); +static PARTNER_TOKEN: HeaderName = HeaderName::from_static("x-ms-partner-token"); +static CLUSTER_IDENTIFIER: HeaderName = HeaderName::from_static("x-ms-cluster-identifier"); +static WORKLOAD_RESOURCE: HeaderName = HeaderName::from_static("x-ms-workload-resource-moniker"); +static PROXY_HOST: HeaderName = HeaderName::from_static("x-ms-proxy-host"); +pub(crate) const RFC1123_FMT: &str = "%a, %d %h %Y %T GMT"; +const CONTENT_TYPE_JSON: &str = "application/json"; +const MSI_SECRET_ENV_KEY: &str = "IDENTITY_HEADER"; +const MSI_API_VERSION: &str = "2019-08-01"; +const TOKEN_MIN_TTL: u64 = 300; + +/// OIDC scope used when interacting with 
OAuth2 APIs +/// +/// +const AZURE_STORAGE_SCOPE: &str = "https://storage.azure.com/.default"; + +/// Resource ID used when obtaining an access token from the metadata endpoint +/// +/// +const AZURE_STORAGE_RESOURCE: &str = "https://storage.azure.com"; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Error performing token request: {}", source)] + TokenRequest { + source: crate::client::retry::RetryError, + }, + + #[error("Error getting token response body: {}", source)] + TokenResponseBody { source: HttpError }, + + #[error("Error reading federated token file ")] + FederatedTokenFile, + + #[error("Invalid Access Key: {}", source)] + InvalidAccessKey { source: base64::DecodeError }, + + #[error("'az account get-access-token' command failed: {message}")] + AzureCli { message: String }, + + #[error("Failed to parse azure cli response: {source}")] + AzureCliResponse { source: serde_json::Error }, + + #[error("Generating SAS keys with SAS tokens auth is not supported")] + SASforSASNotSupported, +} + +pub(crate) type Result = std::result::Result; + +impl From for crate::Error { + fn from(value: Error) -> Self { + Self::Generic { + store: STORE, + source: Box::new(value), + } + } +} + +/// A shared Azure Storage Account Key +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct AzureAccessKey(Vec); + +impl AzureAccessKey { + /// Create a new [`AzureAccessKey`], checking it for validity + pub fn try_new(key: &str) -> Result { + let key = BASE64_STANDARD + .decode(key) + .map_err(|source| Error::InvalidAccessKey { source })?; + + Ok(Self(key)) + } +} + +/// An Azure storage credential +#[derive(Debug, Eq, PartialEq)] +pub enum AzureCredential { + /// A shared access key + /// + /// + AccessKey(AzureAccessKey), + /// A shared access signature + /// + /// + SASToken(Vec<(String, String)>), + /// An authorization token + /// + /// + BearerToken(String), +} + +impl AzureCredential { + /// Determines if the credential requires the request be treated as sensitive + pub fn sensitive_request(&self) -> bool { + match self { + Self::AccessKey(_) => false, + Self::BearerToken(_) => false, + // SAS tokens are sent as query parameters in the url + Self::SASToken(_) => true, + } + } +} + +/// A list of known Azure authority hosts +pub mod authority_hosts { + /// China-based Azure Authority Host + pub const AZURE_CHINA: &str = "https://login.chinacloudapi.cn"; + /// Germany-based Azure Authority Host + pub const AZURE_GERMANY: &str = "https://login.microsoftonline.de"; + /// US Government Azure Authority Host + pub const AZURE_GOVERNMENT: &str = "https://login.microsoftonline.us"; + /// Public Cloud Azure Authority Host + pub const AZURE_PUBLIC_CLOUD: &str = "https://login.microsoftonline.com"; +} + +pub(crate) struct AzureSigner { + signing_key: AzureAccessKey, + start: DateTime, + end: DateTime, + account: String, + delegation_key: Option, +} + +impl AzureSigner { + pub(crate) fn new( + signing_key: AzureAccessKey, + account: String, + start: DateTime, + end: DateTime, + delegation_key: Option, + ) -> Self { + Self { + signing_key, + account, + start, + end, + delegation_key, + } + } + + pub(crate) fn sign(&self, method: &Method, url: &mut Url) -> Result<()> { + let (str_to_sign, query_pairs) = match &self.delegation_key { + Some(delegation_key) => string_to_sign_user_delegation_sas( + url, + method, + &self.account, + &self.start, + &self.end, + delegation_key, + ), + None => string_to_sign_service_sas(url, method, &self.account, &self.start, &self.end), + }; + let auth = 
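+        // The SAS signature is an HMAC-SHA256 over the canonical string-to-sign,
+        // keyed by the account access key (or user delegation key), then
+        // base64-encoded into the `sig` query parameter below.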
hmac_sha256(&self.signing_key.0, str_to_sign); + url.query_pairs_mut().extend_pairs(query_pairs); + url.query_pairs_mut() + .append_pair("sig", BASE64_STANDARD.encode(auth).as_str()); + Ok(()) + } +} + +fn add_date_and_version_headers(request: &mut HttpRequest) { + // rfc2822 string should never contain illegal characters + let date = Utc::now(); + let date_str = date.format(RFC1123_FMT).to_string(); + // we formatted the data string ourselves, so unwrapping should be fine + let date_val = HeaderValue::from_str(&date_str).unwrap(); + request.headers_mut().insert(DATE, date_val); + request + .headers_mut() + .insert(&VERSION, AZURE_VERSION.clone()); +} + +/// Authorize a [`HttpRequest`] with an [`AzureAuthorizer`] +#[derive(Debug)] +pub struct AzureAuthorizer<'a> { + credential: &'a AzureCredential, + account: &'a str, +} + +impl<'a> AzureAuthorizer<'a> { + /// Create a new [`AzureAuthorizer`] + pub fn new(credential: &'a AzureCredential, account: &'a str) -> Self { + AzureAuthorizer { + credential, + account, + } + } + + /// Authorize `request` + pub fn authorize(&self, request: &mut HttpRequest) { + add_date_and_version_headers(request); + + match self.credential { + AzureCredential::AccessKey(key) => { + let url = Url::parse(&request.uri().to_string()).unwrap(); + let signature = generate_authorization( + request.headers(), + &url, + request.method(), + self.account, + key, + ); + + // "signature" is a base 64 encoded string so it should never + // contain illegal characters + request.headers_mut().append( + AUTHORIZATION, + HeaderValue::from_str(signature.as_str()).unwrap(), + ); + } + AzureCredential::BearerToken(token) => { + request.headers_mut().append( + AUTHORIZATION, + HeaderValue::from_str(format!("Bearer {token}").as_str()).unwrap(), + ); + } + AzureCredential::SASToken(query_pairs) => { + add_query_pairs(request.uri_mut(), query_pairs); + } + } + } +} + +pub(crate) trait CredentialExt { + /// Apply authorization to requests against azure storage accounts + /// + fn with_azure_authorization( + self, + credential: &Option>, + account: &str, + ) -> Self; +} + +impl CredentialExt for HttpRequestBuilder { + fn with_azure_authorization( + self, + credential: &Option>, + account: &str, + ) -> Self { + let (client, request) = self.into_parts(); + let mut request = request.expect("request valid"); + + match credential.as_deref() { + Some(credential) => { + AzureAuthorizer::new(credential, account).authorize(&mut request); + } + None => { + add_date_and_version_headers(&mut request); + } + } + + Self::from_parts(client, request) + } +} + +/// Generate signed key for authorization via access keys +/// +fn generate_authorization( + h: &HeaderMap, + u: &Url, + method: &Method, + account: &str, + key: &AzureAccessKey, +) -> String { + let str_to_sign = string_to_sign(h, u, method, account); + let auth = hmac_sha256(&key.0, str_to_sign); + format!("SharedKey {}:{}", account, BASE64_STANDARD.encode(auth)) +} + +fn add_if_exists<'a>(h: &'a HeaderMap, key: &HeaderName) -> &'a str { + h.get(key) + .map(|s| s.to_str()) + .transpose() + .ok() + .flatten() + .unwrap_or_default() +} + +fn string_to_sign_sas( + u: &Url, + method: &Method, + account: &str, + start: &DateTime, + end: &DateTime, +) -> (String, String, String, String, String) { + // NOTE: for now only blob signing is supported. 
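+    // "b" marks the signed resource as a single blob; "c" would designate a
+    // container (see the permissions mapping below).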
+    let signed_resource = "b".to_string();
+
+    // https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas#permissions-for-a-directory-container-or-blob
+    let signed_permissions = match *method {
+        // read and list permissions
+        Method::GET => match signed_resource.as_str() {
+            "c" => "rl",
+            "b" => "r",
+            _ => unreachable!(),
+        },
+        // write permissions (also allows creating a new blob in a sub-key)
+        Method::PUT => "w",
+        // delete permissions
+        Method::DELETE => "d",
+        // other methods are not used in any of the current operations
+        _ => "",
+    }
+    .to_string();
+    let signed_start = start.to_rfc3339_opts(SecondsFormat::Secs, true);
+    let signed_expiry = end.to_rfc3339_opts(SecondsFormat::Secs, true);
+    let canonicalized_resource = if u.host_str().unwrap_or_default().contains(account) {
+        format!("/blob/{}{}", account, u.path())
+    } else {
+        // NOTE: in the case of the emulator, the account name is not part of the host
+        // but the path starts with the account name
+        format!("/blob{}", u.path())
+    };
+
+    (
+        signed_resource,
+        signed_permissions,
+        signed_start,
+        signed_expiry,
+        canonicalized_resource,
+    )
+}
+
+/// Create a string to be signed for authorization via [service sas].
+///
+/// [service sas]: https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas#version-2020-12-06-and-later
+fn string_to_sign_service_sas(
+    u: &Url,
+    method: &Method,
+    account: &str,
+    start: &DateTime<Utc>,
+    end: &DateTime<Utc>,
+) -> (String, HashMap<&'static str, String>) {
+    let (signed_resource, signed_permissions, signed_start, signed_expiry, canonicalized_resource) =
+        string_to_sign_sas(u, method, account, start, end);
+
+    let string_to_sign = format!(
+        "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}",
+        signed_permissions,
+        signed_start,
+        signed_expiry,
+        canonicalized_resource,
+        "",                               // signed identifier
+        "",                               // signed ip
+        "",                               // signed protocol
+        &AZURE_VERSION.to_str().unwrap(), // signed version
+        signed_resource,                  // signed resource
+        "",                               // signed snapshot time
+        "",                               // signed encryption scope
+        "",                               // rscc - response header: Cache-Control
+        "",                               // rscd - response header: Content-Disposition
+        "",                               // rsce - response header: Content-Encoding
+        "",                               // rscl - response header: Content-Language
+        "",                               // rsct - response header: Content-Type
+    );
+
+    let mut pairs = HashMap::new();
+    pairs.insert("sv", AZURE_VERSION.to_str().unwrap().to_string());
+    pairs.insert("sp", signed_permissions);
+    pairs.insert("st", signed_start);
+    pairs.insert("se", signed_expiry);
+    pairs.insert("sr", signed_resource);
+
+    (string_to_sign, pairs)
+}
+
+/// Create a string to be signed for authorization via [user delegation sas].
+/// +/// [user delegation sas]: https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas#version-2020-12-06-and-later +fn string_to_sign_user_delegation_sas( + u: &Url, + method: &Method, + account: &str, + start: &DateTime, + end: &DateTime, + delegation_key: &UserDelegationKey, +) -> (String, HashMap<&'static str, String>) { + let (signed_resource, signed_permissions, signed_start, signed_expiry, canonicalized_resource) = + string_to_sign_sas(u, method, account, start, end); + + let string_to_sign = format!( + "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}", + signed_permissions, + signed_start, + signed_expiry, + canonicalized_resource, + delegation_key.signed_oid, // signed key object id + delegation_key.signed_tid, // signed key tenant id + delegation_key.signed_start, // signed key start + delegation_key.signed_expiry, // signed key expiry + delegation_key.signed_service, // signed key service + delegation_key.signed_version, // signed key version + "", // signed authorized user object id + "", // signed unauthorized user object id + "", // signed correlation id + "", // signed ip + "", // signed protocol + &AZURE_VERSION.to_str().unwrap(), // signed version + signed_resource, // signed resource + "", // signed snapshot time + "", // signed encryption scope + "", // rscc - response header: Cache-Control + "", // rscd - response header: Content-Disposition + "", // rsce - response header: Content-Encoding + "", // rscl - response header: Content-Language + "", // rsct - response header: Content-Type + ); + + let mut pairs = HashMap::new(); + pairs.insert("sv", AZURE_VERSION.to_str().unwrap().to_string()); + pairs.insert("sp", signed_permissions); + pairs.insert("st", signed_start); + pairs.insert("se", signed_expiry); + pairs.insert("sr", signed_resource); + pairs.insert("skoid", delegation_key.signed_oid.clone()); + pairs.insert("sktid", delegation_key.signed_tid.clone()); + pairs.insert("skt", delegation_key.signed_start.clone()); + pairs.insert("ske", delegation_key.signed_expiry.clone()); + pairs.insert("sks", delegation_key.signed_service.clone()); + pairs.insert("skv", delegation_key.signed_version.clone()); + + (string_to_sign, pairs) +} + +/// +fn string_to_sign(h: &HeaderMap, u: &Url, method: &Method, account: &str) -> String { + // content length must only be specified if != 0 + // this is valid from 2015-02-21 + let content_length = h + .get(&CONTENT_LENGTH) + .map(|s| s.to_str()) + .transpose() + .ok() + .flatten() + .filter(|&v| v != "0") + .unwrap_or_default(); + format!( + "{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}{}", + method.as_ref(), + add_if_exists(h, &CONTENT_ENCODING), + add_if_exists(h, &CONTENT_LANGUAGE), + content_length, + add_if_exists(h, &CONTENT_MD5), + add_if_exists(h, &CONTENT_TYPE), + add_if_exists(h, &DATE), + add_if_exists(h, &IF_MODIFIED_SINCE), + add_if_exists(h, &IF_MATCH), + add_if_exists(h, &IF_NONE_MATCH), + add_if_exists(h, &IF_UNMODIFIED_SINCE), + add_if_exists(h, &RANGE), + canonicalize_header(h), + canonicalize_resource(account, u) + ) +} + +/// +fn canonicalize_header(headers: &HeaderMap) -> String { + let mut names = headers + .iter() + .filter(|&(k, _)| k.as_str().starts_with("x-ms")) + // TODO remove unwraps + .map(|(k, _)| (k.as_str(), headers.get(k).unwrap().to_str().unwrap())) + .collect::>(); + names.sort_unstable(); + + let mut result = String::new(); + for (name, value) in names { + result.push_str(name); + result.push(':'); + 
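+        // Each canonicalized header is emitted as `name:value\n`; the x-ms-*
+        // names are already lowercase (an `http::HeaderName` invariant) and
+        // were sorted above.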
result.push_str(value); + result.push('\n'); + } + result +} + +/// +fn canonicalize_resource(account: &str, uri: &Url) -> String { + let mut can_res: String = String::new(); + can_res.push('/'); + can_res.push_str(account); + can_res.push_str(uri.path().to_string().as_str()); + can_res.push('\n'); + + // query parameters + let query_pairs = uri.query_pairs(); + { + let mut qps: Vec = Vec::new(); + for (q, _) in query_pairs { + if !(qps.iter().any(|x| x == &*q)) { + qps.push(q.into_owned()); + } + } + + qps.sort(); + + for qparam in qps { + // find correct parameter + let ret = lexy_sort(query_pairs, &qparam); + + can_res = can_res + &qparam.to_lowercase() + ":"; + + for (i, item) in ret.iter().enumerate() { + if i > 0 { + can_res.push(','); + } + can_res.push_str(item); + } + + can_res.push('\n'); + } + }; + + can_res[0..can_res.len() - 1].to_owned() +} + +fn lexy_sort<'a>( + vec: impl Iterator, Cow<'a, str>)> + 'a, + query_param: &str, +) -> Vec> { + let mut values = vec + .filter(|(k, _)| *k == query_param) + .map(|(_, v)| v) + .collect::>(); + values.sort_unstable(); + values +} + +/// +#[derive(Deserialize, Debug)] +struct OAuthTokenResponse { + access_token: String, + expires_in: u64, +} + +/// Encapsulates the logic to perform an OAuth token challenge +/// +/// +#[derive(Debug)] +pub(crate) struct ClientSecretOAuthProvider { + token_url: String, + client_id: String, + client_secret: String, +} + +impl ClientSecretOAuthProvider { + /// Create a new [`ClientSecretOAuthProvider`] for an azure backed store + pub(crate) fn new( + client_id: String, + client_secret: String, + tenant_id: impl AsRef, + authority_host: Option, + ) -> Self { + let authority_host = + authority_host.unwrap_or_else(|| authority_hosts::AZURE_PUBLIC_CLOUD.to_owned()); + + Self { + token_url: format!( + "{}/{}/oauth2/v2.0/token", + authority_host, + tenant_id.as_ref() + ), + client_id, + client_secret, + } + } +} + +#[async_trait::async_trait] +impl TokenProvider for ClientSecretOAuthProvider { + type Credential = AzureCredential; + + /// Fetch a token + async fn fetch_token( + &self, + client: &HttpClient, + retry: &RetryConfig, + ) -> crate::Result>> { + let response: OAuthTokenResponse = client + .request(Method::POST, &self.token_url) + .header(ACCEPT, HeaderValue::from_static(CONTENT_TYPE_JSON)) + .form([ + ("client_id", self.client_id.as_str()), + ("client_secret", self.client_secret.as_str()), + ("scope", AZURE_STORAGE_SCOPE), + ("grant_type", "client_credentials"), + ]) + .retryable(retry) + .idempotent(true) + .send() + .await + .map_err(|source| Error::TokenRequest { source })? + .into_body() + .json() + .await + .map_err(|source| Error::TokenResponseBody { source })?; + + Ok(TemporaryToken { + token: Arc::new(AzureCredential::BearerToken(response.access_token)), + expiry: Some(Instant::now() + Duration::from_secs(response.expires_in)), + }) + } +} + +fn expires_on_string<'de, D>(deserializer: D) -> std::result::Result +where + D: serde::de::Deserializer<'de>, +{ + let v = String::deserialize(deserializer)?; + let v = v.parse::().map_err(serde::de::Error::custom)?; + let now = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .map_err(serde::de::Error::custom)?; + + Ok(Instant::now() + Duration::from_secs(v.saturating_sub(now.as_secs()))) +} + +/// NOTE: expires_on is a String version of unix epoch time, not an integer. 
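+/// For example, a (hypothetical) `expires_on` of `"1674681651"` is parsed as a
+/// u64 unix timestamp and converted into an `Instant` relative to the current
+/// system time.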
+/// +/// +#[derive(Debug, Clone, Deserialize)] +struct ImdsTokenResponse { + pub access_token: String, + #[serde(deserialize_with = "expires_on_string")] + pub expires_on: Instant, +} + +/// Attempts authentication using a managed identity that has been assigned to the deployment environment. +/// +/// This authentication type works in Azure VMs, App Service and Azure Functions applications, as well as the Azure Cloud Shell +/// +#[derive(Debug)] +pub(crate) struct ImdsManagedIdentityProvider { + msi_endpoint: String, + client_id: Option, + object_id: Option, + msi_res_id: Option, +} + +impl ImdsManagedIdentityProvider { + /// Create a new [`ImdsManagedIdentityProvider`] for an azure backed store + pub(crate) fn new( + client_id: Option, + object_id: Option, + msi_res_id: Option, + msi_endpoint: Option, + ) -> Self { + let msi_endpoint = msi_endpoint + .unwrap_or_else(|| "http://169.254.169.254/metadata/identity/oauth2/token".to_owned()); + + Self { + msi_endpoint, + client_id, + object_id, + msi_res_id, + } + } +} + +#[async_trait::async_trait] +impl TokenProvider for ImdsManagedIdentityProvider { + type Credential = AzureCredential; + + /// Fetch a token + async fn fetch_token( + &self, + client: &HttpClient, + retry: &RetryConfig, + ) -> crate::Result>> { + let mut query_items = vec![ + ("api-version", MSI_API_VERSION), + ("resource", AZURE_STORAGE_RESOURCE), + ]; + + let mut identity = None; + if let Some(client_id) = &self.client_id { + identity = Some(("client_id", client_id)); + } + if let Some(object_id) = &self.object_id { + identity = Some(("object_id", object_id)); + } + if let Some(msi_res_id) = &self.msi_res_id { + identity = Some(("msi_res_id", msi_res_id)); + } + if let Some((key, value)) = identity { + query_items.push((key, value)); + } + + let mut builder = client + .request(Method::GET, &self.msi_endpoint) + .header("metadata", "true") + .query(&query_items); + + if let Ok(val) = std::env::var(MSI_SECRET_ENV_KEY) { + builder = builder.header("x-identity-header", val); + }; + + let response: ImdsTokenResponse = builder + .send_retry(retry) + .await + .map_err(|source| Error::TokenRequest { source })? 
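+            // IMDS returns JSON whose `expires_on` field is a stringified unix
+            // timestamp, decoded by `expires_on_string` above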
+            .into_body()
+            .json()
+            .await
+            .map_err(|source| Error::TokenResponseBody { source })?;
+
+        Ok(TemporaryToken {
+            token: Arc::new(AzureCredential::BearerToken(response.access_token)),
+            expiry: Some(response.expires_on),
+        })
+    }
+}
+
+/// Credential for using workload identity federation
+///
+///
+#[derive(Debug)]
+pub(crate) struct WorkloadIdentityOAuthProvider {
+    token_url: String,
+    client_id: String,
+    federated_token_file: String,
+}
+
+impl WorkloadIdentityOAuthProvider {
+    /// Create a new [`WorkloadIdentityOAuthProvider`] for an azure backed store
+    pub(crate) fn new(
+        client_id: impl Into<String>,
+        federated_token_file: impl Into<String>,
+        tenant_id: impl AsRef<str>,
+        authority_host: Option<String>,
+    ) -> Self {
+        let authority_host =
+            authority_host.unwrap_or_else(|| authority_hosts::AZURE_PUBLIC_CLOUD.to_owned());
+
+        Self {
+            token_url: format!(
+                "{}/{}/oauth2/v2.0/token",
+                authority_host,
+                tenant_id.as_ref()
+            ),
+            client_id: client_id.into(),
+            federated_token_file: federated_token_file.into(),
+        }
+    }
+}
+
+#[async_trait::async_trait]
+impl TokenProvider for WorkloadIdentityOAuthProvider {
+    type Credential = AzureCredential;
+
+    /// Fetch a token
+    async fn fetch_token(
+        &self,
+        client: &HttpClient,
+        retry: &RetryConfig,
+    ) -> crate::Result<TemporaryToken<Arc<Self::Credential>>> {
+        let token_str = std::fs::read_to_string(&self.federated_token_file)
+            .map_err(|_| Error::FederatedTokenFile)?;
+
+        // https://learn.microsoft.com/en-us/azure/active-directory/develop/v2-oauth2-client-creds-grant-flow#third-case-access-token-request-with-a-federated-credential
+        let response: OAuthTokenResponse = client
+            .request(Method::POST, &self.token_url)
+            .header(ACCEPT, HeaderValue::from_static(CONTENT_TYPE_JSON))
+            .form([
+                ("client_id", self.client_id.as_str()),
+                (
+                    "client_assertion_type",
+                    "urn:ietf:params:oauth:client-assertion-type:jwt-bearer",
+                ),
+                ("client_assertion", token_str.as_str()),
+                ("scope", AZURE_STORAGE_SCOPE),
+                ("grant_type", "client_credentials"),
+            ])
+            .retryable(retry)
+            .idempotent(true)
+            .send()
+            .await
+            .map_err(|source| Error::TokenRequest { source })?
+            .into_body()
+            .json()
+            .await
+            .map_err(|source| Error::TokenResponseBody { source })?;
+
+        Ok(TemporaryToken {
+            token: Arc::new(AzureCredential::BearerToken(response.access_token)),
+            expiry: Some(Instant::now() + Duration::from_secs(response.expires_in)),
+        })
+    }
+}
+
+mod az_cli_date_format {
+    use chrono::{DateTime, TimeZone};
+    use serde::{self, Deserialize, Deserializer};
+
+    pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result<DateTime<chrono::Local>, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let s = String::deserialize(deserializer)?;
+        // expiresOn from azure cli uses the local timezone
+        let date = chrono::NaiveDateTime::parse_from_str(&s, "%Y-%m-%d %H:%M:%S.%6f")
+            .map_err(serde::de::Error::custom)?;
+        chrono::Local
+            .from_local_datetime(&date)
+            .single()
+            .ok_or(serde::de::Error::custom(
+                "azure cli returned ambiguous expiry date",
+            ))
+    }
+}
+
+#[derive(Debug, Clone, Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct AzureCliTokenResponse {
+    pub access_token: String,
+    #[serde(with = "az_cli_date_format")]
+    pub expires_on: DateTime<chrono::Local>,
+    pub token_type: String,
+}
+
+#[derive(Default, Debug)]
+pub(crate) struct AzureCliCredential {
+    cache: TokenCache<Arc<AzureCredential>>,
+}
+
+impl AzureCliCredential {
+    pub(crate) fn new() -> Self {
+        Self::default()
+    }
+
+    /// Fetch a token
+    async fn fetch_token(&self) -> Result<TemporaryToken<Arc<AzureCredential>>> {
+        // on windows, az is a cmd and it should be called like this
+        // see https://doc.rust-lang.org/nightly/std/process/struct.Command.html
+        let program = if cfg!(target_os = "windows") {
+            "cmd"
+        } else {
+            "az"
+        };
+        let mut args = Vec::new();
+        if cfg!(target_os = "windows") {
+            args.push("/C");
+            args.push("az");
+        }
+        args.push("account");
+        args.push("get-access-token");
+        args.push("--output");
+        args.push("json");
+        args.push("--scope");
+        args.push(AZURE_STORAGE_SCOPE);
+
+        match Command::new(program).args(args).output() {
+            Ok(az_output) if az_output.status.success() => {
+                let output = str::from_utf8(&az_output.stdout).map_err(|_| Error::AzureCli {
+                    message: "az response is not a valid utf-8 string".to_string(),
+                })?;
+
+                let token_response = serde_json::from_str::<AzureCliTokenResponse>(output)
+                    .map_err(|source| Error::AzureCliResponse { source })?;
+
+                if !token_response.token_type.eq_ignore_ascii_case("bearer") {
+                    return Err(Error::AzureCli {
+                        message: format!(
+                            "got unexpected token type from azure cli: {0}",
+                            token_response.token_type
+                        ),
+                    });
+                }
+                let duration =
+                    token_response.expires_on.naive_local() - chrono::Local::now().naive_local();
+                Ok(TemporaryToken {
+                    token: Arc::new(AzureCredential::BearerToken(token_response.access_token)),
+                    expiry: Some(
+                        Instant::now()
+                            + duration.to_std().map_err(|_| Error::AzureCli {
+                                message: "az returned invalid lifetime".to_string(),
+                            })?,
+                    ),
+                })
+            }
+            Ok(az_output) => {
+                let message = String::from_utf8_lossy(&az_output.stderr);
+                Err(Error::AzureCli {
+                    message: message.into(),
+                })
+            }
+            Err(e) => match e.kind() {
+                std::io::ErrorKind::NotFound => Err(Error::AzureCli {
+                    message: "Azure Cli not installed".into(),
+                }),
+                error_kind => Err(Error::AzureCli {
+                    message: format!("io error: {error_kind:?}"),
+                }),
+            },
+        }
+    }
+}
+
+/// Encapsulates the logic to perform an OAuth token challenge for Fabric
+#[derive(Debug)]
+pub(crate) struct FabricTokenOAuthProvider {
+    fabric_token_service_url: String,
+    fabric_workload_host: String,
+    fabric_session_token: String,
+    fabric_cluster_identifier: String,
+    storage_access_token: Option<String>,
+    token_expiry: Option<u64>,
+}
+
+#[derive(Debug, Deserialize)]
+struct Claims {
+    exp: u64,
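+    // JWT "exp" claim: expiry in seconds since the Unix epoch, compared against
+    // `get_current_timestamp()` when deciding whether a cached token is still usable.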
+}
+
+impl FabricTokenOAuthProvider {
+    /// Create a new [`FabricTokenOAuthProvider`] for an azure backed store
+    pub(crate) fn new(
+        fabric_token_service_url: impl Into<String>,
+        fabric_workload_host: impl Into<String>,
+        fabric_session_token: impl Into<String>,
+        fabric_cluster_identifier: impl Into<String>,
+        storage_access_token: Option<String>,
+    ) -> Self {
+        let (storage_access_token, token_expiry) = match storage_access_token {
+            Some(token) => match Self::validate_and_get_expiry(&token) {
+                Some(expiry) if expiry > Self::get_current_timestamp() + TOKEN_MIN_TTL => {
+                    (Some(token), Some(expiry))
+                }
+                _ => (None, None),
+            },
+            None => (None, None),
+        };
+
+        Self {
+            fabric_token_service_url: fabric_token_service_url.into(),
+            fabric_workload_host: fabric_workload_host.into(),
+            fabric_session_token: fabric_session_token.into(),
+            fabric_cluster_identifier: fabric_cluster_identifier.into(),
+            storage_access_token,
+            token_expiry,
+        }
+    }
+
+    fn validate_and_get_expiry(token: &str) -> Option<u64> {
+        let payload = token.split('.').nth(1)?;
+        let decoded_bytes = BASE64_URL_SAFE_NO_PAD.decode(payload).ok()?;
+        let decoded_str = str::from_utf8(&decoded_bytes).ok()?;
+        let claims: Claims = serde_json::from_str(decoded_str).ok()?;
+        Some(claims.exp)
+    }
+
+    fn get_current_timestamp() -> u64 {
+        SystemTime::now()
+            .duration_since(SystemTime::UNIX_EPOCH)
+            .map_or(0, |d| d.as_secs())
+    }
+}
+
+#[async_trait::async_trait]
+impl TokenProvider for FabricTokenOAuthProvider {
+    type Credential = AzureCredential;
+
+    /// Fetch a token
+    async fn fetch_token(
+        &self,
+        client: &HttpClient,
+        retry: &RetryConfig,
+    ) -> crate::Result<TemporaryToken<Arc<Self::Credential>>> {
+        if let Some(storage_access_token) = &self.storage_access_token {
+            if let Some(expiry) = self.token_expiry {
+                let exp_in = expiry - Self::get_current_timestamp();
+                if exp_in > TOKEN_MIN_TTL {
+                    return Ok(TemporaryToken {
+                        token: Arc::new(AzureCredential::BearerToken(storage_access_token.clone())),
+                        expiry: Some(Instant::now() + Duration::from_secs(exp_in)),
+                    });
+                }
+            }
+        }
+
+        let query_items = vec![("resource", AZURE_STORAGE_RESOURCE)];
+        let access_token: String = client
+            .request(Method::GET, &self.fabric_token_service_url)
+            .header(&PARTNER_TOKEN, self.fabric_session_token.as_str())
+            .header(&CLUSTER_IDENTIFIER, self.fabric_cluster_identifier.as_str())
+            .header(&WORKLOAD_RESOURCE, self.fabric_cluster_identifier.as_str())
+            .header(&PROXY_HOST, self.fabric_workload_host.as_str())
+            .query(&query_items)
+            .retryable(retry)
+            .idempotent(true)
+            .send()
+            .await
+            .map_err(|source| Error::TokenRequest { source })?
+            .into_body()
+            .text()
+            .await
+            .map_err(|source| Error::TokenResponseBody { source })?;
+        let exp_in = Self::validate_and_get_expiry(&access_token)
+            .map_or(3600, |expiry| expiry - Self::get_current_timestamp());
+        Ok(TemporaryToken {
+            token: Arc::new(AzureCredential::BearerToken(access_token)),
+            expiry: Some(Instant::now() + Duration::from_secs(exp_in)),
+        })
+    }
+}
+
+#[async_trait]
+impl CredentialProvider for AzureCliCredential {
+    type Credential = AzureCredential;
+
+    async fn get_credential(&self) -> crate::Result<Arc<Self::Credential>> {
+        Ok(self.cache.get_or_insert_with(|| self.fetch_token()).await?)
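+        // `fetch_token` shells out to the Azure CLI, so the cache is what keeps
+        // repeated credential lookups cheap until the cached token nears expiry.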
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use futures::executor::block_on;
+    use http::{Response, StatusCode};
+    use http_body_util::BodyExt;
+    use reqwest::{Client, Method};
+    use tempfile::NamedTempFile;
+
+    use super::*;
+    use crate::azure::MicrosoftAzureBuilder;
+    use crate::client::mock_server::MockServer;
+    use crate::{ObjectStore, Path};
+
+    #[tokio::test]
+    async fn test_managed_identity() {
+        let server = MockServer::new().await;
+
+        std::env::set_var(MSI_SECRET_ENV_KEY, "env-secret");
+
+        let endpoint = server.url();
+        let client = HttpClient::new(Client::new());
+        let retry_config = RetryConfig::default();
+
+        // Test IMDS
+        server.push_fn(|req| {
+            assert_eq!(req.uri().path(), "/metadata/identity/oauth2/token");
+            assert!(req.uri().query().unwrap().contains("client_id=client_id"));
+            assert_eq!(req.method(), &Method::GET);
+            let t = req
+                .headers()
+                .get("x-identity-header")
+                .unwrap()
+                .to_str()
+                .unwrap();
+            assert_eq!(t, "env-secret");
+            let t = req.headers().get("metadata").unwrap().to_str().unwrap();
+            assert_eq!(t, "true");
+            Response::new(
+                r#"
+            {
+                "access_token": "TOKEN",
+                "refresh_token": "",
+                "expires_in": "3599",
+                "expires_on": "1506484173",
+                "not_before": "1506480273",
+                "resource": "https://management.azure.com/",
+                "token_type": "Bearer"
+            }
+            "#
+                .to_string(),
+            )
+        });
+
+        let credential = ImdsManagedIdentityProvider::new(
+            Some("client_id".into()),
+            None,
+            None,
+            Some(format!("{endpoint}/metadata/identity/oauth2/token")),
+        );
+
+        let token = credential
+            .fetch_token(&client, &retry_config)
+            .await
+            .unwrap();
+
+        assert_eq!(
+            token.token.as_ref(),
+            &AzureCredential::BearerToken("TOKEN".into())
+        );
+    }
+
+    #[tokio::test]
+    async fn test_workload_identity() {
+        let server = MockServer::new().await;
+        let tokenfile = NamedTempFile::new().unwrap();
+        let tenant = "tenant";
+        std::fs::write(tokenfile.path(), "federated-token").unwrap();
+
+        let endpoint = server.url();
+        let client = HttpClient::new(Client::new());
+        let retry_config = RetryConfig::default();
+
+        // Test workload identity
+        server.push_fn(move |req| {
+            assert_eq!(req.uri().path(), format!("/{tenant}/oauth2/v2.0/token"));
+            assert_eq!(req.method(), &Method::POST);
+            let body = block_on(async move { req.into_body().collect().await.unwrap().to_bytes() });
+            let body = String::from_utf8(body.to_vec()).unwrap();
+            assert!(body.contains("federated-token"));
+            Response::new(
+                r#"
+            {
+                "access_token": "TOKEN",
+                "refresh_token": "",
+                "expires_in": 3599,
+                "expires_on": "1506484173",
+                "not_before": "1506480273",
+                "resource": "https://management.azure.com/",
+                "token_type": "Bearer"
+            }
+            "#
+                .to_string(),
+            )
+        });
+
+        let credential = WorkloadIdentityOAuthProvider::new(
+            "client_id",
+            tokenfile.path().to_str().unwrap(),
+            tenant,
+            Some(endpoint.to_string()),
+        );
+
+        let token = credential
+            .fetch_token(&client, &retry_config)
+            .await
+            .unwrap();
+
+        assert_eq!(
+            token.token.as_ref(),
+            &AzureCredential::BearerToken("TOKEN".into())
+        );
+    }
+
+    #[tokio::test]
+    async fn test_no_credentials() {
+        let server = MockServer::new().await;
+
+        let endpoint = server.url();
+        let store = MicrosoftAzureBuilder::new()
+            .with_account("test")
+            .with_container_name("test")
+            .with_allow_http(true)
+            .with_bearer_token_authorization("token")
+            .with_endpoint(endpoint.to_string())
+            .with_skip_signature(true)
+            .build()
+            .unwrap();
+
+        server.push_fn(|req| {
+            assert_eq!(req.method(), &Method::GET);
+            assert!(req.headers().get("Authorization").is_none());
+            Response::builder()
+                .status(StatusCode::NOT_FOUND)
+                .body("not found".to_string())
+                .unwrap()
+        });
+
+        let path = Path::from("file.txt");
+        match store.get(&path).await {
+            Err(crate::Error::NotFound { .. }) => {}
+            _ => {
+                panic!("unexpected response");
+            }
+        }
+    }
+}
diff --git a/rust/object_store/src/azure/mod.rs b/rust/object_store/src/azure/mod.rs
new file mode 100644
index 0000000000..f65bf9f3cf
--- /dev/null
+++ b/rust/object_store/src/azure/mod.rs
@@ -0,0 +1,409 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! An object store implementation for Azure blob storage
+//!
+//! ## Streaming uploads
+//!
+//! [ObjectStore::put_multipart] will upload data in blocks and write a blob from those blocks.
+//!
+//! Unused blocks will automatically be dropped after 7 days.
+use crate::{
+    multipart::{MultipartStore, PartId},
+    path::Path,
+    signer::Signer,
+    GetOptions, GetResult, ListResult, MultipartId, MultipartUpload, ObjectMeta, ObjectStore,
+    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart,
+};
+use async_trait::async_trait;
+use futures::stream::{BoxStream, StreamExt, TryStreamExt};
+use reqwest::Method;
+use std::fmt::Debug;
+use std::sync::Arc;
+use std::time::Duration;
+use url::Url;
+
+use crate::client::get::GetClientExt;
+use crate::client::list::{ListClient, ListClientExt};
+use crate::client::CredentialProvider;
+pub use credential::{authority_hosts, AzureAccessKey, AzureAuthorizer};
+
+mod builder;
+mod client;
+mod credential;
+
+/// [`CredentialProvider`] for [`MicrosoftAzure`]
+pub type AzureCredentialProvider = Arc<dyn CredentialProvider<Credential = AzureCredential>>;
+use crate::azure::client::AzureClient;
+use crate::client::parts::Parts;
+use crate::list::{PaginatedListOptions, PaginatedListResult, PaginatedListStore};
+pub use builder::{AzureConfigKey, MicrosoftAzureBuilder};
+pub use credential::AzureCredential;
+
+const STORE: &str = "MicrosoftAzure";
+
+/// Interface for [Microsoft Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/).
+#[derive(Debug)]
+pub struct MicrosoftAzure {
+    client: Arc<AzureClient>,
+}
+
+impl MicrosoftAzure {
+    /// Returns the [`AzureCredentialProvider`] used by [`MicrosoftAzure`]
+    pub fn credentials(&self) -> &AzureCredentialProvider {
+        &self.client.config().credentials
+    }
+
+    /// Create a full URL to the resource specified by `path` with this instance's configuration.
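+    ///
+    /// With the default public-cloud endpoint this typically yields a URL of the
+    /// shape `https://{account}.blob.core.windows.net/{container}/{path}` (the
+    /// exact host depends on the configured endpoint).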
+    fn path_url(&self, path: &Path) -> Url {
+        self.client.config().path_url(path)
+    }
+}
+
+impl std::fmt::Display for MicrosoftAzure {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "MicrosoftAzure {{ account: {}, container: {} }}",
+            self.client.config().account,
+            self.client.config().container
+        )
+    }
+}
+
+#[async_trait]
+impl ObjectStore for MicrosoftAzure {
+    async fn put_opts(
+        &self,
+        location: &Path,
+        payload: PutPayload,
+        opts: PutOptions,
+    ) -> Result<PutResult> {
+        self.client.put_blob(location, payload, opts).await
+    }
+
+    async fn put_multipart_opts(
+        &self,
+        location: &Path,
+        opts: PutMultipartOptions,
+    ) -> Result<Box<dyn MultipartUpload>> {
+        Ok(Box::new(AzureMultiPartUpload {
+            part_idx: 0,
+            opts,
+            state: Arc::new(UploadState {
+                client: Arc::clone(&self.client),
+                location: location.clone(),
+                parts: Default::default(),
+            }),
+        }))
+    }
+
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
+        self.client.get_opts(location, options).await
+    }
+
+    async fn delete(&self, location: &Path) -> Result<()> {
+        self.client.delete_request(location, &()).await
+    }
+
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
+        self.client.list(prefix)
+    }
+
+    fn delete_stream<'a>(
+        &'a self,
+        locations: BoxStream<'a, Result<Path>>,
+    ) -> BoxStream<'a, Result<Path>> {
+        locations
+            .try_chunks(256)
+            .map(move |locations| async {
+                // Early return the error. We ignore the paths that have already been
+                // collected into the chunk.
+                let locations = locations.map_err(|e| e.1)?;
+                self.client
+                    .bulk_delete_request(locations)
+                    .await
+                    .map(futures::stream::iter)
+            })
+            .buffered(20)
+            .try_flatten()
+            .boxed()
+    }
+
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
+        self.client.list_with_delimiter(prefix).await
+    }
+
+    async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
+        self.client.copy_request(from, to, true).await
+    }
+
+    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
+        self.client.copy_request(from, to, false).await
+    }
+}
+
+#[async_trait]
+impl Signer for MicrosoftAzure {
+    /// Create a URL containing the relevant [Service SAS] query parameters that authorize a request
+    /// via `method` to the resource at `path` valid for the duration specified in `expires_in`.
+    ///
+    /// [Service SAS]: https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas
+    ///
+    /// # Example
+    ///
+    /// This example returns a URL that will enable a user to upload a file to
+    /// "some-folder/some-file.txt" in the next hour.
+    ///
+    /// ```
+    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// # use object_store::{azure::MicrosoftAzureBuilder, path::Path, signer::Signer};
+    /// # use reqwest::Method;
+    /// # use std::time::Duration;
+    /// #
+    /// let azure = MicrosoftAzureBuilder::new()
+    ///     .with_account("my-account")
+    ///     .with_access_key("my-access-key")
+    ///     .with_container_name("my-container")
+    ///     .build()?;
+    ///
+    /// let url = azure.signed_url(
+    ///     Method::PUT,
+    ///     &Path::from("some-folder/some-file.txt"),
+    ///     Duration::from_secs(60 * 60)
+    /// ).await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    async fn signed_url(&self, method: Method, path: &Path, expires_in: Duration) -> Result<Url> {
+        let mut url = self.path_url(path);
+        let signer = self.client.signer(expires_in).await?;
+        signer.sign(&method, &mut url)?;
+        Ok(url)
+    }
+
+    async fn signed_urls(
+        &self,
+        method: Method,
+        paths: &[Path],
+        expires_in: Duration,
+    ) -> Result<Vec<Url>> {
+        let mut urls = Vec::with_capacity(paths.len());
+        let signer = self.client.signer(expires_in).await?;
+        for path in paths {
+            let mut url = self.path_url(path);
+            signer.sign(&method, &mut url)?;
+            urls.push(url);
+        }
+        Ok(urls)
+    }
+}
+
+/// Relevant docs:
+/// In Azure Blob Store, parts are "blocks"
+/// put_multipart_part -> PUT block
+/// complete -> PUT block list
+/// abort -> No equivalent; blocks are simply dropped after 7 days
+#[derive(Debug)]
+struct AzureMultiPartUpload {
+    part_idx: usize,
+    state: Arc<UploadState>,
+    opts: PutMultipartOptions,
+}
+
+#[derive(Debug)]
+struct UploadState {
+    location: Path,
+    parts: Parts,
+    client: Arc<AzureClient>,
+}
+
+#[async_trait]
+impl MultipartUpload for AzureMultiPartUpload {
+    fn put_part(&mut self, data: PutPayload) -> UploadPart {
+        let idx = self.part_idx;
+        self.part_idx += 1;
+        let state = Arc::clone(&self.state);
+        Box::pin(async move {
+            let part = state.client.put_block(&state.location, idx, data).await?;
+            state.parts.put(idx, part);
+            Ok(())
+        })
+    }
+
+    async fn complete(&mut self) -> Result<PutResult> {
+        let parts = self.state.parts.finish(self.part_idx)?;
+
+        self.state
+            .client
+            .put_block_list(&self.state.location, parts, std::mem::take(&mut self.opts))
+            .await
+    }
+
+    async fn abort(&mut self) -> Result<()> {
+        // Nothing to do
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl MultipartStore for MicrosoftAzure {
+    async fn create_multipart(&self, _: &Path) -> Result<MultipartId> {
+        Ok(String::new())
+    }
+
+    async fn put_part(
+        &self,
+        path: &Path,
+        _: &MultipartId,
+        part_idx: usize,
+        data: PutPayload,
+    ) -> Result<PartId> {
+        self.client.put_block(path, part_idx, data).await
+    }
+
+    async fn complete_multipart(
+        &self,
+        path: &Path,
+        _: &MultipartId,
+        parts: Vec<PartId>,
+    ) -> Result<PutResult> {
+        self.client
+            .put_block_list(path, parts, Default::default())
+            .await
+    }
+
+    async fn abort_multipart(&self, _: &Path, _: &MultipartId) -> Result<()> {
+        // There is no way to drop blocks that have been uploaded. Instead, they simply
+        // expire in 7 days.
+        Ok(())
+    }
+}
+
+#[async_trait]
+impl PaginatedListStore for MicrosoftAzure {
+    async fn list_paginated(
+        &self,
+        prefix: Option<&str>,
+        opts: PaginatedListOptions,
+    ) -> Result<PaginatedListResult> {
+        self.client.list_request(prefix, opts).await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::integration::*;
+    use crate::tests::*;
+    use bytes::Bytes;
+
+    #[tokio::test]
+    async fn azure_blob_test() {
+        maybe_skip_integration!();
+        let integration = MicrosoftAzureBuilder::from_env().build().unwrap();
+
+        put_get_delete_list(&integration).await;
+        get_opts(&integration).await;
+        list_uses_directories_correctly(&integration).await;
+        list_with_delimiter(&integration).await;
+        rename_and_copy(&integration).await;
+        copy_if_not_exists(&integration).await;
+        stream_get(&integration).await;
+        put_opts(&integration, true).await;
+        multipart(&integration, &integration).await;
+        multipart_race_condition(&integration, false).await;
+        multipart_out_of_order(&integration).await;
+        signing(&integration).await;
+        list_paginated(&integration, &integration).await;
+
+        let validate = !integration.client.config().disable_tagging;
+        tagging(
+            Arc::new(MicrosoftAzure {
+                client: Arc::clone(&integration.client),
+            }),
+            validate,
+            |p| {
+                let client = Arc::clone(&integration.client);
+                async move { client.get_blob_tagging(&p).await }
+            },
+        )
+        .await;
+
+        // Azurite doesn't support attributes properly
+        if !integration.client.config().is_emulator {
+            put_get_attributes(&integration).await;
+        }
+    }
+
+    #[ignore = "Used for manual testing against a real storage account."]
+    #[tokio::test]
+    async fn test_user_delegation_key() {
+        let account = std::env::var("AZURE_ACCOUNT_NAME").unwrap();
+        let container = std::env::var("AZURE_CONTAINER_NAME").unwrap();
+        let client_id = std::env::var("AZURE_CLIENT_ID").unwrap();
+        let client_secret = std::env::var("AZURE_CLIENT_SECRET").unwrap();
+        let tenant_id = std::env::var("AZURE_TENANT_ID").unwrap();
+        let integration = MicrosoftAzureBuilder::new()
+            .with_account(account)
+            .with_container_name(container)
+            .with_client_id(client_id)
+            .with_client_secret(client_secret)
+            .with_tenant_id(&tenant_id)
+            .build()
+            .unwrap();
+
+        let data = Bytes::from("hello world");
+        let path = Path::from("file.txt");
+        integration.put(&path, data.clone().into()).await.unwrap();
+
+        let signed = integration
+            .signed_url(Method::GET, &path, Duration::from_secs(60))
+            .await
+            .unwrap();
+
+        let resp = reqwest::get(signed).await.unwrap();
+        let loaded = resp.bytes().await.unwrap();
+
+        assert_eq!(data, loaded);
+    }
+
+    #[test]
+    fn azure_test_config_get_value() {
+        let azure_client_id = "object_store:fake_access_key_id".to_string();
+        let azure_storage_account_name = "object_store:fake_secret_key".to_string();
+        let azure_storage_token = "object_store:fake_default_region".to_string();
+        let builder = MicrosoftAzureBuilder::new()
+            .with_config(AzureConfigKey::ClientId, &azure_client_id)
+            .with_config(AzureConfigKey::AccountName, &azure_storage_account_name)
+            .with_config(AzureConfigKey::Token, &azure_storage_token);
+
+        assert_eq!(
+            builder.get_config_value(&AzureConfigKey::ClientId).unwrap(),
+            azure_client_id
+        );
+        assert_eq!(
+            builder
+                .get_config_value(&AzureConfigKey::AccountName)
+                .unwrap(),
+            azure_storage_account_name
+        );
+        assert_eq!(
+            builder.get_config_value(&AzureConfigKey::Token).unwrap(),
+            azure_storage_token
+        );
+    }
+}
diff --git a/rust/object_store/src/buffered.rs b/rust/object_store/src/buffered.rs
new file mode 100644
index 0000000000..00bea050ed
---
/dev/null +++ b/rust/object_store/src/buffered.rs @@ -0,0 +1,679 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Utilities for performing tokio-style buffered IO + +use crate::path::Path; +use crate::{ + Attributes, Extensions, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, + PutPayloadMut, TagSet, WriteMultipart, +}; +use bytes::Bytes; +use futures::future::{BoxFuture, FutureExt}; +use futures::ready; +use std::cmp::Ordering; +use std::io::{Error, ErrorKind, SeekFrom}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use tokio::io::{AsyncBufRead, AsyncRead, AsyncSeek, AsyncWrite, ReadBuf}; + +/// The default buffer size used by [`BufReader`] +pub const DEFAULT_BUFFER_SIZE: usize = 1024 * 1024; + +/// An async-buffered reader compatible with the tokio IO traits +/// +/// Internally this maintains a buffer of the requested size, and uses [`ObjectStore::get_range`] +/// to populate its internal buffer once depleted. This buffer is cleared on seek. +/// +/// Whilst simple, this interface will typically be outperformed by the native [`ObjectStore`] +/// methods that better map to the network APIs. This is because most object stores have +/// very [high first-byte latencies], on the order of 100-200ms, and so avoiding unnecessary +/// round-trips is critical to throughput. +/// +/// Systems looking to sequentially scan a file should instead consider using [`ObjectStore::get`], +/// or [`ObjectStore::get_opts`], or [`ObjectStore::get_range`] to read a particular range. +/// +/// Systems looking to read multiple ranges of a file should instead consider using +/// [`ObjectStore::get_ranges`], which will optimise the vectored IO. 
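+///
+/// A minimal usage sketch (assuming the crate is used as `object_store`, with an
+/// object already present at `file.txt`):
+///
+/// ```no_run
+/// # use std::sync::Arc;
+/// # use tokio::io::AsyncReadExt;
+/// # use object_store::{buffered::BufReader, memory::InMemory, path::Path, ObjectStore};
+/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+/// let store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
+/// let meta = store.head(&Path::from("file.txt")).await?;
+/// let mut reader = BufReader::new(Arc::clone(&store), &meta);
+/// let mut contents = Vec::new();
+/// reader.read_to_end(&mut contents).await?;
+/// # Ok(())
+/// # }
+/// ```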
+///
+/// [high first-byte latencies]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/optimizing-performance.html
+pub struct BufReader {
+    /// The object store to fetch data from
+    store: Arc<dyn ObjectStore>,
+    /// The size of the object
+    size: u64,
+    /// The path to the object
+    path: Path,
+    /// The current position in the object
+    cursor: u64,
+    /// The number of bytes to read in a single request
+    capacity: usize,
+    /// The buffered data if any
+    buffer: Buffer,
+}
+
+impl std::fmt::Debug for BufReader {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("BufReader")
+            .field("path", &self.path)
+            .field("size", &self.size)
+            .field("capacity", &self.capacity)
+            .finish()
+    }
+}
+
+enum Buffer {
+    Empty,
+    Pending(BoxFuture<'static, std::io::Result<Bytes>>),
+    Ready(Bytes),
+}
+
+impl BufReader {
+    /// Create a new [`BufReader`] from the provided [`ObjectMeta`] and [`ObjectStore`]
+    pub fn new(store: Arc<dyn ObjectStore>, meta: &ObjectMeta) -> Self {
+        Self::with_capacity(store, meta, DEFAULT_BUFFER_SIZE)
+    }
+
+    /// Create a new [`BufReader`] from the provided [`ObjectMeta`], [`ObjectStore`], and `capacity`
+    pub fn with_capacity(store: Arc<dyn ObjectStore>, meta: &ObjectMeta, capacity: usize) -> Self {
+        Self {
+            path: meta.location.clone(),
+            size: meta.size as _,
+            store,
+            capacity,
+            cursor: 0,
+            buffer: Buffer::Empty,
+        }
+    }
+
+    fn poll_fill_buf_impl(
+        &mut self,
+        cx: &mut Context<'_>,
+        amnt: usize,
+    ) -> Poll<std::io::Result<&[u8]>> {
+        let buf = &mut self.buffer;
+        loop {
+            match buf {
+                Buffer::Empty => {
+                    let store = Arc::clone(&self.store);
+                    let path = self.path.clone();
+                    let start = self.cursor.min(self.size) as _;
+                    let end = self.cursor.saturating_add(amnt as u64).min(self.size) as _;
+
+                    if start == end {
+                        return Poll::Ready(Ok(&[]));
+                    }
+
+                    *buf = Buffer::Pending(Box::pin(async move {
+                        Ok(store.get_range(&path, start..end).await?)
+                    }))
+                }
+                Buffer::Pending(fut) => match ready!(fut.poll_unpin(cx)) {
+                    Ok(b) => *buf = Buffer::Ready(b),
+                    Err(e) => return Poll::Ready(Err(e)),
+                },
+                Buffer::Ready(r) => return Poll::Ready(Ok(r)),
+            }
+        }
+    }
+}
+
+impl AsyncSeek for BufReader {
+    fn start_seek(mut self: Pin<&mut Self>, position: SeekFrom) -> std::io::Result<()> {
+        self.cursor = match position {
+            SeekFrom::Start(offset) => offset,
+            SeekFrom::End(offset) => checked_add_signed(self.size, offset).ok_or_else(|| {
+                Error::new(
+                    ErrorKind::InvalidInput,
+                    format!(
+                        "Seeking {offset} from end of {} byte file would result in overflow",
+                        self.size
+                    ),
+                )
+            })?,
+            SeekFrom::Current(offset) => {
+                checked_add_signed(self.cursor, offset).ok_or_else(|| {
+                    Error::new(
+                        ErrorKind::InvalidInput,
+                        format!(
+                            "Seeking {offset} from current offset of {} would result in overflow",
+                            self.cursor
+                        ),
+                    )
+                })?
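+                // A `None` from `checked_add_signed` means the cursor arithmetic
+                // over- or underflowed u64, surfaced above as `InvalidInput`.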
+            }
+        };
+        self.buffer = Buffer::Empty;
+        Ok(())
+    }
+
+    fn poll_complete(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<std::io::Result<u64>> {
+        Poll::Ready(Ok(self.cursor))
+    }
+}
+
+impl AsyncRead for BufReader {
+    fn poll_read(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        out: &mut ReadBuf<'_>,
+    ) -> Poll<std::io::Result<()>> {
+        // Read the maximum of the internal buffer and `out`
+        let to_read = out.remaining().max(self.capacity);
+        let r = match ready!(self.poll_fill_buf_impl(cx, to_read)) {
+            Ok(buf) => {
+                let to_consume = out.remaining().min(buf.len());
+                out.put_slice(&buf[..to_consume]);
+                self.consume(to_consume);
+                Ok(())
+            }
+            Err(e) => Err(e),
+        };
+        Poll::Ready(r)
+    }
+}
+
+impl AsyncBufRead for BufReader {
+    fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<&[u8]>> {
+        let capacity = self.capacity;
+        self.get_mut().poll_fill_buf_impl(cx, capacity)
+    }
+
+    fn consume(mut self: Pin<&mut Self>, amt: usize) {
+        match &mut self.buffer {
+            Buffer::Empty => assert_eq!(amt, 0, "cannot consume from empty buffer"),
+            Buffer::Ready(b) => match b.len().cmp(&amt) {
+                Ordering::Less => panic!("{amt} exceeds buffer size of {}", b.len()),
+                Ordering::Greater => *b = b.slice(amt..),
+                Ordering::Equal => self.buffer = Buffer::Empty,
+            },
+            Buffer::Pending(_) => panic!("cannot consume from pending buffer"),
+        }
+        self.cursor += amt as u64;
+    }
+}
+
+/// An async buffered writer compatible with the tokio IO traits
+///
+/// This writer adaptively uses [`ObjectStore::put`] or
+/// [`ObjectStore::put_multipart`] depending on the amount of data that has
+/// been written.
+///
+/// Up to `capacity` bytes will be buffered in memory, and flushed on shutdown
+/// using [`ObjectStore::put`]. If `capacity` is exceeded, data will instead be
+/// streamed using [`ObjectStore::put_multipart`]
+pub struct BufWriter {
+    capacity: usize,
+    max_concurrency: usize,
+    attributes: Option<Attributes>,
+    tags: Option<TagSet>,
+    extensions: Option<Extensions>,
+    state: BufWriterState,
+    store: Arc<dyn ObjectStore>,
+}
+
+impl std::fmt::Debug for BufWriter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("BufWriter")
+            .field("capacity", &self.capacity)
+            .finish()
+    }
+}
+
+enum BufWriterState {
+    /// Buffer up to capacity bytes
+    Buffer(Path, PutPayloadMut),
+    /// [`ObjectStore::put_multipart`]
+    Prepare(BoxFuture<'static, crate::Result<WriteMultipart>>),
+    /// Write to a multipart upload
+    Write(Option<WriteMultipart>),
+    /// [`ObjectStore::put`]
+    Flush(BoxFuture<'static, crate::Result<()>>),
+}
+
+impl BufWriter {
+    /// Create a new [`BufWriter`] from the provided [`ObjectStore`] and [`Path`]
+    pub fn new(store: Arc<dyn ObjectStore>, path: Path) -> Self {
+        Self::with_capacity(store, path, 10 * 1024 * 1024)
+    }
+
+    /// Create a new [`BufWriter`] from the provided [`ObjectStore`], [`Path`] and `capacity`
+    pub fn with_capacity(store: Arc<dyn ObjectStore>, path: Path, capacity: usize) -> Self {
+        Self {
+            capacity,
+            store,
+            max_concurrency: 8,
+            attributes: None,
+            tags: None,
+            extensions: None,
+            state: BufWriterState::Buffer(path, PutPayloadMut::new()),
+        }
+    }
+
+    /// Override the maximum number of in-flight requests for this writer
+    ///
+    /// Defaults to 8
+    pub fn with_max_concurrency(self, max_concurrency: usize) -> Self {
+        Self {
+            max_concurrency,
+            ..self
+        }
+    }
+
+    /// Set the attributes of the uploaded object
+    pub fn with_attributes(self, attributes: Attributes) -> Self {
+        Self {
+            attributes: Some(attributes),
+            ..self
+        }
+    }
+
+    /// Set the tags of the uploaded object
+    pub fn with_tags(self, tags: TagSet) -> Self {
+        Self {
+            tags: Some(tags),
+            ..self
+        }
+    }
+
+    /// Set the extensions of the uploaded object
+    ///
+    /// Implementation-specific extensions. Intended for use by [`ObjectStore`] implementations
+    /// that need to pass context-specific information (like tracing spans) via trait methods.
+    ///
+    /// These extensions are ignored entirely by backends offered through this crate.
+    pub fn with_extensions(self, extensions: Extensions) -> Self {
+        Self {
+            extensions: Some(extensions),
+            ..self
+        }
+    }
+
+    /// Write data to the writer in [`Bytes`].
+    ///
+    /// Unlike [`AsyncWrite::poll_write`], `put` can write data without extra copying.
+    ///
+    /// This API is recommended when the data source generates [`Bytes`].
+    pub async fn put(&mut self, bytes: Bytes) -> crate::Result<()> {
+        loop {
+            return match &mut self.state {
+                BufWriterState::Write(Some(write)) => {
+                    write.wait_for_capacity(self.max_concurrency).await?;
+                    write.put(bytes);
+                    Ok(())
+                }
+                BufWriterState::Write(None) | BufWriterState::Flush(_) => {
+                    panic!("Already shut down")
+                }
+                // NOTE
+                //
+                // This case should never happen in practice, but the rust async API does
+                // make it possible for users to call `put` before `poll_write` returns `Ready`.
+                //
+                // We allow such usage by `await`ing the future and continuing the loop.
+                BufWriterState::Prepare(f) => {
+                    self.state = BufWriterState::Write(f.await?.into());
+                    continue;
+                }
+                BufWriterState::Buffer(path, b) => {
+                    if b.content_length().saturating_add(bytes.len()) < self.capacity {
+                        b.push(bytes);
+                        Ok(())
+                    } else {
+                        let buffer = std::mem::take(b);
+                        let path = std::mem::take(path);
+                        let opts = PutMultipartOptions {
+                            attributes: self.attributes.take().unwrap_or_default(),
+                            tags: self.tags.take().unwrap_or_default(),
+                            extensions: self.extensions.take().unwrap_or_default(),
+                        };
+                        let upload = self.store.put_multipart_opts(&path, opts).await?;
+                        let mut chunked =
+                            WriteMultipart::new_with_chunk_size(upload, self.capacity);
+                        for chunk in buffer.freeze() {
+                            chunked.put(chunk);
+                        }
+                        chunked.put(bytes);
+                        self.state = BufWriterState::Write(Some(chunked));
+                        Ok(())
+                    }
+                }
+            };
+        }
+    }
+
+    /// Abort this writer, cleaning up any partially uploaded state
+    ///
+    /// # Panic
+    ///
+    /// Panics if this writer has already been shutdown or aborted
+    pub async fn abort(&mut self) -> crate::Result<()> {
+        match &mut self.state {
+            BufWriterState::Buffer(_, _) | BufWriterState::Prepare(_) => Ok(()),
+            BufWriterState::Flush(_) => panic!("Already shut down"),
+            BufWriterState::Write(x) => x.take().unwrap().abort().await,
+        }
+    }
+}
+
+impl AsyncWrite for BufWriter {
+    fn poll_write(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &[u8],
+    ) -> Poll<Result<usize, Error>> {
+        let cap = self.capacity;
+        let max_concurrency = self.max_concurrency;
+        loop {
+            return match &mut self.state {
+                BufWriterState::Write(Some(write)) => {
+                    ready!(write.poll_for_capacity(cx, max_concurrency))?;
+                    write.write(buf);
+                    Poll::Ready(Ok(buf.len()))
+                }
+                BufWriterState::Write(None) | BufWriterState::Flush(_) => {
+                    panic!("Already shut down")
+                }
+                BufWriterState::Prepare(f) => {
+                    self.state = BufWriterState::Write(ready!(f.poll_unpin(cx)?).into());
+                    continue;
+                }
+                BufWriterState::Buffer(path, b) => {
+                    if b.content_length().saturating_add(buf.len()) >= cap {
+                        let buffer = std::mem::take(b);
+                        let path = std::mem::take(path);
+                        let opts = PutMultipartOptions {
+                            attributes: self.attributes.take().unwrap_or_default(),
+                            tags: self.tags.take().unwrap_or_default(),
+                            extensions: self.extensions.take().unwrap_or_default(),
+                        };
+                        let store = Arc::clone(&self.store);
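+                        // Capacity exceeded: switch from buffering to a streaming multipart
+                        // upload. The buffered bytes are replayed into the upload; the
+                        // incoming `buf` is written on the next loop iteration.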
+                        self.state = BufWriterState::Prepare(Box::pin(async move {
+                            let upload = store.put_multipart_opts(&path, opts).await?;
+                            let mut chunked = WriteMultipart::new_with_chunk_size(upload, cap);
+                            for chunk in buffer.freeze() {
+                                chunked.put(chunk);
+                            }
+                            Ok(chunked)
+                        }));
+                        continue;
+                    }
+                    b.extend_from_slice(buf);
+                    Poll::Ready(Ok(buf.len()))
+                }
+            };
+        }
+    }
+
+    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        loop {
+            return match &mut self.state {
+                BufWriterState::Write(_) | BufWriterState::Buffer(_, _) => Poll::Ready(Ok(())),
+                BufWriterState::Flush(_) => panic!("Already shut down"),
+                BufWriterState::Prepare(f) => {
+                    self.state = BufWriterState::Write(ready!(f.poll_unpin(cx)?).into());
+                    continue;
+                }
+            };
+        }
+    }
+
+    fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        loop {
+            match &mut self.state {
+                BufWriterState::Prepare(f) => {
+                    self.state = BufWriterState::Write(ready!(f.poll_unpin(cx)?).into());
+                }
+                BufWriterState::Buffer(p, b) => {
+                    let buf = std::mem::take(b);
+                    let path = std::mem::take(p);
+                    let opts = PutOptions {
+                        attributes: self.attributes.take().unwrap_or_default(),
+                        tags: self.tags.take().unwrap_or_default(),
+                        ..Default::default()
+                    };
+                    let store = Arc::clone(&self.store);
+                    self.state = BufWriterState::Flush(Box::pin(async move {
+                        store.put_opts(&path, buf.into(), opts).await?;
+                        Ok(())
+                    }));
+                }
+                BufWriterState::Flush(f) => return f.poll_unpin(cx).map_err(std::io::Error::from),
+                BufWriterState::Write(x) => {
+                    let upload = x.take().ok_or_else(|| {
+                        std::io::Error::new(
+                            ErrorKind::InvalidInput,
+                            "Cannot shutdown a writer that has already been shut down",
+                        )
+                    })?;
+                    self.state = BufWriterState::Flush(
+                        async move {
+                            upload.finish().await?;
+                            Ok(())
+                        }
+                        .boxed(),
+                    )
+                }
+            }
+        }
+    }
+}
+
+/// Port of the standardised `checked_add_signed` function, which requires Rust 1.66
+///
+///
+#[inline]
+fn checked_add_signed(a: u64, rhs: i64) -> Option<u64> {
+    let (res, overflowed) = a.overflowing_add(rhs as _);
+    let overflow = overflowed ^ (rhs < 0);
+    (!overflow).then_some(res)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::memory::InMemory;
+    use crate::path::Path;
+    use crate::{Attribute, GetOptions};
+    use itertools::Itertools;
+    use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWriteExt};
+
+    #[tokio::test]
+    async fn test_buf_reader() {
+        let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+
+        let existent = Path::from("exists.txt");
+        const BYTES: usize = 4096;
+
+        let data: Bytes = b"12345678".iter().cycle().copied().take(BYTES).collect();
+        store.put(&existent, data.clone().into()).await.unwrap();
+
+        let meta = store.head(&existent).await.unwrap();
+
+        let mut reader = BufReader::new(Arc::clone(&store), &meta);
+        let mut out = Vec::with_capacity(BYTES);
+        let read = reader.read_to_end(&mut out).await.unwrap();
+
+        assert_eq!(read, BYTES);
+        assert_eq!(&out, &data);
+
+        let err = reader.seek(SeekFrom::Current(i64::MIN)).await.unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Seeking -9223372036854775808 from current offset of 4096 would result in overflow"
+        );
+
+        reader.rewind().await.unwrap();
+
+        let err = reader.seek(SeekFrom::Current(-1)).await.unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Seeking -1 from current offset of 0 would result in overflow"
+        );
+
+        // Seeking beyond the bounds of the file is permitted but should return no data
+        reader.seek(SeekFrom::Start(u64::MAX)).await.unwrap();
+        let buf = reader.fill_buf().await.unwrap();
+        assert!(buf.is_empty());
+
+        let err = reader.seek(SeekFrom::Current(1)).await.unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Seeking 1 from current offset of 18446744073709551615 would result in overflow"
+        );
+
+        for capacity in [200, 1024, 4096, DEFAULT_BUFFER_SIZE] {
+            let store = Arc::clone(&store);
+            let mut reader = BufReader::with_capacity(store, &meta, capacity);
+
+            let mut bytes_read = 0;
+            loop {
+                let buf = reader.fill_buf().await.unwrap();
+                if buf.is_empty() {
+                    assert_eq!(bytes_read, BYTES);
+                    break;
+                }
+                assert!(buf.starts_with(b"12345678"));
+                bytes_read += 8;
+                reader.consume(8);
+            }
+
+            let mut buf = Vec::with_capacity(76);
+            reader.seek(SeekFrom::Current(-76)).await.unwrap();
+            reader.read_to_end(&mut buf).await.unwrap();
+            assert_eq!(&buf, &data[BYTES - 76..]);
+
+            reader.rewind().await.unwrap();
+            let buffer = reader.fill_buf().await.unwrap();
+            assert_eq!(buffer, &data[..capacity.min(BYTES)]);
+
+            reader.seek(SeekFrom::Start(325)).await.unwrap();
+            let buffer = reader.fill_buf().await.unwrap();
+            assert_eq!(buffer, &data[325..(325 + capacity).min(BYTES)]);
+
+            reader.seek(SeekFrom::End(0)).await.unwrap();
+            let buffer = reader.fill_buf().await.unwrap();
+            assert!(buffer.is_empty());
+        }
+    }
+
+    // Note: `BufWriter::with_tags` functionality is tested in `crate::tests::tagging`
+    #[tokio::test]
+    async fn test_buf_writer() {
+        let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+        let path = Path::from("file.txt");
+        let attributes = Attributes::from_iter([
+            (Attribute::ContentType, "text/html"),
+            (Attribute::CacheControl, "max-age=604800"),
+        ]);
+
+        // Test put
+        let mut writer = BufWriter::with_capacity(Arc::clone(&store), path.clone(), 30)
+            .with_attributes(attributes.clone());
+        writer.write_all(&[0; 20]).await.unwrap();
+        writer.flush().await.unwrap();
+        writer.write_all(&[0; 5]).await.unwrap();
+        writer.shutdown().await.unwrap();
+        let response = store
+            .get_opts(
+                &path,
+                GetOptions {
+                    head: true,
+                    ..Default::default()
+                },
+            )
+            .await
+            .unwrap();
+        assert_eq!(response.meta.size, 25);
+        assert_eq!(response.attributes, attributes);
+
+        // Test multipart
+        let mut writer = BufWriter::with_capacity(Arc::clone(&store), path.clone(), 30)
+            .with_attributes(attributes.clone());
+        writer.write_all(&[0; 20]).await.unwrap();
+        writer.flush().await.unwrap();
+        writer.write_all(&[0; 20]).await.unwrap();
+        writer.shutdown().await.unwrap();
+        let response = store
+            .get_opts(
+                &path,
+                GetOptions {
+                    head: true,
+                    ..Default::default()
+                },
+            )
+            .await
+            .unwrap();
+        assert_eq!(response.meta.size, 40);
+        assert_eq!(response.attributes, attributes);
+    }
+
+    #[tokio::test]
+    async fn test_buf_writer_with_put() {
+        let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+        let path = Path::from("file.txt");
+
+        // Test put
+        let mut writer = BufWriter::with_capacity(Arc::clone(&store), path.clone(), 30);
+        writer
+            .put(Bytes::from((0..20).collect_vec()))
+            .await
+            .unwrap();
+        writer
+            .put(Bytes::from((20..25).collect_vec()))
+            .await
+            .unwrap();
+        writer.shutdown().await.unwrap();
+        let response = store
+            .get_opts(
+                &path,
+                GetOptions {
+                    head: true,
+                    ..Default::default()
+                },
+            )
+            .await
+            .unwrap();
+        assert_eq!(response.meta.size, 25);
+        assert_eq!(response.bytes().await.unwrap(), (0..25).collect_vec());
+
+        // Test multipart
+        let mut writer = BufWriter::with_capacity(Arc::clone(&store), path.clone(), 30);
+        writer
+            .put(Bytes::from((0..20).collect_vec()))
+            .await
+            .unwrap();
+        writer
+            .put(Bytes::from((20..40).collect_vec()))
+            .await
+            .unwrap();
+        writer.shutdown().await.unwrap();
+        let response
= store + .get_opts( + &path, + GetOptions { + head: true, + ..Default::default() + }, + ) + .await + .unwrap(); + assert_eq!(response.meta.size, 40); + assert_eq!(response.bytes().await.unwrap(), (0..40).collect_vec()); + } +} diff --git a/rust/object_store/src/chunked.rs b/rust/object_store/src/chunked.rs new file mode 100644 index 0000000000..8af3b2c42f --- /dev/null +++ b/rust/object_store/src/chunked.rs @@ -0,0 +1,236 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! A [`ChunkedStore`] that can be used to test streaming behaviour + +use std::fmt::{Debug, Display, Formatter}; +use std::ops::Range; +use std::sync::Arc; + +use async_trait::async_trait; +use bytes::{BufMut, Bytes, BytesMut}; +use futures::stream::BoxStream; +use futures::StreamExt; + +use crate::path::Path; +use crate::{ + GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + PutMultipartOptions, PutOptions, PutResult, +}; +use crate::{PutPayload, Result}; + +/// Wraps a [`ObjectStore`] and makes its get response return chunks +/// in a controllable manner. +/// +/// A `ChunkedStore` makes the memory consumption and performance of +/// the wrapped [`ObjectStore`] worse. It is intended for use within +/// tests, to control the chunks in the produced output streams. For +/// example, it is used to verify the delimiting logic in +/// newline_delimited_stream. 
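+///
+/// A construction sketch (assuming the crate is used as `object_store`):
+///
+/// ```no_run
+/// # use std::sync::Arc;
+/// # use object_store::{chunked::ChunkedStore, memory::InMemory, ObjectStore};
+/// // Wrap an in-memory store so that streaming reads yield 100-byte chunks.
+/// let store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
+/// let chunked = ChunkedStore::new(store, 100);
+/// ```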
+#[derive(Debug)]
+pub struct ChunkedStore {
+    inner: Arc<dyn ObjectStore>,
+    chunk_size: usize, // chunks are in memory, so we use usize not u64
+}
+
+impl ChunkedStore {
+    /// Creates a new [`ChunkedStore`] with the specified chunk_size
+    pub fn new(inner: Arc<dyn ObjectStore>, chunk_size: usize) -> Self {
+        Self { inner, chunk_size }
+    }
+}
+
+impl Display for ChunkedStore {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "ChunkedStore({})", self.inner)
+    }
+}
+
+#[async_trait]
+impl ObjectStore for ChunkedStore {
+    async fn put_opts(
+        &self,
+        location: &Path,
+        payload: PutPayload,
+        opts: PutOptions,
+    ) -> Result<PutResult> {
+        self.inner.put_opts(location, payload, opts).await
+    }
+
+    async fn put_multipart(&self, location: &Path) -> Result<Box<dyn MultipartUpload>> {
+        self.inner.put_multipart(location).await
+    }
+
+    async fn put_multipart_opts(
+        &self,
+        location: &Path,
+        opts: PutMultipartOptions,
+    ) -> Result<Box<dyn MultipartUpload>> {
+        self.inner.put_multipart_opts(location, opts).await
+    }
+
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
+        let r = self.inner.get_opts(location, options).await?;
+        let stream = match r.payload {
+            #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+            GetResultPayload::File(file, path) => {
+                crate::local::chunked_stream(file, path, r.range.clone(), self.chunk_size)
+            }
+            GetResultPayload::Stream(stream) => {
+                let buffer = BytesMut::new();
+                futures::stream::unfold(
+                    (stream, buffer, false, self.chunk_size),
+                    |(mut stream, mut buffer, mut exhausted, chunk_size)| async move {
+                        // Keep accumulating bytes until we reach capacity as long as
+                        // the stream can provide them:
+                        if exhausted {
+                            return None;
+                        }
+                        while buffer.len() < chunk_size {
+                            match stream.next().await {
+                                None => {
+                                    exhausted = true;
+                                    let slice = buffer.split_off(0).freeze();
+                                    return Some((
+                                        Ok(slice),
+                                        (stream, buffer, exhausted, chunk_size),
+                                    ));
+                                }
+                                Some(Ok(bytes)) => {
+                                    buffer.put(bytes);
+                                }
+                                Some(Err(e)) => {
+                                    return Some((
+                                        Err(crate::Error::Generic {
+                                            store: "ChunkedStore",
+                                            source: Box::new(e),
+                                        }),
+                                        (stream, buffer, exhausted, chunk_size),
+                                    ))
+                                }
+                            };
+                        }
+                        // Return the chunked values as the next value in the stream
+                        let slice = buffer.split_to(chunk_size).freeze();
+                        Some((Ok(slice), (stream, buffer, exhausted, chunk_size)))
+                    },
+                )
+                .boxed()
+            }
+        };
+        Ok(GetResult {
+            payload: GetResultPayload::Stream(stream),
+            ..r
+        })
+    }
+
+    async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
+        self.inner.get_range(location, range).await
+    }
+
+    async fn head(&self, location: &Path) -> Result<ObjectMeta> {
+        self.inner.head(location).await
+    }
+
+    async fn delete(&self, location: &Path) -> Result<()> {
+        self.inner.delete(location).await
+    }
+
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
+        self.inner.list(prefix)
+    }
+
+    fn list_with_offset(
+        &self,
+        prefix: Option<&Path>,
+        offset: &Path,
+    ) -> BoxStream<'static, Result<ObjectMeta>> {
+        self.inner.list_with_offset(prefix, offset)
+    }
+
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
+        self.inner.list_with_delimiter(prefix).await
+    }
+
+    async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
+        self.inner.copy(from, to).await
+    }
+
+    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
+        self.inner.copy_if_not_exists(from, to).await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use futures::StreamExt;
+
+    #[cfg(feature = "fs")]
+    use crate::integration::*;
+    #[cfg(feature = "fs")]
+    use crate::local::LocalFileSystem;
+    use crate::memory::InMemory;
+    use crate::path::Path;
+
+    use super::*;
+
+    #[tokio::test]
+    async fn test_chunked_basic() {
+        let location = Path::parse("test").unwrap();
+        let store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
+        store.put(&location, vec![0; 1001].into()).await.unwrap();
+
+        for chunk_size in [10, 20, 31] {
+            let store = ChunkedStore::new(Arc::clone(&store), chunk_size);
+            let mut s = match store.get(&location).await.unwrap().payload {
+                GetResultPayload::Stream(s) => s,
+                _ => unreachable!(),
+            };
+
+            let mut remaining = 1001;
+            while let Some(next) = s.next().await {
+                let size = next.unwrap().len() as u64;
+                let expected = remaining.min(chunk_size as u64);
+                assert_eq!(size, expected);
+                remaining -= expected;
+            }
+            assert_eq!(remaining, 0);
+        }
+    }
+
+    #[cfg(feature = "fs")]
+    #[tokio::test]
+    async fn test_chunked() {
+        let temporary = tempfile::tempdir().unwrap();
+        let integrations: &[Arc<dyn ObjectStore>] = &[
+            Arc::new(InMemory::new()),
+            Arc::new(LocalFileSystem::new_with_prefix(temporary.path()).unwrap()),
+        ];
+
+        for integration in integrations {
+            let integration = ChunkedStore::new(Arc::clone(integration), 100);
+
+            put_get_delete_list(&integration).await;
+            get_opts(&integration).await;
+            list_uses_directories_correctly(&integration).await;
+            list_with_delimiter(&integration).await;
+            rename_and_copy(&integration).await;
+            copy_if_not_exists(&integration).await;
+            stream_get(&integration).await;
+        }
+    }
+}
diff --git a/rust/object_store/src/client/backoff.rs b/rust/object_store/src/client/backoff.rs
new file mode 100644
index 0000000000..e1160d6017
--- /dev/null
+++ b/rust/object_store/src/client/backoff.rs
@@ -0,0 +1,173 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use rand::{prelude::*, rng};
+use std::time::Duration;
+
+/// Exponential backoff with decorrelated jitter algorithm
+///
+/// The first backoff will always be `init_backoff`.
+///
+/// Subsequent backoffs will pick a random value between `init_backoff` and
+/// `base * previous` where `previous` is the duration of the previous backoff
+///
+/// See
+#[allow(missing_copy_implementations)]
+#[derive(Debug, Clone)]
+pub struct BackoffConfig {
+    /// The initial backoff duration
+    pub init_backoff: Duration,
+    /// The maximum backoff duration
+    pub max_backoff: Duration,
+    /// The multiplier to use for the next backoff duration
+    pub base: f64,
+}
+
+impl Default for BackoffConfig {
+    fn default() -> Self {
+        Self {
+            init_backoff: Duration::from_millis(100),
+            max_backoff: Duration::from_secs(15),
+            base: 2.,
+        }
+    }
+}
+
+/// [`Backoff`] can be created from a [`BackoffConfig`]
+///
+/// Consecutive calls to [`Backoff::next`] will return the next backoff interval
+///
+pub(crate) struct Backoff {
+    init_backoff: f64,
+    next_backoff_secs: f64,
+    max_backoff_secs: f64,
+    base: f64,
+    rng: Option<Box<dyn RngCore>>,
+}
+
+impl std::fmt::Debug for Backoff {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Backoff")
+            .field("init_backoff", &self.init_backoff)
+            .field("next_backoff_secs", &self.next_backoff_secs)
+            .field("max_backoff_secs", &self.max_backoff_secs)
+            .field("base", &self.base)
+            .finish()
+    }
+}
+
+impl Backoff {
+    /// Create a new [`Backoff`] from the provided [`BackoffConfig`]
+    pub(crate) fn new(config: &BackoffConfig) -> Self {
+        Self::new_with_rng(config, None)
+    }
+
+    /// Creates a new `Backoff` with the optional `rng`
+    ///
+    /// Uses [`rand::rng()`] if no rng provided
+    pub(crate) fn new_with_rng(
+        config: &BackoffConfig,
+        rng: Option<Box<dyn RngCore>>,
+    ) -> Self {
+        let init_backoff = config.init_backoff.as_secs_f64();
+        Self {
+            init_backoff,
+            next_backoff_secs: init_backoff,
+            max_backoff_secs: config.max_backoff.as_secs_f64(),
+            base: config.base,
+            rng,
+        }
+    }
+
+    /// Returns the next backoff duration to wait for
+    pub(crate) fn next(&mut self) -> Duration {
+        let range = self.init_backoff..(self.next_backoff_secs * self.base);
+
+        let rand_backoff = match self.rng.as_mut() {
+            Some(rng) => rng.random_range(range),
+            None => rng().random_range(range),
+        };
+
+        let next_backoff = self.max_backoff_secs.min(rand_backoff);
+        Duration::from_secs_f64(std::mem::replace(&mut self.next_backoff_secs, next_backoff))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::rand_core::impls::fill_bytes_via_next;
+
+    struct FixedRng(u64);
+
+    impl RngCore for FixedRng {
+        fn next_u32(&mut self) -> u32 {
+            self.0 as _
+        }
+
+        fn next_u64(&mut self) -> u64 {
+            self.0
+        }
+
+        fn fill_bytes(&mut self, dst: &mut [u8]) {
+            fill_bytes_via_next(self, dst)
+        }
+    }
+
+    #[test]
+    fn test_backoff() {
+        let init_backoff_secs = 1.;
+        let max_backoff_secs = 500.;
+        let base = 3.;
+
+        let config = BackoffConfig {
+            init_backoff: Duration::from_secs_f64(init_backoff_secs),
+            max_backoff: Duration::from_secs_f64(max_backoff_secs),
+            base,
+        };
+
+        let assert_fuzzy_eq = |a: f64, b: f64| assert!((b - a).abs() < 0.0001, "{a} != {b}");
+
+        // Create a static rng that takes the minimum of the range
+        let rng = Box::new(FixedRng(0));
+        let mut backoff = Backoff::new_with_rng(&config, Some(rng));
+
+        for _ in 0..20 {
+            assert_eq!(backoff.next().as_secs_f64(), init_backoff_secs);
+        }
+
+        // Create a static rng that takes the maximum of the range
+        let rng = Box::new(FixedRng(u64::MAX));
+        let mut backoff = Backoff::new_with_rng(&config, Some(rng));
+
+        for i in 0..20 {
+            let value = (base.powi(i) * init_backoff_secs).min(max_backoff_secs);
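+            // An rng pinned to the top of its range makes each draw deterministic:
+            // the i-th backoff is init_backoff * base^i, capped at max_backoff.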
+            assert_fuzzy_eq(backoff.next().as_secs_f64(), value);
+        }
+
+        // Create a static rng that takes the mid point of the range
+        let rng = Box::new(FixedRng(u64::MAX / 2));
+        let mut backoff = Backoff::new_with_rng(&config, Some(rng));
+
+        let mut value = init_backoff_secs;
+        for _ in 0..20 {
+            assert_fuzzy_eq(backoff.next().as_secs_f64(), value);
+            value =
+                (init_backoff_secs + (value * base - init_backoff_secs) / 2.).min(max_backoff_secs);
+        }
+    }
+}
diff --git a/rust/object_store/src/client/builder.rs b/rust/object_store/src/client/builder.rs
new file mode 100644
index 0000000000..257cb57060
--- /dev/null
+++ b/rust/object_store/src/client/builder.rs
@@ -0,0 +1,329 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::client::{HttpClient, HttpError, HttpErrorKind, HttpRequest, HttpRequestBody};
+use http::header::{InvalidHeaderName, InvalidHeaderValue};
+use http::uri::InvalidUri;
+use http::{HeaderName, HeaderValue, Method, Uri};
+
+#[derive(Debug, thiserror::Error)]
+pub(crate) enum RequestBuilderError {
+    #[error("Invalid URI")]
+    InvalidUri(#[from] InvalidUri),
+
+    #[error("Invalid Header Value")]
+    InvalidHeaderValue(#[from] InvalidHeaderValue),
+
+    #[error("Invalid Header Name")]
+    InvalidHeaderName(#[from] InvalidHeaderName),
+
+    #[error("JSON serialization error")]
+    SerdeJson(#[from] serde_json::Error),
+
+    #[error("URL serialization error")]
+    SerdeUrl(#[from] serde_urlencoded::ser::Error),
+}
+
+impl From<RequestBuilderError> for HttpError {
+    fn from(value: RequestBuilderError) -> Self {
+        Self::new(HttpErrorKind::Request, value)
+    }
+}
+
+impl From<std::convert::Infallible> for RequestBuilderError {
+    fn from(value: std::convert::Infallible) -> Self {
+        match value {}
+    }
+}
+
+pub(crate) struct HttpRequestBuilder {
+    client: HttpClient,
+    request: Result<HttpRequest, RequestBuilderError>,
+}
+
+impl HttpRequestBuilder {
+    pub(crate) fn new(client: HttpClient) -> Self {
+        Self {
+            client,
+            request: Ok(HttpRequest::new(HttpRequestBody::empty())),
+        }
+    }
+
+    #[cfg(any(feature = "aws", feature = "azure"))]
+    pub(crate) fn from_parts(client: HttpClient, request: HttpRequest) -> Self {
+        Self {
+            client,
+            request: Ok(request),
+        }
+    }
+
+    pub(crate) fn method(mut self, method: Method) -> Self {
+        if let Ok(r) = &mut self.request {
+            *r.method_mut() = method;
+        }
+        self
+    }
+
+    pub(crate) fn uri<U>(mut self, url: U) -> Self
+    where
+        U: TryInto<Uri>,
+        U::Error: Into<RequestBuilderError>,
+    {
+        match (url.try_into(), &mut self.request) {
+            (Ok(uri), Ok(r)) => *r.uri_mut() = uri,
+            (Err(e), Ok(_)) => self.request = Err(e.into()),
+            (_, Err(_)) => {}
+        }
+        self
+    }
+
+    pub(crate) fn extensions(mut self, extensions: ::http::Extensions) -> Self {
+        if let Ok(r) = &mut self.request {
+            *r.extensions_mut() = extensions;
+        }
+        self
+    }
+
+    pub(crate) fn header<K, V>(mut self, name: K, value: V) -> Self
+    where
+        K: TryInto<HeaderName>,
+        K::Error: Into<RequestBuilderError>,
+        V: TryInto<HeaderValue>,
+        V::Error: Into<RequestBuilderError>,
+    {
+        match (name.try_into(), value.try_into(), &mut self.request) {
+            (Ok(name), Ok(value), Ok(r)) => {
+                r.headers_mut().insert(name, value);
+            }
+            (Err(e), _, Ok(_)) => self.request = Err(e.into()),
+            (_, Err(e), Ok(_)) => self.request = Err(e.into()),
+            (_, _, Err(_)) => {}
+        }
+        self
+    }
+
+    #[cfg(feature = "aws")]
+    pub(crate) fn headers(mut self, headers: http::HeaderMap) -> Self {
+        use http::header::{Entry, OccupiedEntry};
+
+        if let Ok(ref mut req) = self.request {
+            // IntoIter of HeaderMap yields (Option<HeaderName>, HeaderValue).
+            // The first time a name is yielded, it will be Some(name), and if
+            // there are more values with the same name, the next yield will be
+            // None.
+
+            let mut prev_entry: Option<OccupiedEntry<'_, HeaderValue>> = None;
+            for (key, value) in headers {
+                match key {
+                    Some(key) => match req.headers_mut().entry(key) {
+                        Entry::Occupied(mut e) => {
+                            e.insert(value);
+                            prev_entry = Some(e);
+                        }
+                        Entry::Vacant(e) => {
+                            let e = e.insert_entry(value);
+                            prev_entry = Some(e);
+                        }
+                    },
+                    None => match prev_entry {
+                        Some(ref mut entry) => {
+                            entry.append(value);
+                        }
+                        None => unreachable!("HeaderMap::into_iter yielded None first"),
+                    },
+                }
+            }
+        }
+        self
+    }
+
+    #[cfg(feature = "gcp")]
+    pub(crate) fn bearer_auth(mut self, token: &str) -> Self {
+        let value = HeaderValue::try_from(format!("Bearer {token}"));
+        match (value, &mut self.request) {
+            (Ok(mut v), Ok(r)) => {
+                v.set_sensitive(true);
+                r.headers_mut().insert(http::header::AUTHORIZATION, v);
+            }
+            (Err(e), Ok(_)) => self.request = Err(e.into()),
+            (_, Err(_)) => {}
+        }
+        self
+    }
+
+    #[cfg(any(feature = "aws", feature = "gcp"))]
+    pub(crate) fn json<S: serde::Serialize>(mut self, s: S) -> Self {
+        match (serde_json::to_vec(&s), &mut self.request) {
+            (Ok(json), Ok(request)) => {
+                *request.body_mut() = json.into();
+            }
+            (Err(e), Ok(_)) => self.request = Err(e.into()),
+            (_, Err(_)) => {}
+        }
+        self
+    }
+
+    #[cfg(any(test, feature = "aws", feature = "gcp", feature = "azure"))]
+    pub(crate) fn query<T: serde::Serialize + ?Sized>(mut self, query: &T) -> Self {
+        let mut error = None;
+        if let Ok(ref mut req) = self.request {
+            let mut out = format!("{}?", req.uri().path());
+            let start_position = out.len();
+            let mut encoder = form_urlencoded::Serializer::for_suffix(&mut out, start_position);
+            let serializer = serde_urlencoded::Serializer::new(&mut encoder);
+
+            if let Err(err) = query.serialize(serializer) {
+                error = Some(err.into());
+            }
+
+            match http::uri::PathAndQuery::from_maybe_shared(out) {
+                Ok(p) => {
+                    let mut parts = req.uri().clone().into_parts();
+                    parts.path_and_query = Some(p);
+                    *req.uri_mut() = Uri::from_parts(parts).unwrap();
+                }
+                Err(err) => error = Some(err.into()),
+            }
+        }
+        if let Some(err) = error {
+            self.request = Err(err);
+        }
+        self
+    }
+
+    #[cfg(any(feature = "gcp", feature = "azure"))]
+    pub(crate) fn form<T: serde::Serialize>(mut self, form: T) -> Self {
+        let mut error = None;
+        if let Ok(ref mut req) = self.request {
+            match serde_urlencoded::to_string(form) {
+                Ok(body) => {
+                    req.headers_mut().insert(
+                        http::header::CONTENT_TYPE,
+                        HeaderValue::from_static("application/x-www-form-urlencoded"),
+                    );
+                    *req.body_mut() = body.into();
+                }
+                Err(err) => error = Some(err.into()),
+            }
+        }
+        if let Some(err) = error {
+            self.request = Err(err);
+        }
+        self
+    }
+
+    #[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))]
+    pub(crate) fn body(mut self, b: impl Into<HttpRequestBody>) -> Self {
+        if let Ok(r) = &mut self.request {
+            *r.body_mut() = b.into();
+        }
+        self
+    }
+
+    pub(crate) fn into_parts(self) -> (HttpClient, Result<HttpRequest, RequestBuilderError>) {
+        (self.client, self.request)
+    }
+}
+
+#[cfg(any(test, feature = "azure"))]
+pub(crate) fn add_query_pairs<I, K, V>(uri: &mut Uri, query_pairs: I)
+where
+    I: IntoIterator,
+    I::Item: std::borrow::Borrow<(K, V)>,
+    K: AsRef<str>,
+    V: AsRef<str>,
+{
+    let mut parts = uri.clone().into_parts();
+
+    let mut out = match parts.path_and_query {
+        Some(p) => match p.query() {
+            Some(query) => format!("{}?{}", p.path(), query),
+            None => format!("{}?", p.path()),
+        },
+        None => "/?".to_string(),
+    };
+    let mut serializer = if out.ends_with('?') {
+        let start_position = out.len();
+        form_urlencoded::Serializer::for_suffix(&mut out, start_position)
+    } else {
+        form_urlencoded::Serializer::new(&mut out)
+    };
+
+    serializer.extend_pairs(query_pairs);
+
+    parts.path_and_query = Some(out.try_into().unwrap());
+    *uri = Uri::from_parts(parts).unwrap();
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_add_query_pairs() {
+        let mut uri = Uri::from_static("https://foo@example.com/bananas");
+
+        add_query_pairs(&mut uri, [("foo", "1")]);
+        assert_eq!(uri.to_string(), "https://foo@example.com/bananas?foo=1");
+
+        add_query_pairs(&mut uri, [("bingo", "foo"), ("auth", "test")]);
+        assert_eq!(
+            uri.to_string(),
+            "https://foo@example.com/bananas?foo=1&bingo=foo&auth=test"
+        );
+
+        add_query_pairs(&mut uri, [("t1", "funky shenanigans"), ("a", "😀")]);
+        assert_eq!(
+            uri.to_string(),
+            "https://foo@example.com/bananas?foo=1&bingo=foo&auth=test&t1=funky+shenanigans&a=%F0%9F%98%80"
+        );
+    }
+
+    #[test]
+    fn test_add_query_pairs_no_path() {
+        let mut uri = Uri::from_static("https://foo@example.com");
+        add_query_pairs(&mut uri, [("foo", "1")]);
+        assert_eq!(uri.to_string(), "https://foo@example.com/?foo=1");
+    }
+
+    #[test]
+    fn test_request_builder_query() {
+        let client = HttpClient::new(reqwest::Client::new());
+        assert_request_uri(
+            HttpRequestBuilder::new(client.clone()).uri("http://example.com/bananas"),
+            "http://example.com/bananas",
+        );
+
+        assert_request_uri(
+            HttpRequestBuilder::new(client.clone())
+                .uri("http://example.com/bananas")
+                .query(&[("foo", "1")]),
+            "http://example.com/bananas?foo=1",
+        );
+
+        assert_request_uri(
+            HttpRequestBuilder::new(client.clone())
+                .uri("http://example.com")
+                .query(&[("foo", "1")]),
+            "http://example.com/?foo=1",
+        );
+    }
+
+    fn assert_request_uri(builder: HttpRequestBuilder, expected: &str) {
+        assert_eq!(builder.into_parts().1.unwrap().uri().to_string(), expected)
+    }
+}
diff --git a/rust/object_store/src/client/dns.rs b/rust/object_store/src/client/dns.rs
new file mode 100644
index 0000000000..32e9291bac
--- /dev/null
+++ b/rust/object_store/src/client/dns.rs
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::net::ToSocketAddrs;
+
+use rand::prelude::SliceRandom;
+use reqwest::dns::{Addrs, Name, Resolve, Resolving};
+use tokio::task::JoinSet;
+
+type DynErr = Box<dyn std::error::Error + Send + Sync>;
+
+#[derive(Debug)]
+pub(crate) struct ShuffleResolver;
+
+impl Resolve for ShuffleResolver {
+    fn resolve(&self, name: Name) -> Resolving {
+        Box::pin(async move {
+            // use `JoinSet` to propagate cancellation
+            let mut tasks = JoinSet::new();
+            tasks.spawn_blocking(move || {
+                let it = (name.as_str(), 0).to_socket_addrs()?;
+                let mut addrs = it.collect::<Vec<_>>();
+
+                addrs.shuffle(&mut rand::rng());
+
+                Ok(Box::new(addrs.into_iter()) as Addrs)
+            });
+
+            tasks
+                .join_next()
+                .await
+                .expect("spawned on task")
+                .map_err(|err| Box::new(err) as DynErr)?
+        })
+    }
+}
diff --git a/rust/object_store/src/client/get.rs b/rust/object_store/src/client/get.rs
new file mode 100644
index 0000000000..51d4e1bf29
--- /dev/null
+++ b/rust/object_store/src/client/get.rs
@@ -0,0 +1,698 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::client::header::{get_etag, header_meta, HeaderConfig};
+use crate::client::retry::RetryContext;
+use crate::client::{HttpResponse, HttpResponseBody};
+use crate::path::Path;
+use crate::{
+    Attribute, Attributes, GetOptions, GetRange, GetResult, GetResultPayload, ObjectMeta, Result,
+    RetryConfig,
+};
+use async_trait::async_trait;
+use bytes::Bytes;
+use futures::stream::BoxStream;
+use futures::StreamExt;
+use http::header::{
+    CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_RANGE,
+    CONTENT_TYPE,
+};
+use http::StatusCode;
+use http_body_util::BodyExt;
+use reqwest::header::ToStrError;
+use std::ops::Range;
+use std::sync::Arc;
+use tracing::info;
+
+/// A client that can perform a get request
+#[async_trait]
+pub(crate) trait GetClient: Send + Sync + 'static {
+    const STORE: &'static str;
+
+    /// Configure the [`HeaderConfig`] for this client
+    const HEADER_CONFIG: HeaderConfig;
+
+    fn retry_config(&self) -> &RetryConfig;
+
+    async fn get_request(
+        &self,
+        ctx: &mut RetryContext,
+        path: &Path,
+        options: GetOptions,
+    ) -> Result<HttpResponse>;
+}
+
+/// Extension trait for [`GetClient`] that adds common retrieval functionality
+#[async_trait]
+pub(crate) trait GetClientExt {
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult>;
+}
+
+#[async_trait]
+impl<T: GetClient> GetClientExt for Arc<T> {
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
+        let ctx = GetContext {
+            location: location.clone(),
+            options,
+            client: Self::clone(self),
+            retry_ctx: RetryContext::new(self.retry_config()),
+        };
+
+        ctx.get_result().await
+    }
+}
+
+struct ContentRange {
+    /// The range of the object returned
+    range: Range<u64>,
+    /// The total size of the object being requested
+    size: u64,
+}
+
+impl ContentRange {
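+    // Illustrative parses, assuming the standard `Content-Range` syntax that
+    // `from_str` below accepts:
+    //   "bytes 2-5/12" -> range 2..6 with a total object size of 12
+    //   "bytes 0-0/1"  -> range 0..1 with a total object size of 1
+    // An unknown total size such as "bytes 2-5/*" is rejected.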
+    /// Parse a content range of the form `bytes <range-start>-<range-end>/<size>`
+    ///
+    /// <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Range>
+    fn from_str(s: &str) -> Option<Self> {
+        let rem = s.trim().strip_prefix("bytes ")?;
+        let (range, size) = rem.split_once('/')?;
+        let size = size.parse().ok()?;
+
+        let (start_s, end_s) = range.split_once('-')?;
+
+        let start = start_s.parse().ok()?;
+        let end: u64 = end_s.parse().ok()?;
+
+        Some(Self {
+            size,
+            range: start..end + 1,
+        })
+    }
+}
+
+/// A specialized `Error` for get-related errors
+#[derive(Debug, thiserror::Error)]
+enum GetResultError {
+    #[error(transparent)]
+    Header {
+        #[from]
+        source: crate::client::header::Error,
+    },
+
+    #[error(transparent)]
+    InvalidRangeRequest {
+        #[from]
+        source: crate::util::InvalidGetRange,
+    },
+
+    #[error("Received non-partial response when range requested")]
+    NotPartial,
+
+    #[error("Content-Range header not present in partial response")]
+    NoContentRange,
+
+    #[error("Failed to parse value for CONTENT_RANGE header: \"{value}\"")]
+    ParseContentRange { value: String },
+
+    #[error("Content-Range header contained non UTF-8 characters")]
+    InvalidContentRange { source: ToStrError },
+
+    #[error("Cache-Control header contained non UTF-8 characters")]
+    InvalidCacheControl { source: ToStrError },
+
+    #[error("Content-Disposition header contained non UTF-8 characters")]
+    InvalidContentDisposition { source: ToStrError },
+
+    #[error("Content-Encoding header contained non UTF-8 characters")]
+    InvalidContentEncoding { source: ToStrError },
+
+    #[error("Content-Language header contained non UTF-8 characters")]
+    InvalidContentLanguage { source: ToStrError },
+
+    #[error("Content-Type header contained non UTF-8 characters")]
+    InvalidContentType { source: ToStrError },
+
+    #[error("Metadata value for \"{key:?}\" contained non UTF-8 characters")]
+    InvalidMetadata { key: String },
+
+    #[error("Requested {expected:?}, got {actual:?}")]
+    UnexpectedRange {
+        expected: Range<u64>,
+        actual: Range<u64>,
+    },
+}
+
+/// Retry context for a streaming get request
+struct GetContext<T: GetClient> {
+    client: Arc<T>,
+    location: Path,
+    options: GetOptions,
+    retry_ctx: RetryContext,
+}
+
+impl<T: GetClient> GetContext<T> {
+    async fn get_result(mut self) -> Result<GetResult> {
+        if let Some(r) = &self.options.range {
+            r.is_valid().map_err(Self::err)?;
+        }
+
+        let request = self
+            .client
+            .get_request(&mut self.retry_ctx, &self.location, self.options.clone())
+            .await?;
+
+        let (parts, body) = request.into_parts();
+        let (range, meta) = get_range_meta(
+            T::HEADER_CONFIG,
+            &self.location,
+            self.options.range.as_ref(),
+            &parts,
+        )
+        .map_err(Self::err)?;
+
+        let attributes = get_attributes(T::HEADER_CONFIG, &parts.headers).map_err(Self::err)?;
+        let stream = self.retry_stream(body, meta.e_tag.clone(), range.clone());
+
+        Ok(GetResult {
+            payload: GetResultPayload::Stream(stream),
+            meta,
+            range,
+            attributes,
+        })
+    }
+
+    fn retry_stream(
+        self,
+        body: HttpResponseBody,
+        etag: Option<String>,
+        range: Range<u64>,
+    ) -> BoxStream<'static, Result<Bytes>> {
+        futures::stream::try_unfold(
+            (self, body, etag, range),
+            |(mut ctx, mut body, etag, mut range)| async move {
+                while let Some(ret) = body.frame().await {
+                    match (ret, &etag) {
+                        (Ok(frame), _) => match frame.into_data() {
+                            Ok(bytes) => {
+                                range.start += bytes.len() as u64;
+                                return Ok(Some((bytes, (ctx, body, etag, range))));
+                            }
+                            Err(_) => continue, // Not a data frame
+                        },
+                        // Retry all response body errors
+                        (Err(e), Some(etag)) if !ctx.retry_ctx.exhausted() => {
+                            let sleep = ctx.retry_ctx.backoff();
+                            info!(
+                                "Encountered error while reading response body: {}. 
Retrying in {}s", + e, + sleep.as_secs_f32() + ); + + tokio::time::sleep(sleep).await; + + let options = GetOptions { + range: Some(GetRange::Bounded(range.clone())), + ..ctx.options.clone() + }; + + // Note: this will potentially retry internally if applicable + let request = ctx + .client + .get_request(&mut ctx.retry_ctx, &ctx.location, options) + .await + .map_err(Self::err)?; + + let (parts, retry_body) = request.into_parts(); + let retry_etag = get_etag(&parts.headers).map_err(Self::err)?; + + if etag != &retry_etag { + // Return the original error + return Err(Self::err(e)); + } + + body = retry_body; + } + (Err(e), _) => return Err(Self::err(e)), + } + } + Ok(None) + }, + ) + .boxed() + } + + fn err(e: E) -> crate::Error { + crate::Error::Generic { + store: T::STORE, + source: Box::new(e), + } + } +} + +fn get_range_meta( + cfg: HeaderConfig, + location: &Path, + range: Option<&GetRange>, + response: &http::response::Parts, +) -> Result<(Range, ObjectMeta), GetResultError> { + let mut meta = header_meta(location, &response.headers, cfg)?; + let range = if let Some(expected) = range { + if response.status != StatusCode::PARTIAL_CONTENT { + return Err(GetResultError::NotPartial); + } + + let value = parse_range(&response.headers)?; + let actual = value.range; + + // Update size to reflect the full size of the object (#5272) + meta.size = value.size; + + let expected = expected.as_range(meta.size)?; + if actual != expected { + return Err(GetResultError::UnexpectedRange { expected, actual }); + } + + actual + } else { + 0..meta.size + }; + + Ok((range, meta)) +} + +/// Extracts the [CONTENT_RANGE] header +fn parse_range(headers: &http::HeaderMap) -> Result { + let val = headers + .get(CONTENT_RANGE) + .ok_or(GetResultError::NoContentRange)?; + + let value = val + .to_str() + .map_err(|source| GetResultError::InvalidContentRange { source })?; + + ContentRange::from_str(value).ok_or_else(|| { + let value = value.into(); + GetResultError::ParseContentRange { value } + }) +} + +/// Extracts [`Attributes`] from the response headers +fn get_attributes( + cfg: HeaderConfig, + headers: &http::HeaderMap, +) -> Result { + macro_rules! parse_attributes { + ($headers:expr, $(($header:expr, $attr:expr, $map_err:expr)),*) => {{ + let mut attributes = Attributes::new(); + $( + if let Some(x) = $headers.get($header) { + let x = x.to_str().map_err($map_err)?; + attributes.insert($attr, x.to_string().into()); + } + )* + attributes + }} + } + + let mut attributes = parse_attributes!( + headers, + (CACHE_CONTROL, Attribute::CacheControl, |source| { + GetResultError::InvalidCacheControl { source } + }), + ( + CONTENT_DISPOSITION, + Attribute::ContentDisposition, + |source| GetResultError::InvalidContentDisposition { source } + ), + (CONTENT_ENCODING, Attribute::ContentEncoding, |source| { + GetResultError::InvalidContentEncoding { source } + }), + (CONTENT_LANGUAGE, Attribute::ContentLanguage, |source| { + GetResultError::InvalidContentLanguage { source } + }), + (CONTENT_TYPE, Attribute::ContentType, |source| { + GetResultError::InvalidContentType { source } + }) + ); + + // Add attributes that match the user-defined metadata prefix (e.g. 
x-amz-meta-) + if let Some(prefix) = cfg.user_defined_metadata_prefix { + for (key, val) in headers { + if let Some(suffix) = key.as_str().strip_prefix(prefix) { + if let Ok(val_str) = val.to_str() { + attributes.insert( + Attribute::Metadata(suffix.to_string().into()), + val_str.to_string().into(), + ); + } else { + return Err(GetResultError::InvalidMetadata { + key: key.to_string(), + }); + } + } + } + } + Ok(attributes) +} + +#[cfg(test)] +mod tests { + use super::*; + use http::header::*; + + fn make_response( + object_size: usize, + status: StatusCode, + content_range: Option<&str>, + headers: Option>, + ) -> http::response::Parts { + let mut builder = http::Response::builder(); + if let Some(range) = content_range { + builder = builder.header(CONTENT_RANGE, range); + } + + if let Some(headers) = headers { + for (key, value) in headers { + builder = builder.header(key, value); + } + } + + builder + .status(status) + .header(CONTENT_LENGTH, object_size) + .body(()) + .unwrap() + .into_parts() + .0 + } + + const CFG: HeaderConfig = HeaderConfig { + etag_required: false, + last_modified_required: false, + version_header: None, + user_defined_metadata_prefix: Some("x-test-meta-"), + }; + + #[tokio::test] + async fn test_get_range_meta() { + let path = Path::from("test"); + + let resp = make_response(12, StatusCode::OK, None, None); + let (range, meta) = get_range_meta(CFG, &path, None, &resp).unwrap(); + assert_eq!(meta.size, 12); + assert_eq!(range, 0..12); + + let get_range = GetRange::from(2..3); + + let resp = make_response(12, StatusCode::PARTIAL_CONTENT, Some("bytes 2-2/12"), None); + let (range, meta) = get_range_meta(CFG, &path, Some(&get_range), &resp).unwrap(); + assert_eq!(meta.size, 12); + assert_eq!(range, 2..3); + + let resp = make_response(12, StatusCode::OK, None, None); + let err = get_range_meta(CFG, &path, Some(&get_range), &resp).unwrap_err(); + assert_eq!( + err.to_string(), + "Received non-partial response when range requested" + ); + + let resp = make_response(12, StatusCode::PARTIAL_CONTENT, Some("bytes 2-3/12"), None); + let err = get_range_meta(CFG, &path, Some(&get_range), &resp).unwrap_err(); + assert_eq!(err.to_string(), "Requested 2..3, got 2..4"); + + let resp = make_response(12, StatusCode::PARTIAL_CONTENT, Some("bytes 2-2/*"), None); + let err = get_range_meta(CFG, &path, Some(&get_range), &resp).unwrap_err(); + assert_eq!( + err.to_string(), + "Failed to parse value for CONTENT_RANGE header: \"bytes 2-2/*\"" + ); + + let resp = make_response(12, StatusCode::PARTIAL_CONTENT, None, None); + let err = get_range_meta(CFG, &path, Some(&get_range), &resp).unwrap_err(); + assert_eq!( + err.to_string(), + "Content-Range header not present in partial response" + ); + + let resp = make_response(2, StatusCode::PARTIAL_CONTENT, Some("bytes 2-3/2"), None); + let err = get_range_meta(CFG, &path, Some(&get_range), &resp).unwrap_err(); + assert_eq!( + err.to_string(), + "Wanted range starting at 2, but object was only 2 bytes long" + ); + + let resp = make_response(6, StatusCode::PARTIAL_CONTENT, Some("bytes 2-5/6"), None); + let (range, meta) = get_range_meta(CFG, &path, Some(&GetRange::Suffix(4)), &resp).unwrap(); + assert_eq!(meta.size, 6); + assert_eq!(range, 2..6); + + let resp = make_response(6, StatusCode::PARTIAL_CONTENT, Some("bytes 2-3/6"), None); + let err = get_range_meta(CFG, &path, Some(&GetRange::Suffix(4)), &resp).unwrap_err(); + assert_eq!(err.to_string(), "Requested 2..6, got 2..4"); + } + + #[test] + fn test_get_attributes() { + let resp = 
make_response( + 12, + StatusCode::OK, + None, + Some(vec![("x-test-meta-foo", "bar")]), + ); + + let attributes = get_attributes(CFG, &resp.headers).unwrap(); + assert_eq!( + attributes.get(&Attribute::Metadata("foo".into())), + Some(&"bar".into()) + ); + } +} +#[cfg(all(test, feature = "http", not(target_arch = "wasm32")))] +mod http_tests { + use crate::client::mock_server::MockServer; + use crate::client::{HttpError, HttpErrorKind, HttpResponseBody}; + use crate::http::HttpBuilder; + use crate::path::Path; + use crate::{ClientOptions, ObjectStore, RetryConfig}; + use bytes::Bytes; + use futures::FutureExt; + use http::header::{CONTENT_LENGTH, CONTENT_RANGE, ETAG, RANGE}; + use http::{Response, StatusCode}; + use hyper::body::Frame; + use std::pin::Pin; + use std::task::{ready, Context, Poll}; + use std::time::Duration; + + #[derive(Debug, thiserror::Error)] + #[error("ChunkedErr")] + struct ChunkedErr {} + + /// A Body from a list of results + /// + /// Sleeps between each frame to avoid the HTTP Server coalescing the frames + struct Chunked { + chunks: std::vec::IntoIter>, + sleep: Option>>, + } + + impl Chunked { + fn new(v: Vec>) -> Self { + Self { + chunks: v.into_iter(), + sleep: None, + } + } + } + + impl hyper::body::Body for Chunked { + type Data = Bytes; + type Error = HttpError; + + fn poll_frame( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll, Self::Error>>> { + if let Some(sleep) = &mut self.sleep { + ready!(sleep.poll_unpin(cx)); + self.sleep = None; + } + + Poll::Ready(match self.chunks.next() { + None => None, + Some(Ok(b)) => { + self.sleep = Some(Box::pin(tokio::time::sleep(Duration::from_millis(1)))); + Some(Ok(Frame::data(b))) + } + Some(Err(_)) => Some(Err(HttpError::new(HttpErrorKind::Unknown, ChunkedErr {}))), + }) + } + } + + impl From for HttpResponseBody { + fn from(value: Chunked) -> Self { + Self::new(value) + } + } + + #[tokio::test] + async fn test_stream_retry() { + let mock = MockServer::new().await; + let retry = RetryConfig { + backoff: Default::default(), + max_retries: 3, + retry_timeout: Duration::from_secs(1000), + }; + + let options = ClientOptions::new().with_allow_http(true); + let store = HttpBuilder::new() + .with_client_options(options) + .with_retry(retry) + .with_url(mock.url()) + .build() + .unwrap(); + + let path = Path::from("test"); + + // Test basic + let resp = Response::builder() + .header(CONTENT_LENGTH, 11) + .header(ETAG, "123") + .body("Hello World".to_string()) + .unwrap(); + + mock.push(resp); + + let b = store.get(&path).await.unwrap().bytes().await.unwrap(); + assert_eq!(b.as_ref(), b"Hello World"); + + // Should retry with range + mock.push( + Response::builder() + .header(CONTENT_LENGTH, 10) + .header(ETAG, "123") + .body(Chunked::new(vec![ + Ok(Bytes::from_static(b"banana")), + Err(()), + ])) + .unwrap(), + ); + + mock.push_fn(|req| { + assert_eq!( + req.headers().get(RANGE).unwrap().to_str().unwrap(), + "bytes=6-9" + ); + + Response::builder() + .status(StatusCode::PARTIAL_CONTENT) + .header(CONTENT_LENGTH, 3) + .header(ETAG, "123") + .header(CONTENT_RANGE, "bytes 6-9/10") + .body("123".to_string()) + .unwrap() + }); + + let ret = store.get(&path).await.unwrap().bytes().await.unwrap(); + assert_eq!(ret.as_ref(), b"banana123"); + + // Should retry multiple times + mock.push( + Response::builder() + .header(CONTENT_LENGTH, 20) + .header(ETAG, "foo") + .body(Chunked::new(vec![ + Ok(Bytes::from_static(b"hello")), + Err(()), + ])) + .unwrap(), + ); + + mock.push_fn(|req| { + assert_eq!( + 
req.headers().get(RANGE).unwrap().to_str().unwrap(), + "bytes=5-19" + ); + + Response::builder() + .status(StatusCode::PARTIAL_CONTENT) + .header(CONTENT_LENGTH, 15) + .header(ETAG, "foo") + .header(CONTENT_RANGE, "bytes 5-19/20") + .body(Chunked::new(vec![Ok(Bytes::from_static(b"baz")), Err(())])) + .unwrap() + }); + + mock.push_fn::<_, String>(|req| { + assert_eq!( + req.headers().get(RANGE).unwrap().to_str().unwrap(), + "bytes=8-19" + ); + Response::builder() + .status(StatusCode::BAD_GATEWAY) + .body("ignored".to_string()) + .unwrap() + }); + + mock.push_fn(|req| { + assert_eq!( + req.headers().get(RANGE).unwrap().to_str().unwrap(), + "bytes=8-19" + ); + + Response::builder() + .status(StatusCode::PARTIAL_CONTENT) + .header(CONTENT_LENGTH, 12) + .header(ETAG, "foo") + .header(CONTENT_RANGE, "bytes 8-19/20") + .body("123456789012".to_string()) + .unwrap() + }); + + let ret = store.get(&path).await.unwrap().bytes().await.unwrap(); + assert_eq!(ret.as_ref(), b"hellobaz123456789012"); + + // Should abort if etag doesn't match + mock.push( + Response::builder() + .header(CONTENT_LENGTH, 12) + .header(ETAG, "foo") + .body(Chunked::new(vec![Ok(Bytes::from_static(b"test")), Err(())])) + .unwrap(), + ); + + mock.push_fn(|req| { + assert_eq!( + req.headers().get(RANGE).unwrap().to_str().unwrap(), + "bytes=4-11" + ); + + Response::builder() + .status(StatusCode::PARTIAL_CONTENT) + .header(CONTENT_LENGTH, 7) + .header(ETAG, "baz") + .header(CONTENT_RANGE, "bytes 4-11/12") + .body("1234567".to_string()) + .unwrap() + }); + + let err = store.get(&path).await.unwrap().bytes().await.unwrap_err(); + assert_eq!( + err.to_string(), + "Generic HTTP error: HTTP error: request or response body error" + ); + } +} diff --git a/rust/object_store/src/client/header.rs b/rust/object_store/src/client/header.rs new file mode 100644 index 0000000000..d7e14b3fb5 --- /dev/null +++ b/rust/object_store/src/client/header.rs @@ -0,0 +1,166 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Logic for extracting ObjectMeta from headers used by AWS, GCP and Azure + +use crate::path::Path; +use crate::ObjectMeta; +use chrono::{DateTime, TimeZone, Utc}; +use http::header::{CONTENT_LENGTH, ETAG, LAST_MODIFIED}; +use http::HeaderMap; + +#[derive(Debug, Copy, Clone)] +/// Configuration for header extraction +pub(crate) struct HeaderConfig { + /// Whether to require an ETag header when extracting [`ObjectMeta`] from headers. + /// + /// Defaults to `true` + pub etag_required: bool, + + /// Whether to require a Last-Modified header when extracting [`ObjectMeta`] from headers. 
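+    ///
+    /// When `false`, a missing Last-Modified header is tolerated and the
+    /// extracted timestamp falls back to the Unix epoch (see `header_meta`).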
+ /// + /// Defaults to `true` + pub last_modified_required: bool, + + /// The version header name if any + pub version_header: Option<&'static str>, + + /// The user defined metadata prefix if any + pub user_defined_metadata_prefix: Option<&'static str>, +} + +#[derive(Debug, thiserror::Error)] +pub(crate) enum Error { + #[error("ETag Header missing from response")] + MissingEtag, + + #[error("Received header containing non-ASCII data")] + BadHeader { source: reqwest::header::ToStrError }, + + #[error("Last-Modified Header missing from response")] + MissingLastModified, + + #[error("Content-Length Header missing from response")] + MissingContentLength, + + #[error("Invalid last modified '{}': {}", last_modified, source)] + InvalidLastModified { + last_modified: String, + source: chrono::ParseError, + }, + + #[error("Invalid content length '{}': {}", content_length, source)] + InvalidContentLength { + content_length: String, + source: std::num::ParseIntError, + }, +} + +/// Extracts a PutResult from the provided [`HeaderMap`] +#[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] +pub(crate) fn get_put_result( + headers: &HeaderMap, + version: &str, +) -> Result { + let e_tag = Some(get_etag(headers)?); + let version = get_version(headers, version)?; + Ok(crate::PutResult { e_tag, version }) +} + +/// Extracts a optional version from the provided [`HeaderMap`] +#[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] +pub(crate) fn get_version(headers: &HeaderMap, version: &str) -> Result, Error> { + Ok(match headers.get(version) { + Some(x) => Some( + x.to_str() + .map_err(|source| Error::BadHeader { source })? + .to_string(), + ), + None => None, + }) +} + +/// Extracts an etag from the provided [`HeaderMap`] +pub(crate) fn get_etag(headers: &HeaderMap) -> Result { + let e_tag = headers.get(ETAG).ok_or(Error::MissingEtag)?; + Ok(e_tag + .to_str() + .map_err(|source| Error::BadHeader { source })? + .to_string()) +} + +/// Extracts [`ObjectMeta`] from the provided [`HeaderMap`] +pub(crate) fn header_meta( + location: &Path, + headers: &HeaderMap, + cfg: HeaderConfig, +) -> Result { + let last_modified = match headers.get(LAST_MODIFIED) { + Some(last_modified) => { + let last_modified = last_modified + .to_str() + .map_err(|source| Error::BadHeader { source })?; + + DateTime::parse_from_rfc2822(last_modified) + .map_err(|source| Error::InvalidLastModified { + last_modified: last_modified.into(), + source, + })? + .with_timezone(&Utc) + } + None if cfg.last_modified_required => return Err(Error::MissingLastModified), + None => Utc.timestamp_nanos(0), + }; + + let e_tag = match get_etag(headers) { + Ok(e_tag) => Some(e_tag), + Err(Error::MissingEtag) if !cfg.etag_required => None, + Err(e) => return Err(e), + }; + + let content_length = headers + .get(CONTENT_LENGTH) + .ok_or(Error::MissingContentLength)?; + + let content_length = content_length + .to_str() + .map_err(|source| Error::BadHeader { source })?; + + let size = content_length + .parse() + .map_err(|source| Error::InvalidContentLength { + content_length: content_length.into(), + source, + })?; + + let version = match cfg.version_header.and_then(|h| headers.get(h)) { + Some(v) => Some( + v.to_str() + .map_err(|source| Error::BadHeader { source })? 
+                .to_string(),
+        ),
+        None => None,
+    };
+
+    Ok(ObjectMeta {
+        location: location.clone(),
+        last_modified,
+        version,
+        size,
+        e_tag,
+    })
+}
diff --git a/rust/object_store/src/client/http/body.rs b/rust/object_store/src/client/http/body.rs
new file mode 100644
index 0000000000..504bebe065
--- /dev/null
+++ b/rust/object_store/src/client/http/body.rs
@@ -0,0 +1,242 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::client::{HttpError, HttpErrorKind};
+use crate::{collect_bytes, PutPayload};
+use bytes::Bytes;
+use futures::stream::BoxStream;
+use futures::StreamExt;
+use http_body_util::combinators::BoxBody;
+use http_body_util::{BodyExt, Full};
+use hyper::body::{Body, Frame, SizeHint};
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+/// An HTTP Request
+pub type HttpRequest = http::Request<HttpRequestBody>;
+
+/// The [`Body`] of an [`HttpRequest`]
+#[derive(Debug, Clone)]
+pub struct HttpRequestBody(Inner);
+
+impl HttpRequestBody {
+    /// An empty [`HttpRequestBody`]
+    pub fn empty() -> Self {
+        Self(Inner::Bytes(Bytes::new()))
+    }
+
+    #[cfg(not(target_arch = "wasm32"))]
+    pub(crate) fn into_reqwest(self) -> reqwest::Body {
+        match self.0 {
+            Inner::Bytes(b) => b.into(),
+            Inner::PutPayload(_, payload) => reqwest::Body::wrap_stream(futures::stream::iter(
+                payload.into_iter().map(Ok::<_, HttpError>),
+            )),
+        }
+    }
+
+    #[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+    pub(crate) fn into_reqwest(self) -> reqwest::Body {
+        match self.0 {
+            Inner::Bytes(b) => b.into(),
+            Inner::PutPayload(_, payload) => Bytes::from(payload).into(),
+        }
+    }
+
+    /// Returns true if this body is empty
+    pub fn is_empty(&self) -> bool {
+        match &self.0 {
+            Inner::Bytes(x) => x.is_empty(),
+            Inner::PutPayload(_, x) => x.iter().all(|x| x.is_empty()),
+        }
+    }
+
+    /// Returns the total length of the [`Bytes`] in this body
+    pub fn content_length(&self) -> usize {
+        match &self.0 {
+            Inner::Bytes(x) => x.len(),
+            Inner::PutPayload(_, x) => x.content_length(),
+        }
+    }
+
+    /// If this body consists of a single contiguous [`Bytes`], returns it
+    pub fn as_bytes(&self) -> Option<&Bytes> {
+        match &self.0 {
+            Inner::Bytes(x) => Some(x),
+            _ => None,
+        }
+    }
+}
+
+impl From<Bytes> for HttpRequestBody {
+    fn from(value: Bytes) -> Self {
+        Self(Inner::Bytes(value))
+    }
+}
+
+impl From<Vec<u8>> for HttpRequestBody {
+    fn from(value: Vec<u8>) -> Self {
+        Self(Inner::Bytes(value.into()))
+    }
+}
+
+impl From<String> for HttpRequestBody {
+    fn from(value: String) -> Self {
+        Self(Inner::Bytes(value.into()))
+    }
+}
+
+impl From<PutPayload> for HttpRequestBody {
+    fn from(value: PutPayload) -> Self {
+        Self(Inner::PutPayload(0, value))
+    }
+}
+
+#[derive(Debug, Clone)]
+enum Inner {
+    Bytes(Bytes),
+    PutPayload(usize, PutPayload),
+}
+
+impl Body for HttpRequestBody {
+    type Data = Bytes;
+    type Error
= HttpError; + + fn poll_frame( + mut self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll, Self::Error>>> { + Poll::Ready(match &mut self.0 { + Inner::Bytes(bytes) => { + let out = bytes.split_off(0); + if out.is_empty() { + None + } else { + Some(Ok(Frame::data(out))) + } + } + Inner::PutPayload(offset, payload) => { + let slice = payload.as_ref(); + if *offset == slice.len() { + None + } else { + Some(Ok(Frame::data( + slice[std::mem::replace(offset, *offset + 1)].clone(), + ))) + } + } + }) + } + + fn is_end_stream(&self) -> bool { + match self.0 { + Inner::Bytes(ref bytes) => bytes.is_empty(), + Inner::PutPayload(offset, ref body) => offset == body.as_ref().len(), + } + } + + fn size_hint(&self) -> SizeHint { + match self.0 { + Inner::Bytes(ref bytes) => SizeHint::with_exact(bytes.len() as u64), + Inner::PutPayload(offset, ref payload) => { + let iter = payload.as_ref().iter().skip(offset); + SizeHint::with_exact(iter.map(|x| x.len() as u64).sum()) + } + } + } +} + +/// An HTTP response +pub type HttpResponse = http::Response; + +/// The body of an [`HttpResponse`] +#[derive(Debug)] +pub struct HttpResponseBody(BoxBody); + +impl HttpResponseBody { + /// Create an [`HttpResponseBody`] from the provided [`Body`] + /// + /// Note: [`BodyExt::map_err`] can be used to alter error variants + pub fn new(body: B) -> Self + where + B: Body + Send + Sync + 'static, + { + Self(BoxBody::new(body)) + } + + /// Collects this response into a [`Bytes`] + pub async fn bytes(self) -> Result { + let size_hint = self.0.size_hint().lower(); + let s = self.0.into_data_stream(); + collect_bytes(s, Some(size_hint)).await + } + + /// Returns a stream of this response data + pub fn bytes_stream(self) -> BoxStream<'static, Result> { + self.0.into_data_stream().boxed() + } + + /// Returns the response as a [`String`] + pub(crate) async fn text(self) -> Result { + let b = self.bytes().await?; + String::from_utf8(b.into()).map_err(|e| HttpError::new(HttpErrorKind::Decode, e)) + } + + #[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] + pub(crate) async fn json(self) -> Result { + let b = self.bytes().await?; + serde_json::from_slice(&b).map_err(|e| HttpError::new(HttpErrorKind::Decode, e)) + } +} + +impl Body for HttpResponseBody { + type Data = Bytes; + type Error = HttpError; + + fn poll_frame( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll, Self::Error>>> { + Pin::new(&mut self.0).poll_frame(cx) + } + + fn is_end_stream(&self) -> bool { + self.0.is_end_stream() + } + + fn size_hint(&self) -> SizeHint { + self.0.size_hint() + } +} + +impl From for HttpResponseBody { + fn from(value: Bytes) -> Self { + Self::new(Full::new(value).map_err(|e| match e {})) + } +} + +impl From> for HttpResponseBody { + fn from(value: Vec) -> Self { + Bytes::from(value).into() + } +} + +impl From for HttpResponseBody { + fn from(value: String) -> Self { + Bytes::from(value).into() + } +} diff --git a/rust/object_store/src/client/http/connection.rs b/rust/object_store/src/client/http/connection.rs new file mode 100644 index 0000000000..b8a79f406d --- /dev/null +++ b/rust/object_store/src/client/http/connection.rs @@ -0,0 +1,399 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::client::builder::{HttpRequestBuilder, RequestBuilderError}; +use crate::client::{HttpRequest, HttpResponse, HttpResponseBody}; +use crate::ClientOptions; +use async_trait::async_trait; +use http::{Method, Uri}; +use http_body_util::BodyExt; +use std::error::Error; +use std::sync::Arc; +use tokio::runtime::Handle; +use tracing::error; + +/// An HTTP protocol error +/// +/// Clients should return this when an HTTP request fails to be completed, e.g. because +/// of a connection issue. This does **not** include HTTP requests that are return +/// non 2xx Status Codes, as these should instead be returned as an [`HttpResponse`] +/// with the appropriate status code set. +#[derive(Debug, thiserror::Error)] +#[error("HTTP error: {source}")] +pub struct HttpError { + kind: HttpErrorKind, + #[source] + source: Box, +} + +/// Identifies the kind of [`HttpError`] +/// +/// This is used, among other things, to determine if a request can be retried +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum HttpErrorKind { + /// An error occurred whilst connecting to the remote + /// + /// Will be automatically retried + Connect, + /// An error occurred whilst making the request + /// + /// Will be automatically retried + Request, + /// Request timed out + /// + /// Will be automatically retried if the request is idempotent + Timeout, + /// The request was aborted + /// + /// Will be automatically retried if the request is idempotent + Interrupted, + /// An error occurred whilst decoding the response + /// + /// Will not be automatically retried + Decode, + /// An unknown error occurred + /// + /// Will not be automatically retried + Unknown, +} + +impl HttpError { + /// Create a new [`HttpError`] with the optional status code + pub fn new(kind: HttpErrorKind, e: E) -> Self + where + E: Error + Send + Sync + 'static, + { + Self { + kind, + source: Box::new(e), + } + } + + pub(crate) fn reqwest(e: reqwest::Error) -> Self { + error!("Captured reqwest error {e:?}"); + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + #[cfg(not(target_arch = "wasm32"))] + let is_connect = || e.is_connect(); + #[cfg(target_arch = "wasm32")] + let is_connect = || false; + + let mut kind = if e.is_timeout() { + HttpErrorKind::Timeout + } else if is_connect() { + HttpErrorKind::Connect + } else if e.is_decode() { + HttpErrorKind::Decode + } else { + HttpErrorKind::Unknown + }; + + // Reqwest error variants aren't great, attempt to refine them + let mut source = e.source(); + while kind == HttpErrorKind::Unknown { + if let Some(e) = source { + if let Some(e) = e.downcast_ref::() { + if e.is_closed() || e.is_incomplete_message() || e.is_body_write_aborted() { + kind = HttpErrorKind::Request; + } else if e.is_timeout() { + kind = HttpErrorKind::Timeout; + } + } + if let Some(e) = e.downcast_ref::() { + match e.kind() { + std::io::ErrorKind::TimedOut => kind = HttpErrorKind::Timeout, + 
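+                            // The arms below fold connection teardown into
+                            // `Interrupted`, which is retried only for
+                            // idempotent requests (see `HttpErrorKind` above).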
std::io::ErrorKind::ConnectionAborted + | std::io::ErrorKind::ConnectionReset + | std::io::ErrorKind::BrokenPipe + | std::io::ErrorKind::UnexpectedEof => kind = HttpErrorKind::Interrupted, + _ => {} + } + } + source = e.source(); + } else { + break; + } + } + + Self { + kind, + // We strip URL as it will be included by RetryError if not sensitive + source: Box::new(e.without_url()), + } + } + + /// Returns the [`HttpErrorKind`] + pub fn kind(&self) -> HttpErrorKind { + self.kind + } +} + +/// An asynchronous function from a [`HttpRequest`] to a [`HttpResponse`]. +#[async_trait] +pub trait HttpService: std::fmt::Debug + Send + Sync + 'static { + /// Perform [`HttpRequest`] returning [`HttpResponse`] + async fn call(&self, req: HttpRequest) -> Result; +} + +/// An HTTP client +#[derive(Debug, Clone)] +pub struct HttpClient(Arc); + +impl HttpClient { + /// Create a new [`HttpClient`] from an [`HttpService`] + pub fn new(service: impl HttpService + 'static) -> Self { + Self(Arc::new(service)) + } + + /// Performs [`HttpRequest`] using this client + pub async fn execute(&self, request: HttpRequest) -> Result { + self.0.call(request).await + } + + #[allow(unused)] + pub(crate) fn get(&self, url: U) -> HttpRequestBuilder + where + U: TryInto, + U::Error: Into, + { + self.request(Method::GET, url) + } + + #[allow(unused)] + pub(crate) fn post(&self, url: U) -> HttpRequestBuilder + where + U: TryInto, + U::Error: Into, + { + self.request(Method::POST, url) + } + + #[allow(unused)] + pub(crate) fn put(&self, url: U) -> HttpRequestBuilder + where + U: TryInto, + U::Error: Into, + { + self.request(Method::PUT, url) + } + + #[allow(unused)] + pub(crate) fn delete(&self, url: U) -> HttpRequestBuilder + where + U: TryInto, + U::Error: Into, + { + self.request(Method::DELETE, url) + } + + pub(crate) fn request(&self, method: Method, url: U) -> HttpRequestBuilder + where + U: TryInto, + U::Error: Into, + { + HttpRequestBuilder::new(self.clone()) + .uri(url) + .method(method) + } +} + +#[async_trait] +#[cfg(not(target_arch = "wasm32"))] +impl HttpService for reqwest::Client { + async fn call(&self, req: HttpRequest) -> Result { + let cloned_req = req.clone(); + let (parts, body) = req.into_parts(); + + let url = parts.uri.to_string().parse().unwrap(); + let mut req = reqwest::Request::new(parts.method, url); + *req.headers_mut() = parts.headers; + *req.body_mut() = Some(body.into_reqwest()); + + let r = self.execute(req).await.map_err(HttpError::reqwest); + if r.is_err() { + let err = r.as_ref().unwrap_err(); + error!("Object_store reqwest error:\nRequest: {cloned_req:?}\n\nError: {err:?}"); + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + } + + let r = r?; + let res: http::Response = r.into(); + let (parts, body) = res.into_parts(); + + let body = HttpResponseBody::new(body.map_err(HttpError::reqwest)); + Ok(HttpResponse::from_parts(parts, body)) + } +} + +#[async_trait] +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +impl HttpService for reqwest::Client { + async fn call(&self, req: HttpRequest) -> Result { + use futures::{ + channel::{mpsc, oneshot}, + SinkExt, StreamExt, TryStreamExt, + }; + use http_body_util::{Empty, StreamBody}; + use wasm_bindgen_futures::spawn_local; + + let (parts, body) = req.into_parts(); + let url = parts.uri.to_string().parse().unwrap(); + let mut req = reqwest::Request::new(parts.method, url); + *req.headers_mut() = parts.headers; + *req.body_mut() = Some(body.into_reqwest()); + + let (mut tx, rx) = mpsc::channel(1); + let (tx_parts, rx_parts) = 
oneshot::channel(); + let res_fut = self.execute(req); + + spawn_local(async move { + match res_fut.await.map_err(HttpError::reqwest) { + Err(err) => { + let _ = tx_parts.send(Err(err)); + drop(tx); + } + Ok(res) => { + let (mut parts, _) = http::Response::new(Empty::<()>::new()).into_parts(); + parts.headers = res.headers().clone(); + parts.status = res.status(); + let _ = tx_parts.send(Ok(parts)); + let mut stream = res.bytes_stream().map_err(HttpError::reqwest); + while let Some(chunk) = stream.next().await { + if let Err(_e) = tx.send(chunk).await { + // Disconnected due to a transitive drop of the receiver + break; + } + } + } + } + }); + + let parts = rx_parts.await.unwrap()?; + let safe_stream = rx.map(|chunk| { + let frame = hyper::body::Frame::data(chunk?); + Ok(frame) + }); + let body = HttpResponseBody::new(StreamBody::new(safe_stream)); + + Ok(HttpResponse::from_parts(parts, body)) + } +} + +/// A factory for [`HttpClient`] +pub trait HttpConnector: std::fmt::Debug + Send + Sync + 'static { + /// Create a new [`HttpClient`] with the provided [`ClientOptions`] + fn connect(&self, options: &ClientOptions) -> crate::Result; +} + +/// [`HttpConnector`] using [`reqwest::Client`] +#[derive(Debug, Default)] +#[allow(missing_copy_implementations)] +#[cfg(not(all(target_arch = "wasm32", target_os = "wasi")))] +pub struct ReqwestConnector {} + +#[cfg(not(all(target_arch = "wasm32", target_os = "wasi")))] +impl HttpConnector for ReqwestConnector { + fn connect(&self, options: &ClientOptions) -> crate::Result { + let client = options.client()?; + Ok(HttpClient::new(client)) + } +} + +/// [`reqwest::Client`] connector that performs all I/O on the provided tokio +/// [`Runtime`] (thread pool). +/// +/// This adapter is most useful when you wish to segregate I/O from CPU bound +/// work that may be happening on the [`Runtime`]. +/// +/// [`Runtime`]: tokio::runtime::Runtime +/// +/// # Example: Spawning requests on separate runtime +/// +/// ``` +/// # use std::sync::Arc; +/// # use tokio::runtime::Runtime; +/// # use object_store::azure::MicrosoftAzureBuilder; +/// # use object_store::client::SpawnedReqwestConnector; +/// # use object_store::ObjectStore; +/// # fn get_io_runtime() -> Runtime { +/// # tokio::runtime::Builder::new_current_thread().build().unwrap() +/// # } +/// # fn main() -> Result<(), object_store::Error> { +/// // create a tokio runtime for I/O. +/// let io_runtime: Runtime = get_io_runtime(); +/// // configure a store using the runtime. +/// let handle = io_runtime.handle().clone(); // get a handle to the same runtime +/// let store: Arc = Arc::new( +/// MicrosoftAzureBuilder::new() +/// .with_http_connector(SpawnedReqwestConnector::new(handle)) +/// .with_container_name("my_container") +/// .with_account("my_account") +/// .build()? 
+/// ); +/// // any requests made using store will be spawned on the io_runtime +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug)] +#[allow(missing_copy_implementations)] +#[cfg(not(target_arch = "wasm32"))] +pub struct SpawnedReqwestConnector { + runtime: Handle, +} + +#[cfg(not(target_arch = "wasm32"))] +impl SpawnedReqwestConnector { + /// Create a new [`SpawnedReqwestConnector`] with the provided [`Handle`] to + /// a tokio [`Runtime`] + /// + /// [`Runtime`]: tokio::runtime::Runtime + pub fn new(runtime: Handle) -> Self { + Self { runtime } + } +} + +#[cfg(not(target_arch = "wasm32"))] +impl HttpConnector for SpawnedReqwestConnector { + fn connect(&self, options: &ClientOptions) -> crate::Result { + let spawn_service = super::SpawnService::new(options.client()?, self.runtime.clone()); + Ok(HttpClient::new(spawn_service)) + } +} + +#[cfg(all(target_arch = "wasm32", target_os = "wasi"))] +pub(crate) fn http_connector( + custom: Option>, +) -> crate::Result> { + match custom { + Some(x) => Ok(x), + None => Err(crate::Error::NotSupported { + source: "WASI architectures must provide an HTTPConnector" + .to_string() + .into(), + }), + } +} + +#[cfg(not(all(target_arch = "wasm32", target_os = "wasi")))] +pub(crate) fn http_connector( + custom: Option>, +) -> crate::Result> { + match custom { + Some(x) => Ok(x), + None => Ok(Arc::new(ReqwestConnector {})), + } +} diff --git a/rust/object_store/src/client/http/mod.rs b/rust/object_store/src/client/http/mod.rs new file mode 100644 index 0000000000..86e1e11d39 --- /dev/null +++ b/rust/object_store/src/client/http/mod.rs @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! HTTP client abstraction + +mod body; +pub use body::*; + +mod connection; +pub use connection::*; + +mod spawn; +pub use spawn::*; diff --git a/rust/object_store/src/client/http/spawn.rs b/rust/object_store/src/client/http/spawn.rs new file mode 100644 index 0000000000..32c7fc4982 --- /dev/null +++ b/rust/object_store/src/client/http/spawn.rs @@ -0,0 +1,169 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::client::{ + HttpError, HttpErrorKind, HttpRequest, HttpResponse, HttpResponseBody, HttpService, +}; +use async_trait::async_trait; +use bytes::Bytes; +use http::Response; +use http_body_util::BodyExt; +use hyper::body::{Body, Frame}; +use std::pin::Pin; +use std::task::{Context, Poll}; +use thiserror::Error; +use tokio::runtime::Handle; +use tokio::task::JoinHandle; + +/// Spawn error +#[derive(Debug, Error)] +#[error("SpawnError")] +struct SpawnError {} + +impl From for HttpError { + fn from(value: SpawnError) -> Self { + Self::new(HttpErrorKind::Interrupted, value) + } +} + +/// Wraps a provided [`HttpService`] and runs it on a separate tokio runtime +/// +/// See example on [`SpawnedReqwestConnector`] +/// +/// [`SpawnedReqwestConnector`]: crate::client::http::SpawnedReqwestConnector +#[derive(Debug)] +pub struct SpawnService { + inner: T, + runtime: Handle, +} + +impl SpawnService { + /// Creates a new [`SpawnService`] from the provided + pub fn new(inner: T, runtime: Handle) -> Self { + Self { inner, runtime } + } +} + +#[async_trait] +impl HttpService for SpawnService { + async fn call(&self, req: HttpRequest) -> Result { + let inner = self.inner.clone(); + let (send, recv) = tokio::sync::oneshot::channel(); + + // We use an unbounded channel to prevent backpressure across the runtime boundary + // which could in turn starve the underlying IO operations + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); + + let handle = SpawnHandle(self.runtime.spawn(async move { + let r = match HttpService::call(&inner, req).await { + Ok(resp) => resp, + Err(e) => { + let _ = send.send(Err(e)); + return; + } + }; + + let (parts, mut body) = r.into_parts(); + if send.send(Ok(parts)).is_err() { + return; + } + + while let Some(x) = body.frame().await { + if sender.send(x).is_err() { + return; + } + } + })); + + let parts = recv.await.map_err(|_| SpawnError {})??; + + Ok(Response::from_parts( + parts, + HttpResponseBody::new(SpawnBody { + stream: receiver, + _worker: handle, + }), + )) + } +} + +/// A wrapper around a [`JoinHandle`] that aborts on drop +struct SpawnHandle(JoinHandle<()>); +impl Drop for SpawnHandle { + fn drop(&mut self) { + self.0.abort(); + } +} + +type StreamItem = Result, HttpError>; + +struct SpawnBody { + stream: tokio::sync::mpsc::UnboundedReceiver, + _worker: SpawnHandle, +} + +impl Body for SpawnBody { + type Data = Bytes; + type Error = HttpError; + + fn poll_frame(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.stream.poll_recv(cx) + } +} + +#[cfg(not(target_arch = "wasm32"))] +#[cfg(test)] +mod tests { + use super::*; + use crate::client::mock_server::MockServer; + use crate::client::retry::RetryExt; + use crate::client::HttpClient; + use crate::RetryConfig; + + async fn test_client(client: HttpClient) { + let (send, recv) = tokio::sync::oneshot::channel(); + + let mock = MockServer::new().await; + mock.push(Response::new("BANANAS".to_string())); + + let url = mock.url().to_string(); + let thread = std::thread::spawn(|| { + futures::executor::block_on(async move { + let retry = RetryConfig::default(); + let ret = client.get(url).send_retry(&retry).await.unwrap(); + let payload = ret.into_body().bytes().await.unwrap(); + assert_eq!(payload.as_ref(), b"BANANAS"); + let _ = send.send(()); + }) + }); + recv.await.unwrap(); + thread.join().unwrap(); + } + + #[tokio::test] + async fn test_spawn() { + let client = 
HttpClient::new(SpawnService::new(reqwest::Client::new(), Handle::current())); + test_client(client).await; + } + + #[tokio::test] + #[should_panic] + async fn test_no_spawn() { + let client = HttpClient::new(reqwest::Client::new()); + test_client(client).await; + } +} diff --git a/rust/object_store/src/client/list.rs b/rust/object_store/src/client/list.rs new file mode 100644 index 0000000000..7a2cf62aa5 --- /dev/null +++ b/rust/object_store/src/client/list.rs @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::client::pagination::stream_paginated; +use crate::list::{PaginatedListOptions, PaginatedListResult}; +use crate::path::{Path, DELIMITER}; +use crate::Result; +use crate::{ListResult, ObjectMeta}; +use async_trait::async_trait; +use futures::stream::BoxStream; +use futures::{StreamExt, TryStreamExt}; +use std::borrow::Cow; +use std::collections::BTreeSet; + +/// A client that can perform paginated list requests +#[async_trait] +pub(crate) trait ListClient: Send + Sync + 'static { + async fn list_request( + &self, + prefix: Option<&str>, + options: PaginatedListOptions, + ) -> Result; +} + +/// Extension trait for [`ListClient`] that adds common listing functionality +#[async_trait] +pub(crate) trait ListClientExt { + fn list_paginated( + &self, + prefix: Option<&Path>, + delimiter: bool, + offset: Option<&Path>, + ) -> BoxStream<'static, Result>; + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result>; + + #[allow(unused)] + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'static, Result>; + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result; +} + +#[async_trait] +impl ListClientExt for T { + fn list_paginated( + &self, + prefix: Option<&Path>, + delimiter: bool, + offset: Option<&Path>, + ) -> BoxStream<'static, Result> { + let offset = offset.map(|x| x.to_string()); + let prefix = prefix + .filter(|x| !x.as_ref().is_empty()) + .map(|p| format!("{}{}", p.as_ref(), DELIMITER)); + stream_paginated( + self.clone(), + (prefix, offset), + move |client, (prefix, offset), page_token| async move { + let r = client + .list_request( + prefix.as_deref(), + PaginatedListOptions { + offset: offset.clone(), + delimiter: delimiter.then_some(Cow::Borrowed(DELIMITER)), + page_token, + ..Default::default() + }, + ) + .await?; + Ok((r.result, (prefix, offset), r.page_token)) + }, + ) + .boxed() + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { + self.list_paginated(prefix, false, None) + .map_ok(|r| futures::stream::iter(r.objects.into_iter().map(Ok))) + .try_flatten() + .boxed() + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'static, Result> { + 
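+        // Entries up to and including `offset` are excluded. The offset is
+        // forwarded to the store-specific `ListClient`; whether it is applied
+        // server-side (e.g. S3's `start-after`) or client-side is left to the
+        // implementation.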
+        self.list_paginated(prefix, false, Some(offset))
+            .map_ok(|r| futures::stream::iter(r.objects.into_iter().map(Ok)))
+            .try_flatten()
+            .boxed()
+    }
+
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
+        let mut stream = self.list_paginated(prefix, true, None);
+
+        let mut common_prefixes = BTreeSet::new();
+        let mut objects = Vec::new();
+
+        while let Some(result) = stream.next().await {
+            let response = result?;
+            common_prefixes.extend(response.common_prefixes.into_iter());
+            objects.extend(response.objects.into_iter());
+        }
+
+        Ok(ListResult {
+            common_prefixes: common_prefixes.into_iter().collect(),
+            objects,
+        })
+    }
+}
diff --git a/rust/object_store/src/client/mock_server.rs b/rust/object_store/src/client/mock_server.rs
new file mode 100644
index 0000000000..9caf731d00
--- /dev/null
+++ b/rust/object_store/src/client/mock_server.rs
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::client::{HttpResponse, HttpResponseBody};
+use futures::future::BoxFuture;
+use futures::FutureExt;
+use hyper::body::Incoming;
+use hyper::server::conn::http1;
+use hyper::service::service_fn;
+use hyper::{Request, Response};
+use hyper_util::rt::TokioIo;
+use parking_lot::Mutex;
+use std::collections::VecDeque;
+use std::convert::Infallible;
+use std::future::Future;
+use std::net::SocketAddr;
+use std::sync::Arc;
+use tokio::net::TcpListener;
+use tokio::sync::oneshot;
+use tokio::task::{JoinHandle, JoinSet};
+
+pub(crate) type ResponseFn =
+    Box<dyn FnOnce(Request<Incoming>) -> BoxFuture<'static, HttpResponse> + Send>;
+
+/// A mock server
+pub(crate) struct MockServer {
+    responses: Arc<Mutex<VecDeque<ResponseFn>>>,
+    shutdown: oneshot::Sender<()>,
+    handle: JoinHandle<()>,
+    url: String,
+}
+
+impl MockServer {
+    pub(crate) async fn new() -> Self {
+        let responses: Arc<Mutex<VecDeque<ResponseFn>>> =
+            Arc::new(Mutex::new(VecDeque::with_capacity(10)));
+
+        let addr = SocketAddr::from(([127, 0, 0, 1], 0));
+        let listener = TcpListener::bind(addr).await.unwrap();
+
+        let (shutdown, mut rx) = oneshot::channel::<()>();
+
+        let url = format!("http://{}", listener.local_addr().unwrap());
+
+        let r = Arc::clone(&responses);
+        let handle = tokio::spawn(async move {
+            let mut set = JoinSet::new();
+
+            loop {
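+                // select! races the next accepted connection against the shutdown
+                // signal: `&mut rx` lets the oneshot receiver be polled across loop
+                // iterations, so sending (or dropping) `shutdown` breaks the accept
+                // loop and lets `set.abort_all()` below cancel in-flight connections.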
+                let (stream, _) = tokio::select! {
+                    conn = listener.accept() => conn.unwrap(),
+                    _ = &mut rx => break,
+                };
+
+                let r = Arc::clone(&r);
+                set.spawn(async move {
+                    let _ = http1::Builder::new()
+                        .serve_connection(
+                            TokioIo::new(stream),
+                            service_fn(move |req| {
+                                let r = Arc::clone(&r);
+                                let next = r.lock().pop_front();
+                                async move {
+                                    Ok::<_, Infallible>(match next {
+                                        Some(r) => r(req).await,
+                                        None => HttpResponse::new("Hello World".to_string().into()),
+                                    })
+                                }
+                            }),
+                        )
+                        .await;
+                });
+            }
+
+            set.abort_all();
+        });
+
+        Self {
+            responses,
+            shutdown,
+            handle,
+            url,
+        }
+    }
+
+    /// The url of the mock server
+    pub(crate) fn url(&self) -> &str {
+        &self.url
+    }
+
+    /// Add a response
+    pub(crate) fn push<B: Into<HttpResponseBody>>(&self, response: Response<B>) {
+        let resp = response.map(Into::into);
+        self.push_fn(|_| resp)
+    }
+
+    /// Add a response function
+    pub(crate) fn push_fn<F, B>(&self, f: F)
+    where
+        F: FnOnce(Request<Incoming>) -> Response<B> + Send + 'static,
+        B: Into<HttpResponseBody>,
+    {
+        let f = Box::new(|req| async move { f(req).map(Into::into) }.boxed());
+        self.responses.lock().push_back(f)
+    }
+
+    pub(crate) fn push_async_fn<F, Fut, B>(&self, f: F)
+    where
+        F: FnOnce(Request<Incoming>) -> Fut + Send + 'static,
+        Fut: Future<Output = Response<B>> + Send + 'static,
+        B: Into<HttpResponseBody>,
+    {
+        let f = Box::new(|r| f(r).map(|b| b.map(Into::into)).boxed());
+        self.responses.lock().push_back(f)
+    }
+
+    /// Shutdown the mock server
+    pub(crate) async fn shutdown(self) {
+        let _ = self.shutdown.send(());
+        self.handle.await.unwrap()
+    }
+}
diff --git a/rust/object_store/src/client/mod.rs b/rust/object_store/src/client/mod.rs
new file mode 100644
index 0000000000..5a11b7ad04
--- /dev/null
+++ b/rust/object_store/src/client/mod.rs
@@ -0,0 +1,1047 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Generic utilities for [`reqwest`] based [`ObjectStore`] implementations
+//!
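+//! The module declarations that follow are largely feature-gated, so the
+//! cloud-specific helpers (`pagination`, `list`, `token`, `s3`, `parts`) are only
+//! compiled when the corresponding store feature is enabled.
+//!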
[`ObjectStore`]: crate::ObjectStore + +pub(crate) mod backoff; + +#[cfg(not(target_arch = "wasm32"))] +mod dns; + +#[cfg(not(target_arch = "wasm32"))] +#[cfg(test)] +pub(crate) mod mock_server; + +pub(crate) mod retry; + +#[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] +pub(crate) mod pagination; + +pub(crate) mod get; + +#[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] +pub(crate) mod list; + +#[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] +pub(crate) mod token; + +pub(crate) mod header; + +#[cfg(any(feature = "aws", feature = "gcp"))] +pub(crate) mod s3; + +pub(crate) mod builder; +mod http; + +#[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] +pub(crate) mod parts; +pub use http::*; + +use async_trait::async_trait; +use reqwest::header::{HeaderMap, HeaderValue}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::str::FromStr; +use std::sync::Arc; +use std::time::Duration; + +#[cfg(not(target_arch = "wasm32"))] +use reqwest::{NoProxy, Proxy}; + +use crate::config::{fmt_duration, ConfigValue}; +use crate::path::Path; +use crate::{GetOptions, Result}; + +fn map_client_error(e: reqwest::Error) -> super::Error { + super::Error::Generic { + store: "HTTP client", + source: Box::new(e), + } +} + +static DEFAULT_USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"),); + +/// Configuration keys for [`ClientOptions`] +#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, Deserialize, Serialize)] +#[non_exhaustive] +pub enum ClientConfigKey { + /// Allow non-TLS, i.e. non-HTTPS connections + AllowHttp, + /// Skip certificate validation on https connections. + /// + /// # Warning + /// + /// You should think very carefully before using this method. If + /// invalid certificates are trusted, *any* certificate for *any* site + /// will be trusted for use. This includes expired certificates. This + /// introduces significant vulnerabilities, and should only be used + /// as a last resort or for testing + AllowInvalidCertificates, + /// Timeout for only the connect phase of a Client + ConnectTimeout, + /// default CONTENT_TYPE for uploads + DefaultContentType, + /// Only use http1 connections + Http1Only, + /// Interval for HTTP2 Ping frames should be sent to keep a connection alive. + Http2KeepAliveInterval, + /// Timeout for receiving an acknowledgement of the keep-alive ping. + Http2KeepAliveTimeout, + /// Enable HTTP2 keep alive pings for idle connections + Http2KeepAliveWhileIdle, + /// Sets the maximum frame size to use for HTTP2. + Http2MaxFrameSize, + /// Only use http2 connections + Http2Only, + /// The pool max idle timeout + /// + /// This is the length of time an idle connection will be kept alive + PoolIdleTimeout, + /// maximum number of idle connections per host + PoolMaxIdlePerHost, + /// HTTP proxy to use for requests + ProxyUrl, + /// PEM-formatted CA certificate for proxy connections + ProxyCaCertificate, + /// List of hosts that bypass proxy + ProxyExcludes, + /// Randomize order addresses that the DNS resolution yields. + /// + /// This will spread the connections across more servers. 
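+    ///
+    /// Set via [`ClientOptions::with_config`], e.g. with
+    /// `(ClientConfigKey::RandomizeAddresses, "false")`.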
+ RandomizeAddresses, + /// Request timeout + /// + /// The timeout is applied from when the request starts connecting until the + /// response body has finished + Timeout, + /// User-Agent header to be used by this client + UserAgent, +} + +impl AsRef for ClientConfigKey { + fn as_ref(&self) -> &str { + match self { + Self::AllowHttp => "allow_http", + Self::AllowInvalidCertificates => "allow_invalid_certificates", + Self::ConnectTimeout => "connect_timeout", + Self::DefaultContentType => "default_content_type", + Self::Http1Only => "http1_only", + Self::Http2Only => "http2_only", + Self::Http2KeepAliveInterval => "http2_keep_alive_interval", + Self::Http2KeepAliveTimeout => "http2_keep_alive_timeout", + Self::Http2KeepAliveWhileIdle => "http2_keep_alive_while_idle", + Self::Http2MaxFrameSize => "http2_max_frame_size", + Self::PoolIdleTimeout => "pool_idle_timeout", + Self::PoolMaxIdlePerHost => "pool_max_idle_per_host", + Self::ProxyUrl => "proxy_url", + Self::ProxyCaCertificate => "proxy_ca_certificate", + Self::ProxyExcludes => "proxy_excludes", + Self::RandomizeAddresses => "randomize_addresses", + Self::Timeout => "timeout", + Self::UserAgent => "user_agent", + } + } +} + +impl FromStr for ClientConfigKey { + type Err = super::Error; + + fn from_str(s: &str) -> Result { + match s { + "allow_http" => Ok(Self::AllowHttp), + "allow_invalid_certificates" => Ok(Self::AllowInvalidCertificates), + "connect_timeout" => Ok(Self::ConnectTimeout), + "default_content_type" => Ok(Self::DefaultContentType), + "http1_only" => Ok(Self::Http1Only), + "http2_only" => Ok(Self::Http2Only), + "http2_keep_alive_interval" => Ok(Self::Http2KeepAliveInterval), + "http2_keep_alive_timeout" => Ok(Self::Http2KeepAliveTimeout), + "http2_keep_alive_while_idle" => Ok(Self::Http2KeepAliveWhileIdle), + "http2_max_frame_size" => Ok(Self::Http2MaxFrameSize), + "pool_idle_timeout" => Ok(Self::PoolIdleTimeout), + "pool_max_idle_per_host" => Ok(Self::PoolMaxIdlePerHost), + "proxy_url" => Ok(Self::ProxyUrl), + "proxy_ca_certificate" => Ok(Self::ProxyCaCertificate), + "proxy_excludes" => Ok(Self::ProxyExcludes), + "randomize_addresses" => Ok(Self::RandomizeAddresses), + "timeout" => Ok(Self::Timeout), + "user_agent" => Ok(Self::UserAgent), + _ => Err(super::Error::UnknownConfigurationKey { + store: "HTTP", + key: s.into(), + }), + } + } +} + +/// Represents a CA certificate provided by the user. +/// +/// This is used to configure the client to trust a specific certificate. See +/// [Self::from_pem] for an example +#[derive(Debug, Clone)] +#[cfg(not(target_arch = "wasm32"))] +pub struct Certificate(reqwest::tls::Certificate); + +#[cfg(not(target_arch = "wasm32"))] +impl Certificate { + /// Create a `Certificate` from a PEM encoded certificate. + /// + /// # Example from a PEM file + /// + /// ```no_run + /// # use object_store::Certificate; + /// # use std::fs::File; + /// # use std::io::Read; + /// let mut buf = Vec::new(); + /// File::open("my_cert.pem").unwrap() + /// .read_to_end(&mut buf).unwrap(); + /// let cert = Certificate::from_pem(&buf).unwrap(); + /// + /// ``` + pub fn from_pem(pem: &[u8]) -> Result { + Ok(Self( + reqwest::tls::Certificate::from_pem(pem).map_err(map_client_error)?, + )) + } + + /// Create a collection of `Certificate` from a PEM encoded certificate + /// bundle. + /// + /// Files that contain such collections have extensions such as `.crt`, + /// `.cer` and `.pem` files. 
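+    ///
+    /// # Example from a PEM bundle file
+    ///
+    /// A sketch mirroring the `from_pem` example above (the bundle path is
+    /// illustrative):
+    ///
+    /// ```no_run
+    /// # use object_store::Certificate;
+    /// # use std::fs::File;
+    /// # use std::io::Read;
+    /// let mut buf = Vec::new();
+    /// File::open("ca_bundle.crt").unwrap()
+    ///     .read_to_end(&mut buf).unwrap();
+    /// let certs = Certificate::from_pem_bundle(&buf).unwrap();
+    /// ```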
+ pub fn from_pem_bundle(pem_bundle: &[u8]) -> Result> { + Ok(reqwest::tls::Certificate::from_pem_bundle(pem_bundle) + .map_err(map_client_error)? + .into_iter() + .map(Self) + .collect()) + } + + /// Create a `Certificate` from a binary DER encoded certificate. + pub fn from_der(der: &[u8]) -> Result { + Ok(Self( + reqwest::tls::Certificate::from_der(der).map_err(map_client_error)?, + )) + } +} + +/// HTTP client configuration for remote object stores +#[derive(Debug, Clone)] +pub struct ClientOptions { + user_agent: Option>, + #[cfg(not(target_arch = "wasm32"))] + root_certificates: Vec, + content_type_map: HashMap, + default_content_type: Option, + default_headers: Option, + proxy_url: Option, + proxy_ca_certificate: Option, + proxy_excludes: Option, + allow_http: ConfigValue, + allow_insecure: ConfigValue, + timeout: Option>, + connect_timeout: Option>, + pool_idle_timeout: Option>, + pool_max_idle_per_host: Option>, + http2_keep_alive_interval: Option>, + http2_keep_alive_timeout: Option>, + http2_keep_alive_while_idle: ConfigValue, + http2_max_frame_size: Option>, + http1_only: ConfigValue, + http2_only: ConfigValue, + randomize_addresses: ConfigValue, +} + +impl Default for ClientOptions { + fn default() -> Self { + // Defaults based on + // + // + // Which recommend a connection timeout of 3.1s and a request timeout of 2s + // + // As object store requests may involve the transfer of non-trivial volumes of data + // we opt for a slightly higher default timeout of 30 seconds + Self { + user_agent: None, + #[cfg(not(target_arch = "wasm32"))] + root_certificates: Default::default(), + content_type_map: Default::default(), + default_content_type: None, + default_headers: None, + proxy_url: None, + proxy_ca_certificate: None, + proxy_excludes: None, + allow_http: Default::default(), + allow_insecure: Default::default(), + timeout: Some(Duration::from_secs(30).into()), + connect_timeout: Some(Duration::from_secs(5).into()), + pool_idle_timeout: None, + pool_max_idle_per_host: None, + http2_keep_alive_interval: None, + http2_keep_alive_timeout: None, + http2_keep_alive_while_idle: Default::default(), + http2_max_frame_size: None, + // HTTP2 is known to be significantly slower than HTTP1, so we default + // to HTTP1 for now. 
+ // https://github.com/apache/arrow-rs/issues/5194 + http1_only: true.into(), + http2_only: Default::default(), + randomize_addresses: true.into(), + } + } +} + +impl ClientOptions { + /// Create a new [`ClientOptions`] with default values + pub fn new() -> Self { + Default::default() + } + + /// Set an option by key + pub fn with_config(mut self, key: ClientConfigKey, value: impl Into) -> Self { + match key { + ClientConfigKey::AllowHttp => self.allow_http.parse(value), + ClientConfigKey::AllowInvalidCertificates => self.allow_insecure.parse(value), + ClientConfigKey::ConnectTimeout => { + self.connect_timeout = Some(ConfigValue::Deferred(value.into())) + } + ClientConfigKey::DefaultContentType => self.default_content_type = Some(value.into()), + ClientConfigKey::Http1Only => self.http1_only.parse(value), + ClientConfigKey::Http2Only => self.http2_only.parse(value), + ClientConfigKey::Http2KeepAliveInterval => { + self.http2_keep_alive_interval = Some(ConfigValue::Deferred(value.into())) + } + ClientConfigKey::Http2KeepAliveTimeout => { + self.http2_keep_alive_timeout = Some(ConfigValue::Deferred(value.into())) + } + ClientConfigKey::Http2KeepAliveWhileIdle => { + self.http2_keep_alive_while_idle.parse(value) + } + ClientConfigKey::Http2MaxFrameSize => { + self.http2_max_frame_size = Some(ConfigValue::Deferred(value.into())) + } + ClientConfigKey::PoolIdleTimeout => { + self.pool_idle_timeout = Some(ConfigValue::Deferred(value.into())) + } + ClientConfigKey::PoolMaxIdlePerHost => { + self.pool_max_idle_per_host = Some(ConfigValue::Deferred(value.into())) + } + ClientConfigKey::ProxyUrl => self.proxy_url = Some(value.into()), + ClientConfigKey::ProxyCaCertificate => self.proxy_ca_certificate = Some(value.into()), + ClientConfigKey::ProxyExcludes => self.proxy_excludes = Some(value.into()), + ClientConfigKey::RandomizeAddresses => { + self.randomize_addresses.parse(value); + } + ClientConfigKey::Timeout => self.timeout = Some(ConfigValue::Deferred(value.into())), + ClientConfigKey::UserAgent => { + self.user_agent = Some(ConfigValue::Deferred(value.into())) + } + } + self + } + + /// Get an option by key + pub fn get_config_value(&self, key: &ClientConfigKey) -> Option { + match key { + ClientConfigKey::AllowHttp => Some(self.allow_http.to_string()), + ClientConfigKey::AllowInvalidCertificates => Some(self.allow_insecure.to_string()), + ClientConfigKey::ConnectTimeout => self.connect_timeout.as_ref().map(fmt_duration), + ClientConfigKey::DefaultContentType => self.default_content_type.clone(), + ClientConfigKey::Http1Only => Some(self.http1_only.to_string()), + ClientConfigKey::Http2KeepAliveInterval => { + self.http2_keep_alive_interval.as_ref().map(fmt_duration) + } + ClientConfigKey::Http2KeepAliveTimeout => { + self.http2_keep_alive_timeout.as_ref().map(fmt_duration) + } + ClientConfigKey::Http2KeepAliveWhileIdle => { + Some(self.http2_keep_alive_while_idle.to_string()) + } + ClientConfigKey::Http2MaxFrameSize => { + self.http2_max_frame_size.as_ref().map(|v| v.to_string()) + } + ClientConfigKey::Http2Only => Some(self.http2_only.to_string()), + ClientConfigKey::PoolIdleTimeout => self.pool_idle_timeout.as_ref().map(fmt_duration), + ClientConfigKey::PoolMaxIdlePerHost => { + self.pool_max_idle_per_host.as_ref().map(|v| v.to_string()) + } + ClientConfigKey::ProxyUrl => self.proxy_url.clone(), + ClientConfigKey::ProxyCaCertificate => self.proxy_ca_certificate.clone(), + ClientConfigKey::ProxyExcludes => self.proxy_excludes.clone(), + ClientConfigKey::RandomizeAddresses => 
Some(self.randomize_addresses.to_string()), + ClientConfigKey::Timeout => self.timeout.as_ref().map(fmt_duration), + ClientConfigKey::UserAgent => self + .user_agent + .as_ref() + .and_then(|v| v.get().ok()) + .and_then(|v| v.to_str().ok().map(|s| s.to_string())), + } + } + + /// Sets the User-Agent header to be used by this client + /// + /// Default is based on the version of this crate + pub fn with_user_agent(mut self, agent: HeaderValue) -> Self { + self.user_agent = Some(agent.into()); + self + } + + /// Add a custom root certificate. + /// + /// This can be used to connect to a server that has a self-signed + /// certificate for example. + #[cfg(not(target_arch = "wasm32"))] + pub fn with_root_certificate(mut self, certificate: Certificate) -> Self { + self.root_certificates.push(certificate); + self + } + + /// Set the default CONTENT_TYPE for uploads + pub fn with_default_content_type(mut self, mime: impl Into) -> Self { + self.default_content_type = Some(mime.into()); + self + } + + /// Set the CONTENT_TYPE for a given file extension + pub fn with_content_type_for_suffix( + mut self, + extension: impl Into, + mime: impl Into, + ) -> Self { + self.content_type_map.insert(extension.into(), mime.into()); + self + } + + /// Sets the default headers for every request + pub fn with_default_headers(mut self, headers: HeaderMap) -> Self { + self.default_headers = Some(headers); + self + } + + /// Sets what protocol is allowed. If `allow_http` is : + /// * false (default): Only HTTPS are allowed + /// * true: HTTP and HTTPS are allowed + pub fn with_allow_http(mut self, allow_http: bool) -> Self { + self.allow_http = allow_http.into(); + self + } + /// Allows connections to invalid SSL certificates + /// * false (default): Only valid HTTPS certificates are allowed + /// * true: All HTTPS certificates are allowed + /// + /// # Warning + /// + /// You should think very carefully before using this method. If + /// invalid certificates are trusted, *any* certificate for *any* site + /// will be trusted for use. This includes expired certificates. This + /// introduces significant vulnerabilities, and should only be used + /// as a last resort or for testing + pub fn with_allow_invalid_certificates(mut self, allow_insecure: bool) -> Self { + self.allow_insecure = allow_insecure.into(); + self + } + + /// Only use http1 connections + /// + /// This is on by default, since http2 is known to be significantly slower than http1. + pub fn with_http1_only(mut self) -> Self { + self.http2_only = false.into(); + self.http1_only = true.into(); + self + } + + /// Only use http2 connections + pub fn with_http2_only(mut self) -> Self { + self.http1_only = false.into(); + self.http2_only = true.into(); + self + } + + /// Use http2 if supported, otherwise use http1. 
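+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of opting back into negotiated HTTP2:
+    ///
+    /// ```
+    /// # use object_store::ClientOptions;
+    /// let options = ClientOptions::new().with_allow_http2();
+    /// ```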
+ pub fn with_allow_http2(mut self) -> Self { + self.http1_only = false.into(); + self.http2_only = false.into(); + self + } + + /// Set a proxy URL to use for requests + pub fn with_proxy_url(mut self, proxy_url: impl Into) -> Self { + self.proxy_url = Some(proxy_url.into()); + self + } + + /// Set a trusted proxy CA certificate + pub fn with_proxy_ca_certificate(mut self, proxy_ca_certificate: impl Into) -> Self { + self.proxy_ca_certificate = Some(proxy_ca_certificate.into()); + self + } + + /// Set a list of hosts to exclude from proxy connections + pub fn with_proxy_excludes(mut self, proxy_excludes: impl Into) -> Self { + self.proxy_excludes = Some(proxy_excludes.into()); + self + } + + /// Set timeout for the overall request + /// + /// The timeout starts from when the request starts connecting until the + /// response body has finished. If the request does not complete within the + /// timeout, the client returns a timeout error. + /// + /// Timeout errors are retried, subject to the [`RetryConfig`] + /// + /// Default is 30 seconds + /// + /// # See Also + /// * [`Self::with_timeout_disabled`] to disable the timeout + /// * [`Self::with_connect_timeout`] to set a timeout for the connect phase + /// + /// [`RetryConfig`]: crate::RetryConfig + pub fn with_timeout(mut self, timeout: Duration) -> Self { + self.timeout = Some(ConfigValue::Parsed(timeout)); + self + } + + /// Disables the request timeout + /// + /// # See Also + /// * [`Self::with_timeout`] + pub fn with_timeout_disabled(mut self) -> Self { + self.timeout = None; + self + } + + /// Set a timeout for only the connect phase of a Client + /// + /// This is the time allowed for the client to establish a connection + /// and if the connection is not established within this time, + /// the client returns a timeout error. + /// + /// Timeout errors are retried, subject to the [`RetryConfig`] + /// + /// Default is 5 seconds + /// + /// # See Also + /// * [`Self::with_timeout`] to set a timeout for the overall request + /// * [`Self::with_connect_timeout_disabled`] to disable the connect timeout + /// + /// [`RetryConfig`]: crate::RetryConfig + pub fn with_connect_timeout(mut self, timeout: Duration) -> Self { + self.connect_timeout = Some(ConfigValue::Parsed(timeout)); + self + } + + /// Disables the connection timeout + /// + /// # See Also + /// * [`Self::with_connect_timeout`] + pub fn with_connect_timeout_disabled(mut self) -> Self { + self.connect_timeout = None; + self + } + + /// Set the pool max idle timeout + /// + /// This is the length of time an idle connection will be kept alive + /// + /// Default is 90 seconds enforced by reqwest + pub fn with_pool_idle_timeout(mut self, timeout: Duration) -> Self { + self.pool_idle_timeout = Some(ConfigValue::Parsed(timeout)); + self + } + + /// Set the maximum number of idle connections per host + /// + /// Default is no limit enforced by reqwest + pub fn with_pool_max_idle_per_host(mut self, max: usize) -> Self { + self.pool_max_idle_per_host = Some(max.into()); + self + } + + /// Sets an interval for HTTP2 Ping frames should be sent to keep a connection alive. + /// + /// Default is disabled enforced by reqwest + pub fn with_http2_keep_alive_interval(mut self, interval: Duration) -> Self { + self.http2_keep_alive_interval = Some(ConfigValue::Parsed(interval)); + self + } + + /// Sets a timeout for receiving an acknowledgement of the keep-alive ping. + /// + /// If the ping is not acknowledged within the timeout, the connection will be closed. 
+ /// Does nothing if http2_keep_alive_interval is disabled. + /// + /// Default is disabled enforced by reqwest + pub fn with_http2_keep_alive_timeout(mut self, interval: Duration) -> Self { + self.http2_keep_alive_timeout = Some(ConfigValue::Parsed(interval)); + self + } + + /// Enable HTTP2 keep alive pings for idle connections + /// + /// If disabled, keep-alive pings are only sent while there are open request/response + /// streams. If enabled, pings are also sent when no streams are active + /// + /// Default is disabled enforced by reqwest + pub fn with_http2_keep_alive_while_idle(mut self) -> Self { + self.http2_keep_alive_while_idle = true.into(); + self + } + + /// Sets the maximum frame size to use for HTTP2. + /// + /// Default is currently 16,384 but may change internally to optimize for common uses. + pub fn with_http2_max_frame_size(mut self, sz: u32) -> Self { + self.http2_max_frame_size = Some(ConfigValue::Parsed(sz)); + self + } + + /// Get the mime type for the file in `path` to be uploaded + /// + /// Gets the file extension from `path`, and returns the + /// mime type if it was defined initially through + /// `ClientOptions::with_content_type_for_suffix` + /// + /// Otherwise, returns the default mime type if it was defined + /// earlier through `ClientOptions::with_default_content_type` + pub fn get_content_type(&self, path: &Path) -> Option<&str> { + match path.extension() { + Some(extension) => match self.content_type_map.get(extension) { + Some(ct) => Some(ct.as_str()), + None => self.default_content_type.as_deref(), + }, + None => self.default_content_type.as_deref(), + } + } + + /// Returns a copy of this [`ClientOptions`] with overrides necessary for metadata endpoint access + /// + /// In particular: + /// * Allows HTTP as metadata endpoints do not use TLS + /// * Configures a low connection timeout to provide quick feedback if not present + #[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] + pub(crate) fn metadata_options(&self) -> Self { + self.clone() + .with_allow_http(true) + .with_connect_timeout(Duration::from_secs(1)) + } + + #[cfg(not(target_arch = "wasm32"))] + pub(crate) fn client(&self) -> Result { + let mut builder = reqwest::ClientBuilder::new(); + + match &self.user_agent { + Some(user_agent) => builder = builder.user_agent(user_agent.get()?), + None => builder = builder.user_agent(DEFAULT_USER_AGENT), + } + + if let Some(headers) = &self.default_headers { + builder = builder.default_headers(headers.clone()) + } + + if let Some(proxy) = &self.proxy_url { + let mut proxy = Proxy::all(proxy).map_err(map_client_error)?; + + if let Some(certificate) = &self.proxy_ca_certificate { + let certificate = reqwest::tls::Certificate::from_pem(certificate.as_bytes()) + .map_err(map_client_error)?; + + builder = builder.add_root_certificate(certificate); + } + + if let Some(proxy_excludes) = &self.proxy_excludes { + let no_proxy = NoProxy::from_string(proxy_excludes); + + proxy = proxy.no_proxy(no_proxy); + } + + builder = builder.proxy(proxy); + } + + for certificate in &self.root_certificates { + builder = builder.add_root_certificate(certificate.0.clone()); + } + + if let Some(timeout) = &self.timeout { + builder = builder.timeout(timeout.get()?) + } + + if let Some(timeout) = &self.connect_timeout { + builder = builder.connect_timeout(timeout.get()?) + } + + if let Some(timeout) = &self.pool_idle_timeout { + builder = builder.pool_idle_timeout(timeout.get()?) 
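+        // Note: `get()?` here and for the remaining settings below parses deferred
+        // string values (e.g. "90 seconds") on first use, so malformed configuration
+        // surfaces as an error when the client is built rather than when it was set.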
+ } + + if let Some(max) = &self.pool_max_idle_per_host { + builder = builder.pool_max_idle_per_host(max.get()?) + } + + if let Some(interval) = &self.http2_keep_alive_interval { + builder = builder.http2_keep_alive_interval(interval.get()?) + } + + if let Some(interval) = &self.http2_keep_alive_timeout { + builder = builder.http2_keep_alive_timeout(interval.get()?) + } + + if self.http2_keep_alive_while_idle.get()? { + builder = builder.http2_keep_alive_while_idle(true) + } + + if let Some(sz) = &self.http2_max_frame_size { + builder = builder.http2_max_frame_size(Some(sz.get()?)) + } + + if self.http1_only.get()? { + builder = builder.http1_only() + } + + if self.http2_only.get()? { + builder = builder.http2_prior_knowledge() + } + + if self.allow_insecure.get()? { + builder = builder.danger_accept_invalid_certs(true) + } + + // Explicitly disable compression, since it may be automatically enabled + // when certain reqwest features are enabled. Compression interferes + // with the `Content-Length` header, which is used to determine the + // size of objects. + builder = builder.no_gzip().no_brotli().no_zstd().no_deflate(); + + if self.randomize_addresses.get()? { + builder = builder.dns_resolver(Arc::new(dns::ShuffleResolver)); + } + + builder + .https_only(!self.allow_http.get()?) + .build() + .map_err(map_client_error) + } + + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] + pub(crate) fn client(&self) -> Result { + let mut builder = reqwest::ClientBuilder::new(); + + match &self.user_agent { + Some(user_agent) => builder = builder.user_agent(user_agent.get()?), + None => builder = builder.user_agent(DEFAULT_USER_AGENT), + } + + if let Some(headers) = &self.default_headers { + builder = builder.default_headers(headers.clone()) + } + + builder.build().map_err(map_client_error) + } +} + +pub(crate) trait GetOptionsExt { + fn with_get_options(self, options: GetOptions) -> Self; +} + +impl GetOptionsExt for HttpRequestBuilder { + fn with_get_options(mut self, options: GetOptions) -> Self { + use hyper::header::*; + + let GetOptions { + if_match, + if_none_match, + if_modified_since, + if_unmodified_since, + range, + version: _, + head: _, + extensions, + } = options; + + if let Some(range) = range { + self = self.header(RANGE, range.to_string()); + } + + if let Some(tag) = if_match { + self = self.header(IF_MATCH, tag); + } + + if let Some(tag) = if_none_match { + self = self.header(IF_NONE_MATCH, tag); + } + + const DATE_FORMAT: &str = "%a, %d %b %Y %H:%M:%S GMT"; + if let Some(date) = if_unmodified_since { + self = self.header(IF_UNMODIFIED_SINCE, date.format(DATE_FORMAT).to_string()); + } + + if let Some(date) = if_modified_since { + self = self.header(IF_MODIFIED_SINCE, date.format(DATE_FORMAT).to_string()); + } + + self = self.extensions(extensions); + + self + } +} + +/// Provides credentials for use when signing requests +#[async_trait] +pub trait CredentialProvider: std::fmt::Debug + Send + Sync { + /// The type of credential returned by this provider + type Credential; + + /// Return a credential + async fn get_credential(&self) -> Result>; +} + +/// A static set of credentials +#[derive(Debug)] +pub struct StaticCredentialProvider { + credential: Arc, +} + +impl StaticCredentialProvider { + /// A [`CredentialProvider`] for a static credential of type `T` + pub fn new(credential: T) -> Self { + Self { + credential: Arc::new(credential), + } + } +} + +#[async_trait] +impl CredentialProvider for StaticCredentialProvider +where + T: std::fmt::Debug + Send + Sync, +{ + type 
Credential = T; + + async fn get_credential(&self) -> Result> { + Ok(Arc::clone(&self.credential)) + } +} + +#[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] +mod cloud { + use super::*; + use crate::client::token::{TemporaryToken, TokenCache}; + use crate::RetryConfig; + + /// A [`CredentialProvider`] that uses [`HttpClient`] to fetch temporary tokens + #[derive(Debug)] + pub(crate) struct TokenCredentialProvider { + inner: T, + client: HttpClient, + retry: RetryConfig, + cache: TokenCache>, + } + + impl TokenCredentialProvider { + pub(crate) fn new(inner: T, client: HttpClient, retry: RetryConfig) -> Self { + Self { + inner, + client, + retry, + cache: Default::default(), + } + } + + /// Override the minimum remaining TTL for a cached token to be used + #[cfg(any(feature = "aws", feature = "gcp"))] + pub(crate) fn with_min_ttl(mut self, min_ttl: Duration) -> Self { + self.cache = self.cache.with_min_ttl(min_ttl); + self + } + } + + #[async_trait] + impl CredentialProvider for TokenCredentialProvider { + type Credential = T::Credential; + + async fn get_credential(&self) -> Result> { + self.cache + .get_or_insert_with(|| self.inner.fetch_token(&self.client, &self.retry)) + .await + } + } + + #[async_trait] + pub(crate) trait TokenProvider: std::fmt::Debug + Send + Sync { + type Credential: std::fmt::Debug + Send + Sync; + + async fn fetch_token( + &self, + client: &HttpClient, + retry: &RetryConfig, + ) -> Result>>; + } +} + +use crate::client::builder::HttpRequestBuilder; +#[cfg(any(feature = "aws", feature = "azure", feature = "gcp"))] +pub(crate) use cloud::*; + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + #[test] + fn client_test_config_from_map() { + let allow_http = "true".to_string(); + let allow_invalid_certificates = "false".to_string(); + let connect_timeout = "90 seconds".to_string(); + let default_content_type = "object_store:fake_default_content_type".to_string(); + let http1_only = "true".to_string(); + let http2_only = "false".to_string(); + let http2_keep_alive_interval = "90 seconds".to_string(); + let http2_keep_alive_timeout = "91 seconds".to_string(); + let http2_keep_alive_while_idle = "92 seconds".to_string(); + let http2_max_frame_size = "1337".to_string(); + let pool_idle_timeout = "93 seconds".to_string(); + let pool_max_idle_per_host = "94".to_string(); + let proxy_url = "https://fake_proxy_url".to_string(); + let timeout = "95 seconds".to_string(); + let user_agent = "object_store:fake_user_agent".to_string(); + + let options = HashMap::from([ + ("allow_http", allow_http.clone()), + ( + "allow_invalid_certificates", + allow_invalid_certificates.clone(), + ), + ("connect_timeout", connect_timeout.clone()), + ("default_content_type", default_content_type.clone()), + ("http1_only", http1_only.clone()), + ("http2_only", http2_only.clone()), + ( + "http2_keep_alive_interval", + http2_keep_alive_interval.clone(), + ), + ("http2_keep_alive_timeout", http2_keep_alive_timeout.clone()), + ( + "http2_keep_alive_while_idle", + http2_keep_alive_while_idle.clone(), + ), + ("http2_max_frame_size", http2_max_frame_size.clone()), + ("pool_idle_timeout", pool_idle_timeout.clone()), + ("pool_max_idle_per_host", pool_max_idle_per_host.clone()), + ("proxy_url", proxy_url.clone()), + ("timeout", timeout.clone()), + ("user_agent", user_agent.clone()), + ]); + + let builder = options + .into_iter() + .fold(ClientOptions::new(), |builder, (key, value)| { + builder.with_config(key.parse().unwrap(), value) + }); + + assert_eq!( + builder 
+ .get_config_value(&ClientConfigKey::AllowHttp) + .unwrap(), + allow_http + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::AllowInvalidCertificates) + .unwrap(), + allow_invalid_certificates + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::ConnectTimeout) + .unwrap(), + connect_timeout + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::DefaultContentType) + .unwrap(), + default_content_type + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::Http1Only) + .unwrap(), + http1_only + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::Http2Only) + .unwrap(), + http2_only + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::Http2KeepAliveInterval) + .unwrap(), + http2_keep_alive_interval + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::Http2KeepAliveTimeout) + .unwrap(), + http2_keep_alive_timeout + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::Http2KeepAliveWhileIdle) + .unwrap(), + http2_keep_alive_while_idle + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::Http2MaxFrameSize) + .unwrap(), + http2_max_frame_size + ); + + assert_eq!( + builder + .get_config_value(&ClientConfigKey::PoolIdleTimeout) + .unwrap(), + pool_idle_timeout + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::PoolMaxIdlePerHost) + .unwrap(), + pool_max_idle_per_host + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::ProxyUrl) + .unwrap(), + proxy_url + ); + assert_eq!( + builder.get_config_value(&ClientConfigKey::Timeout).unwrap(), + timeout + ); + assert_eq!( + builder + .get_config_value(&ClientConfigKey::UserAgent) + .unwrap(), + user_agent + ); + } +} diff --git a/rust/object_store/src/client/pagination.rs b/rust/object_store/src/client/pagination.rs new file mode 100644 index 0000000000..d789c7431d --- /dev/null +++ b/rust/object_store/src/client/pagination.rs @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::Result; +use futures::Stream; +use std::future::Future; + +/// Takes a paginated operation `op` that when called with: +/// +/// - A state `S` +/// - An optional next token `Option` +/// +/// Returns +/// +/// - A response value `T` +/// - The next state `S` +/// - The next continuation token `Option` +/// +/// And converts it into a `Stream>` which will first call `op(state, None)`, and yield +/// the returned response `T`. 
If the returned continuation token was `None` the stream will then +/// finish, otherwise it will continue to call `op(state, token)` with the values returned by the +/// previous call to `op`, until a continuation token of `None` is returned +/// +pub(crate) fn stream_paginated( + client: C, + state: S, + op: F, +) -> impl Stream> +where + C: Clone, + F: Fn(C, S, Option) -> Fut + Copy, + Fut: Future)>>, +{ + enum PaginationState { + Start(T), + HasMore(T, String), + Done, + } + + futures::stream::unfold(PaginationState::Start(state), move |state| { + let client = client.clone(); + async move { + let (s, page_token) = match state { + PaginationState::Start(s) => (s, None), + PaginationState::HasMore(s, page_token) if !page_token.is_empty() => { + (s, Some(page_token)) + } + _ => { + return None; + } + }; + + let (resp, s, continuation) = match op(client, s, page_token).await { + Ok(resp) => resp, + Err(e) => return Some((Err(e), PaginationState::Done)), + }; + + let next_state = match continuation { + Some(token) => PaginationState::HasMore(s, token), + None => PaginationState::Done, + }; + + Some((Ok(resp), next_state)) + } + }) +} diff --git a/rust/object_store/src/client/parts.rs b/rust/object_store/src/client/parts.rs new file mode 100644 index 0000000000..9fc301edcf --- /dev/null +++ b/rust/object_store/src/client/parts.rs @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::multipart::PartId; +use parking_lot::Mutex; + +/// An interior mutable collection of upload parts and their corresponding part index +#[derive(Debug, Default)] +pub(crate) struct Parts(Mutex>); + +impl Parts { + /// Record the [`PartId`] for a given index + /// + /// Note: calling this method multiple times with the same `part_idx` + /// will result in multiple [`PartId`] in the final output + pub(crate) fn put(&self, part_idx: usize, id: PartId) { + self.0.lock().push((part_idx, id)) + } + + /// Produce the final list of [`PartId`] ordered by `part_idx` + /// + /// `expected` is the number of parts expected in the final result + pub(crate) fn finish(&self, expected: usize) -> crate::Result> { + let mut parts = self.0.lock(); + if parts.len() != expected { + return Err(crate::Error::Generic { + store: "Parts", + source: "Missing part".to_string().into(), + }); + } + parts.sort_unstable_by_key(|(idx, _)| *idx); + Ok(parts.drain(..).map(|(_, v)| v).collect()) + } +} diff --git a/rust/object_store/src/client/retry.rs b/rust/object_store/src/client/retry.rs new file mode 100644 index 0000000000..c5341c2446 --- /dev/null +++ b/rust/object_store/src/client/retry.rs @@ -0,0 +1,913 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`RetryConfig`] connection retry policy + +//! File has been modified to include some backtrace and debugging information on send() failures. + +use crate::client::backoff::{Backoff, BackoffConfig}; +use crate::client::builder::HttpRequestBuilder; +use crate::client::{HttpClient, HttpError, HttpErrorKind, HttpRequest, HttpResponse}; +use crate::PutPayload; +use futures::future::BoxFuture; +use http::{Method, Uri}; +use reqwest::header::LOCATION; +use reqwest::StatusCode; +#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] +use std::time::{Duration, Instant}; +use tracing::{error, info}; +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +use web_time::{Duration, Instant}; + +/// Retry request error +#[derive(Debug)] +pub struct RetryError(Box); + +/// Box error to avoid large error variant +#[derive(Debug)] +struct RetryErrorImpl { + method: Method, + uri: Option, + retries: usize, + max_retries: usize, + elapsed: Duration, + retry_timeout: Duration, + inner: RequestError, +} + +impl std::fmt::Display for RetryError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Error performing {} ", self.0.method)?; + match &self.0.uri { + Some(uri) => write!(f, "{uri} ")?, + None => write!(f, "REDACTED ")?, + } + write!(f, "in {:?}", self.0.elapsed)?; + if self.0.retries != 0 { + write!( + f, + ", after {} retries, max_retries: {}, retry_timeout: {:?} ", + self.0.retries, self.0.max_retries, self.0.retry_timeout + )?; + } + write!(f, " - {}", self.0.inner) + } +} + +impl std::error::Error for RetryError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + Some(&self.0.inner) + } +} + +/// Context of the retry loop +/// +/// Most use-cases should use [`RetryExt`] and [`RetryableRequestBuilder`], however, +/// [`RetryContext`] allows preserving retry state across multiple [`RetryableRequest`] +pub(crate) struct RetryContext { + backoff: Backoff, + retries: usize, + max_retries: usize, + retry_timeout: Duration, + start: Instant, +} + +impl RetryContext { + pub(crate) fn new(config: &RetryConfig) -> Self { + Self { + max_retries: config.max_retries, + retry_timeout: config.retry_timeout, + backoff: Backoff::new(&config.backoff), + retries: 0, + start: Instant::now(), + } + } + + pub(crate) fn exhausted(&self) -> bool { + self.retries >= self.max_retries || self.start.elapsed() > self.retry_timeout + } + + pub(crate) fn backoff(&mut self) -> Duration { + self.retries += 1; + self.backoff.next() + } +} + +/// The reason a request failed +#[derive(Debug, thiserror::Error)] +pub enum RequestError { + #[error("Received redirect without LOCATION, this normally indicates an incorrectly configured region" + )] + BareRedirect, + + #[error("Server returned non-2xx status code: {status}: {}", body.as_deref().unwrap_or(""))] + Status { + 
status: StatusCode, + body: Option, + }, + + #[error("Server returned error response: {body}")] + Response { status: StatusCode, body: String }, + + #[error(transparent)] + Http(#[from] HttpError), +} + +impl RetryError { + /// Returns the underlying [`RequestError`] + pub fn inner(&self) -> &RequestError { + &self.0.inner + } + + /// Returns the status code associated with this error if any + pub fn status(&self) -> Option { + match self.inner() { + RequestError::Status { status, .. } | RequestError::Response { status, .. } => { + Some(*status) + } + RequestError::BareRedirect | RequestError::Http(_) => None, + } + } + + /// Returns the error body if any + pub fn body(&self) -> Option<&str> { + match self.inner() { + RequestError::Status { body, .. } => body.as_deref(), + RequestError::Response { body, .. } => Some(body), + RequestError::BareRedirect | RequestError::Http(_) => None, + } + } + + pub fn error(self, store: &'static str, path: String) -> crate::Error { + match self.status() { + Some(StatusCode::NOT_FOUND) => crate::Error::NotFound { + path, + source: Box::new(self), + }, + Some(StatusCode::NOT_MODIFIED) => crate::Error::NotModified { + path, + source: Box::new(self), + }, + Some(StatusCode::PRECONDITION_FAILED) => crate::Error::Precondition { + path, + source: Box::new(self), + }, + Some(StatusCode::CONFLICT) => crate::Error::AlreadyExists { + path, + source: Box::new(self), + }, + Some(StatusCode::FORBIDDEN) => crate::Error::PermissionDenied { + path, + source: Box::new(self), + }, + Some(StatusCode::UNAUTHORIZED) => crate::Error::Unauthenticated { + path, + source: Box::new(self), + }, + _ => crate::Error::Generic { + store, + source: Box::new(self), + }, + } + } +} + +impl From for std::io::Error { + fn from(err: RetryError) -> Self { + use std::io::ErrorKind; + let kind = match err.status() { + Some(StatusCode::NOT_FOUND) => ErrorKind::NotFound, + Some(StatusCode::BAD_REQUEST) => ErrorKind::InvalidInput, + Some(StatusCode::UNAUTHORIZED) | Some(StatusCode::FORBIDDEN) => { + ErrorKind::PermissionDenied + } + _ => match err.inner() { + RequestError::Http(h) => match h.kind() { + HttpErrorKind::Timeout => ErrorKind::TimedOut, + HttpErrorKind::Connect => ErrorKind::NotConnected, + _ => ErrorKind::Other, + }, + _ => ErrorKind::Other, + }, + }; + Self::new(kind, err) + } +} + +pub(crate) type Result = std::result::Result; + +/// The configuration for how to respond to request errors +/// +/// The following categories of error will be retried: +/// +/// * 5xx server errors +/// * Connection errors +/// * Dropped connections +/// * Timeouts for [safe] / read-only requests +/// +/// Requests will be retried up to some limit, using exponential +/// backoff with jitter. See [`BackoffConfig`] for more information +/// +/// [safe]: https://datatracker.ietf.org/doc/html/rfc7231#section-4.2.1 +#[derive(Debug, Clone)] +pub struct RetryConfig { + /// The backoff configuration + pub backoff: BackoffConfig, + + /// The maximum number of times to retry a request + /// + /// Set to 0 to disable retries + pub max_retries: usize, + + /// The maximum length of time from the initial request + /// after which no further retries will be attempted + /// + /// This not only bounds the length of time before a server + /// error will be surfaced to the application, but also bounds + /// the length of time a request's credentials must remain valid. 
+ /// + /// As requests are retried without renewing credentials or + /// regenerating request payloads, this number should be kept + /// below 5 minutes to avoid errors due to expired credentials + /// and/or request payloads + pub retry_timeout: Duration, +} + +impl Default for RetryConfig { + fn default() -> Self { + Self { + backoff: Default::default(), + max_retries: 10, + retry_timeout: Duration::from_secs(3 * 60), + } + } +} + +fn body_contains_error(response_body: &str) -> bool { + response_body.contains("InternalError") || response_body.contains("SlowDown") +} + +/// Combines a [`RetryableRequest`] with a [`RetryContext`] +pub(crate) struct RetryableRequestBuilder { + request: RetryableRequest, + context: RetryContext, +} + +impl RetryableRequestBuilder { + /// Set whether this request is idempotent + /// + /// An idempotent request will be retried on timeout even if the request + /// method is not [safe](https://datatracker.ietf.org/doc/html/rfc7231#section-4.2.1) + pub(crate) fn idempotent(mut self, idempotent: bool) -> Self { + self.request.idempotent = Some(idempotent); + self + } + + /// Set whether this request should be retried on a 409 Conflict response. + #[cfg(feature = "aws")] + pub(crate) fn retry_on_conflict(mut self, retry_on_conflict: bool) -> Self { + self.request.retry_on_conflict = retry_on_conflict; + self + } + + /// Set whether this request contains sensitive data + /// + /// This will avoid printing out the URL in error messages + #[allow(unused)] + pub(crate) fn sensitive(mut self, sensitive: bool) -> Self { + self.request.sensitive = sensitive; + self + } + + /// Provide a [`PutPayload`] + pub(crate) fn payload(mut self, payload: Option) -> Self { + self.request.payload = payload; + self + } + + #[allow(unused)] + pub(crate) fn retry_error_body(mut self, retry_error_body: bool) -> Self { + self.request.retry_error_body = retry_error_body; + self + } + + pub(crate) async fn send(mut self) -> Result { + self.request.send(&mut self.context).await + } +} + +/// A retryable request +pub(crate) struct RetryableRequest { + client: HttpClient, + http: HttpRequest, + + sensitive: bool, + idempotent: Option, + retry_on_conflict: bool, + payload: Option, + + retry_error_body: bool, +} + +impl RetryableRequest { + #[allow(unused)] + pub(crate) fn sensitive(self, sensitive: bool) -> Self { + Self { sensitive, ..self } + } + + fn err(&self, error: RequestError, ctx: &RetryContext) -> RetryError { + RetryError(Box::new(RetryErrorImpl { + uri: (!self.sensitive).then(|| self.http.uri().clone()), + method: self.http.method().clone(), + retries: ctx.retries, + max_retries: ctx.max_retries, + elapsed: ctx.start.elapsed(), + retry_timeout: ctx.retry_timeout, + inner: error, + })) + } + + pub(crate) async fn send(self, ctx: &mut RetryContext) -> Result { + loop { + let mut request = self.http.clone(); + let reporting_request = request.clone(); + if let Some(payload) = &self.payload { + *request.body_mut() = payload.clone().into(); + } + match self.client.execute(request).await { + Ok(r) => { + let status = r.status(); + if status.is_success() { + // For certain S3 requests, 200 response may contain `InternalError` or + // `SlowDown` in the message. These responses should be handled similarly + // to r5xx errors. 
+ // More info here: https://repost.aws/knowledge-center/s3-resolve-200-internalerror + if !self.retry_error_body { + return Ok(r); + } + let (parts, body) = r.into_parts(); + let body = match body.text().await { + Ok(body) => body, + Err(e) => { + error!("ERROR: Request:\n{reporting_request:?}\n\nResponse\n Parts:\n{parts:?}\n Error:{e:?}"); + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + return Err(self.err(RequestError::Http(e), ctx)); + } + }; + + if !body_contains_error(&body) { + // Success response and no error, clone and return response + return Ok(HttpResponse::from_parts(parts, body.into())); + } else { + // Retry as if this was a 5xx response + if ctx.exhausted() { + error!("ERROR: Request:\n{reporting_request:?}\n\nResponse\n Parts:\n{parts:?}\n Body:{body:?}"); + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + return Err(self.err(RequestError::Response { body, status }, ctx)); + } + + let sleep = ctx.backoff(); + info!( + "Encountered a response status of {} but body contains Error, backing off for {} seconds, retry {} of {}", + status, + sleep.as_secs_f32(), + ctx.retries, + ctx.max_retries, + ); + tokio::time::sleep(sleep).await; + } + } else if status == StatusCode::NOT_MODIFIED { + error!("ERROR: Request:\n{reporting_request:?}\n\nResponse:\n{r:?}"); + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + return Err(self.err(RequestError::Status { status, body: None }, ctx)); + } else if status.is_redirection() { + let is_bare_redirect = !r.headers().contains_key(LOCATION); + return match is_bare_redirect { + true => { + error!( + "ERROR: Request:\n{reporting_request:?}\n\nResponse:\n{r:?}" + ); + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + Err(self.err(RequestError::BareRedirect, ctx)) + } + false => { + error!( + "ERROR: Request:\n{reporting_request:?}\n\nResponse:\n{r:?}" + ); + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + Err(self.err( + RequestError::Status { + body: None, + status: r.status(), + }, + ctx, + )) + } + }; + } else { + let status = r.status(); + if ctx.exhausted() + || !(status.is_server_error() + || status == StatusCode::TOO_MANY_REQUESTS + || status == StatusCode::REQUEST_TIMEOUT + || (self.retry_on_conflict && status == StatusCode::CONFLICT)) + { + let (parts, body) = r.into_parts(); + let source = match status.is_client_error() { + true => match body.text().await { + Ok(body) => RequestError::Status { + status, + body: Some(body), + }, + Err(e) => { + error!("ERROR: Request:\n{reporting_request:?}\n\nResponse\n Parts:\n{parts:?}\n Error:\n{e:?}"); + error!( + "BT: {}", + std::backtrace::Backtrace::force_capture() + ); + RequestError::Http(e) + } + }, + false => RequestError::Status { status, body: None }, + }; + error!("ERROR: Request:\n{reporting_request:?}\n\nResponse\n Parts:\n{parts:?}\n\n Error:{source:?}"); + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + return Err(self.err(source, ctx)); + }; + + let sleep = ctx.backoff(); + info!( + "Encountered server error with status {}, backing off for {} seconds, retry {} of {}", + status, + sleep.as_secs_f32(), + ctx.retries, + ctx.max_retries, + ); + tokio::time::sleep(sleep).await; + } + } + Err(e) => { + let is_idempotent = self + .idempotent + .unwrap_or_else(|| self.http.method().is_safe()); + + let do_retry = match e.kind() { + HttpErrorKind::Connect | HttpErrorKind::Request => true, // Request not sent, can retry + HttpErrorKind::Timeout | HttpErrorKind::Interrupted => is_idempotent, + 
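+                        // Unknown/Decode failures are never retried: by this point the
+                        // request was fully dispatched, so its effect on the server is
+                        // uncertain and replaying it blindly could duplicate side effects.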
HttpErrorKind::Unknown | HttpErrorKind::Decode => false, + }; + + if ctx.exhausted() || !do_retry { + error!("ERROR: Request:\n{reporting_request:?}\n\nResponse\n Error:{e:?}"); + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + return Err(self.err(RequestError::Http(e), ctx)); + } + let sleep = ctx.backoff(); + info!( + "Encountered transport error of kind {:?}, backing off for {} seconds, retry {} of {}: {}", + e.kind(), + sleep.as_secs_f32(), + ctx.retries, + ctx.max_retries, + e, + ); + tokio::time::sleep(sleep).await; + } + } + } + } +} + +pub(crate) trait RetryExt { + /// Return a [`RetryableRequestBuilder`] + fn retryable(self, config: &RetryConfig) -> RetryableRequestBuilder; + + /// Return a [`RetryableRequest`] + fn retryable_request(self) -> RetryableRequest; + + /// Dispatch a request with the given retry configuration + /// + /// # Panic + /// + /// This will panic if the request body is a stream + fn send_retry(self, config: &RetryConfig) -> BoxFuture<'static, Result>; +} + +impl RetryExt for HttpRequestBuilder { + fn retryable(self, config: &RetryConfig) -> RetryableRequestBuilder { + RetryableRequestBuilder { + request: self.retryable_request(), + context: RetryContext::new(config), + } + } + + fn retryable_request(self) -> RetryableRequest { + let (client, request) = self.into_parts(); + let request = request.expect("request must be valid"); + + RetryableRequest { + client, + http: request, + idempotent: None, + payload: None, + sensitive: false, + retry_on_conflict: false, + retry_error_body: false, + } + } + + fn send_retry(self, config: &RetryConfig) -> BoxFuture<'static, Result> { + let request = self.retryable(config); + Box::pin(async move { request.send().await }) + } +} + +#[cfg(not(target_arch = "wasm32"))] +#[cfg(test)] +mod tests { + use crate::client::mock_server::MockServer; + use crate::client::retry::{body_contains_error, RequestError, RetryContext, RetryExt}; + use crate::client::{HttpClient, HttpResponse}; + use crate::RetryConfig; + use http::StatusCode; + use hyper::header::LOCATION; + use hyper::server::conn::http1; + use hyper::service::service_fn; + use hyper::Response; + use hyper_util::rt::TokioIo; + use reqwest::{Client, Method}; + use std::convert::Infallible; + use std::error::Error; + use std::time::Duration; + use tokio::net::TcpListener; + use tokio::time::timeout; + + #[test] + fn test_body_contains_error() { + // Example error message provided by https://repost.aws/knowledge-center/s3-resolve-200-internalerror + let error_response = "AmazonS3Exception: We encountered an internal error. Please try again. 
+
+#[cfg(not(target_arch = "wasm32"))]
+#[cfg(test)]
+mod tests {
+    use crate::client::mock_server::MockServer;
+    use crate::client::retry::{body_contains_error, RequestError, RetryContext, RetryExt};
+    use crate::client::{HttpClient, HttpResponse};
+    use crate::RetryConfig;
+    use http::StatusCode;
+    use hyper::header::LOCATION;
+    use hyper::server::conn::http1;
+    use hyper::service::service_fn;
+    use hyper::Response;
+    use hyper_util::rt::TokioIo;
+    use reqwest::{Client, Method};
+    use std::convert::Infallible;
+    use std::error::Error;
+    use std::time::Duration;
+    use tokio::net::TcpListener;
+    use tokio::time::timeout;
+
+    #[test]
+    fn test_body_contains_error() {
+        // Example error message provided by https://repost.aws/knowledge-center/s3-resolve-200-internalerror
+        let error_response = "AmazonS3Exception: We encountered an internal error. Please try again. (Service: Amazon S3; Status Code: 200; Error Code: InternalError; Request ID: 0EXAMPLE9AAEB265)";
+        assert!(body_contains_error(error_response));
+
+        let error_response_2 = "<Error><Code>SlowDown</Code><Message>Please reduce your request rate.</Message><RequestId>123456</RequestId></Error>";
+        assert!(body_contains_error(error_response_2));
+
+        // Example success response from https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html
+        let success_response = "<CopyObjectResult><LastModified>2009-10-12T17:50:30.000Z</LastModified><ETag>\"9b2cf535f27731c974343645a3985328\"</ETag></CopyObjectResult>";
+        assert!(!body_contains_error(success_response));
+    }
+
+    #[tokio::test]
+    async fn test_retry() {
+        let mock = MockServer::new().await;
+
+        let retry = RetryConfig {
+            backoff: Default::default(),
+            max_retries: 2,
+            retry_timeout: Duration::from_secs(1000),
+        };
+
+        let client = HttpClient::new(
+            Client::builder()
+                .timeout(Duration::from_millis(100))
+                .build()
+                .unwrap(),
+        );
+
+        let do_request = || client.request(Method::GET, mock.url()).send_retry(&retry);
+
+        // Simple request should work
+        let r = do_request().await.unwrap();
+        assert_eq!(r.status(), StatusCode::OK);
+
+        // Returns client errors immediately with a status message
+        mock.push(
+            Response::builder()
+                .status(StatusCode::BAD_REQUEST)
+                .body("cupcakes".to_string())
+                .unwrap(),
+        );
+
+        let e = do_request().await.unwrap_err();
+        assert_eq!(e.status().unwrap(), StatusCode::BAD_REQUEST);
+        assert_eq!(e.body(), Some("cupcakes"));
+        assert_eq!(
+            e.inner().to_string(),
+            "Server returned non-2xx status code: 400 Bad Request: cupcakes"
+        );
+
+        // Returns client errors with a different message payload
+        mock.push(
+            Response::builder()
+                .status(StatusCode::BAD_REQUEST)
+                .body("NAUGHTY NAUGHTY".to_string())
+                .unwrap(),
+        );
+
+        let e = do_request().await.unwrap_err();
+        assert_eq!(e.status().unwrap(), StatusCode::BAD_REQUEST);
+        assert_eq!(e.body(), Some("NAUGHTY NAUGHTY"));
+        assert_eq!(
+            e.inner().to_string(),
+            "Server returned non-2xx status code: 400 Bad Request: NAUGHTY NAUGHTY"
+        );
+
+        // Should retry server error request
+        mock.push(
+            Response::builder()
+                .status(StatusCode::BAD_GATEWAY)
+                .body(String::new())
+                .unwrap(),
+        );
+
+        let r = do_request().await.unwrap();
+        assert_eq!(r.status(), StatusCode::OK);
+
+        // Should retry 429 Too Many Requests
+        mock.push(
+            Response::builder()
+                .status(StatusCode::TOO_MANY_REQUESTS)
+                .body(String::new())
+                .unwrap(),
+        );
+
+        let r = do_request().await.unwrap();
+        assert_eq!(r.status(), StatusCode::OK);
+
+        // Should retry 408 Request Timeout
+        mock.push(
+            Response::builder()
+                .status(StatusCode::REQUEST_TIMEOUT)
+                .body(String::new())
+                .unwrap(),
+        );
+
+        let r = do_request().await.unwrap();
+        assert_eq!(r.status(), StatusCode::OK);
+
+        // Accepts 204 status code
+        mock.push(
+            Response::builder()
+                .status(StatusCode::NO_CONTENT)
+                .body(String::new())
+                .unwrap(),
+        );
+
+        let r = do_request().await.unwrap();
+        assert_eq!(r.status(), StatusCode::NO_CONTENT);
+
+        // Follows 302 Found redirects
+        mock.push(
+            Response::builder()
+                .status(StatusCode::FOUND)
+                .header(LOCATION, "/foo")
+                .body(String::new())
+                .unwrap(),
+        );
+
+        let r = do_request().await.unwrap();
+        assert_eq!(r.status(), StatusCode::OK);
+
+        // Follows 302 Found redirects to a different location
+        mock.push(
+            Response::builder()
+                .status(StatusCode::FOUND)
+                .header(LOCATION, "/bar")
+                .body(String::new())
+                .unwrap(),
+        );
+
+        let r = do_request().await.unwrap();
+        assert_eq!(r.status(), StatusCode::OK);
+
+        // Handles redirect loop
+        for _ in 0..11 {
+            mock.push(
+                Response::builder()
+                    .status(StatusCode::FOUND)
+                    .header(LOCATION, "/bar")
+                    .body(String::new())
+                    .unwrap(),
+            );
+        }
+
+        let e = do_request().await.unwrap_err().to_string();
+        assert!(e.contains("error following redirect"), "{}", e);
+
+        // Handles redirect missing location
+        mock.push(
+            Response::builder()
+                .status(StatusCode::FOUND)
+                .body(String::new())
+                .unwrap(),
+        );
+
+        let e = do_request().await.unwrap_err();
+        assert!(matches!(e.inner(), RequestError::BareRedirect));
+        assert_eq!(e.inner().to_string(), "Received redirect without LOCATION, this normally indicates an incorrectly configured region");
+
+        // Gives up after retrying the specified number of times
+        for _ in 0..=retry.max_retries {
+            mock.push(
+                Response::builder()
+                    .status(StatusCode::BAD_GATEWAY)
+                    .body("ignored".to_string())
+                    .unwrap(),
+            );
+        }
+
+        let e = do_request().await.unwrap_err();
+        assert!(
+            e.to_string().contains(" after 2 retries, max_retries: 2, retry_timeout: 1000s - Server returned non-2xx status code: 502 Bad Gateway"),
+            "{e}"
+        );
+        // verify e.source() is available as well for users who need programmatic access
+        assert_eq!(
+            e.source().unwrap().to_string(),
+            "Server returned non-2xx status code: 502 Bad Gateway: ",
+        );
+
+        // Panic results in an incomplete message error in the client
+        mock.push_fn::<_, String>(|_| panic!());
+        let r = do_request().await.unwrap();
+        assert_eq!(r.status(), StatusCode::OK);
+
+        // Gives up after retrying multiple panics
+        for _ in 0..=retry.max_retries {
+            mock.push_fn::<_, String>(|_| panic!());
+        }
+        let e = do_request().await.unwrap_err();
+        assert!(
+            e.to_string().contains("after 2 retries, max_retries: 2, retry_timeout: 1000s - HTTP error: error sending request"),
+            "{e}"
+        );
+        // verify e.source() is available as well for users who need programmatic access
+        assert_eq!(
+            e.source().unwrap().to_string(),
+            "HTTP error: error sending request",
+        );
+
+        // Retries on client timeout
+        mock.push_async_fn(|_| async move {
+            tokio::time::sleep(Duration::from_secs(10)).await;
+            panic!()
+        });
+        do_request().await.unwrap();
+
+        // Does not retry PUT request
+        mock.push_async_fn(|_| async move {
+            tokio::time::sleep(Duration::from_secs(10)).await;
+            panic!()
+        });
+        let res = client.request(Method::PUT, mock.url()).send_retry(&retry);
+        let e = res.await.unwrap_err().to_string();
+        assert!(
+            !e.contains("retries") && e.contains("error sending request"),
+            "{e}"
+        );
+
+        let url = format!("{}/SENSITIVE", mock.url());
+        for _ in 0..=retry.max_retries {
+            mock.push(
+                Response::builder()
+                    .status(StatusCode::BAD_GATEWAY)
+                    .body("ignored".to_string())
+                    .unwrap(),
+            );
+        }
+        let res = client.request(Method::GET, url).send_retry(&retry).await;
+        let err = res.unwrap_err().to_string();
+        assert!(err.contains("SENSITIVE"), "{err}");
+
+        let url = format!("{}/SENSITIVE", mock.url());
+        for _ in 0..=retry.max_retries {
+            mock.push(
+                Response::builder()
+                    .status(StatusCode::BAD_GATEWAY)
+                    .body("ignored".to_string())
+                    .unwrap(),
+            );
+        }
+
+        // Sensitive requests should strip URL from error
+        let req = client
+            .request(Method::GET, &url)
+            .retryable(&retry)
+            .sensitive(true);
+        let err = req.send().await.unwrap_err().to_string();
+        assert!(!err.contains("SENSITIVE"), "{err}");
+
+        for _ in 0..=retry.max_retries {
+            mock.push_fn::<_, String>(|_| panic!());
+        }
+
+        let req = client
+            .request(Method::GET, &url)
+            .retryable(&retry)
+            .sensitive(true);
+        let err = req.send().await.unwrap_err().to_string();
+        assert!(!err.contains("SENSITIVE"), "{err}");
+
+        // Success response with error in body is retried
+        mock.push(
+            Response::builder()
+                .status(StatusCode::OK)
+                .body("InternalError".to_string())
+                .unwrap(),
+        );
+        let req = client
+            .request(Method::PUT, &url)
+            .retryable(&retry)
+            .idempotent(true)
+            .retry_error_body(true);
+        let r = req.send().await.unwrap();
+        assert_eq!(r.status(), StatusCode::OK);
+        // Response with InternalError should have been retried
+        let b = r.into_body().text().await.unwrap();
+        assert!(!b.contains("InternalError"));
+
+        // Should not retry success response with no error in body
+        mock.push(
+            Response::builder()
+                .status(StatusCode::OK)
+                .body("success".to_string())
+                .unwrap(),
+        );
+        let req = client
+            .request(Method::PUT, &url)
+            .retryable(&retry)
+            .idempotent(true)
+            .retry_error_body(true);
+        let r = req.send().await.unwrap();
+        assert_eq!(r.status(), StatusCode::OK);
+        let b = r.into_body().text().await.unwrap();
+        assert!(b.contains("success"));
+
+        // Shutdown
+        mock.shutdown().await
+    }
+
+    #[tokio::test]
+    async fn test_connection_reset_is_retried() {
+        let retry = RetryConfig {
+            backoff: Default::default(),
+            max_retries: 2,
+            retry_timeout: Duration::from_secs(1),
+        };
+        assert!(retry.max_retries > 0);
+
+        // Setup server which resets a connection and then quits
+        let listener = TcpListener::bind("[::1]:0").await.unwrap();
+        let url = format!("http://{}", listener.local_addr().unwrap());
+        let handle = tokio::spawn(async move {
+            // Reset the connection on the first n-1 attempts
+            for _ in 0..retry.max_retries {
+                let (stream, _) = listener.accept().await.unwrap();
+                stream.set_linger(Some(Duration::from_secs(0))).unwrap();
+            }
+            // Succeed on the last attempt
+            let (stream, _) = listener.accept().await.unwrap();
+            http1::Builder::new()
+                // we want the connection to end after responding
+                .keep_alive(false)
+                .serve_connection(
+                    TokioIo::new(stream),
+                    service_fn(move |_req| async {
+                        Ok::<_, Infallible>(HttpResponse::new("Success!".to_string().into()))
+                    }),
+                )
+                .await
+                .unwrap();
+        });
+
+        // Perform the request
+        let client = HttpClient::new(reqwest::Client::new());
+        let ctx = &mut RetryContext::new(&retry);
+        let res = client
+            .get(url)
+            .retryable_request()
+            .send(ctx)
+            .await
+            .expect("request should eventually succeed");
+        assert_eq!(res.status(), StatusCode::OK);
+        assert!(ctx.exhausted());
+
+        // Wait for server to shutdown
+        let _ = timeout(Duration::from_secs(1), handle)
+            .await
+            .expect("shutdown shouldn't hang");
+    }
+}
diff --git a/rust/object_store/src/client/s3.rs b/rust/object_store/src/client/s3.rs
new file mode 100644
index 0000000000..a2221fbbc1
--- /dev/null
+++ b/rust/object_store/src/client/s3.rs
@@ -0,0 +1,157 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The list and multipart API used by both GCS and S3
+
+use crate::multipart::PartId;
+use crate::path::Path;
+use crate::{ListResult, ObjectMeta, Result};
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+pub struct ListResponse {
+    #[serde(default)]
+    pub contents: Vec<ListContents>,
+    #[serde(default)]
+    pub common_prefixes: Vec<ListPrefix>,
+    #[serde(default)]
+    pub next_continuation_token: Option<String>,
+}
+
+impl TryFrom<ListResponse> for ListResult {
+    type Error = crate::Error;
+
+    fn try_from(value: ListResponse) -> Result<Self> {
+        let common_prefixes = value
+            .common_prefixes
+            .into_iter()
+            .map(|x| Ok(Path::parse(x.prefix)?))
+            .collect::<Result<_>>()?;
+
+        let objects = value
+            .contents
+            .into_iter()
+            .map(TryFrom::try_from)
+            .collect::<Result<_>>()?;
+
+        Ok(Self {
+            common_prefixes,
+            objects,
+        })
+    }
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+pub struct ListPrefix {
+    pub prefix: String,
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+pub struct ListContents {
+    pub key: String,
+    pub size: u64,
+    pub last_modified: DateTime<Utc>,
+    #[serde(rename = "ETag")]
+    pub e_tag: Option<String>,
+}
+
+impl TryFrom<ListContents> for ObjectMeta {
+    type Error = crate::Error;
+
+    fn try_from(value: ListContents) -> Result<Self> {
+        Ok(Self {
+            location: Path::parse(value.key)?,
+            last_modified: value.last_modified,
+            size: value.size,
+            e_tag: value.e_tag,
+            version: None,
+        })
+    }
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+pub(crate) struct InitiateMultipartUploadResult {
+    pub upload_id: String,
+}
+
+#[cfg(feature = "aws")]
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+pub(crate) struct CopyPartResult {
+    #[serde(rename = "ETag")]
+    pub e_tag: String,
+}
+
+#[derive(Debug, Serialize)]
+#[serde(rename_all = "PascalCase")]
+pub(crate) struct CompleteMultipartUpload {
+    pub part: Vec<MultipartPart>,
+}
+
+#[derive(Serialize, Deserialize)]
+pub(crate) struct PartMetadata {
+    pub e_tag: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub checksum_sha256: Option<String>,
+}
+
+impl From<Vec<PartId>> for CompleteMultipartUpload {
+    fn from(value: Vec<PartId>) -> Self {
+        let part = value
+            .into_iter()
+            .enumerate()
+            .map(|(part_idx, part)| {
+                let md = match quick_xml::de::from_str::<PartMetadata>(&part.content_id) {
+                    Ok(md) => md,
+                    // fallback to old way
+                    Err(_) => PartMetadata {
+                        e_tag: part.content_id.clone(),
+                        checksum_sha256: None,
+                    },
+                };
+                MultipartPart {
+                    e_tag: md.e_tag,
+                    part_number: part_idx + 1,
+                    checksum_sha256: md.checksum_sha256,
+                }
+            })
+            .collect();
+        Self { part }
+    }
+}
+
+#[derive(Debug, Serialize)]
+pub(crate) struct MultipartPart {
+    #[serde(rename = "ETag")]
+    pub e_tag: String,
+    #[serde(rename = "PartNumber")]
+    pub part_number: usize,
+    #[serde(rename = "ChecksumSHA256")]
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub checksum_sha256: Option<String>,
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+pub(crate) struct CompleteMultipartUploadResult {
+    #[serde(rename = "ETag")]
+    pub e_tag: String,
+}
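+
+// For illustration only: with the serde attributes above, `quick_xml` renders a
+// two-part upload (hypothetical ETags "etag-1"/"etag-2", no SHA-256 checksums)
+// roughly as
+//
+//     <CompleteMultipartUpload>
+//         <Part><ETag>etag-1</ETag><PartNumber>1</PartNumber></Part>
+//         <Part><ETag>etag-2</ETag><PartNumber>2</PartNumber></Part>
+//     </CompleteMultipartUpload>
+//
+// which is the body S3-compatible stores expect for CompleteMultipartUpload.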
diff --git a/rust/object_store/src/client/token.rs b/rust/object_store/src/client/token.rs
new file mode 100644
index 0000000000..81ffc110ac
--- /dev/null
+++ b/rust/object_store/src/client/token.rs
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::future::Future;
+use std::time::{Duration, Instant};
+use tokio::sync::Mutex;
+
+/// A temporary authentication token with an associated expiry
+#[derive(Debug, Clone)]
+pub(crate) struct TemporaryToken<T> {
+    /// The temporary credential
+    pub token: T,
+    /// The instant at which this credential is no longer valid
+    /// None means the credential does not expire
+    pub expiry: Option<Instant>,
+}
+
+/// Provides [`TokenCache::get_or_insert_with`] which can be used to cache a
+/// [`TemporaryToken`] based on its expiry
+#[derive(Debug)]
+pub(crate) struct TokenCache<T> {
+    cache: Mutex<Option<(TemporaryToken<T>, Instant)>>,
+    min_ttl: Duration,
+    fetch_backoff: Duration,
+}
+
+impl<T> Default for TokenCache<T> {
+    fn default() -> Self {
+        Self {
+            cache: Default::default(),
+            min_ttl: Duration::from_secs(300),
+            // How long to wait before re-attempting a token fetch after receiving one that
+            // is still within the min-ttl
+            fetch_backoff: Duration::from_millis(100),
+        }
+    }
+}
+
+impl<T: Clone + Send> TokenCache<T> {
+    /// Override the minimum remaining TTL for a cached token to be used
+    #[cfg(any(feature = "aws", feature = "gcp"))]
+    pub(crate) fn with_min_ttl(self, min_ttl: Duration) -> Self {
+        Self { min_ttl, ..self }
+    }
+
+    pub(crate) async fn get_or_insert_with<F, Fut, E>(&self, f: F) -> Result<T, E>
+    where
+        F: FnOnce() -> Fut + Send,
+        Fut: Future<Output = Result<TemporaryToken<T>, E>> + Send,
+    {
+        let now = Instant::now();
+        let mut locked = self.cache.lock().await;
+
+        if let Some((cached, fetched_at)) = locked.as_ref() {
+            match cached.expiry {
+                Some(ttl) => {
+                    if ttl.checked_duration_since(now).unwrap_or_default() > self.min_ttl ||
+                        // if we've recently attempted to fetch this token and it's not actually
+                        // expired, we'll wait to re-fetch it and return the cached one
+                        (fetched_at.elapsed() < self.fetch_backoff && ttl.checked_duration_since(now).is_some())
+                    {
+                        return Ok(cached.token.clone());
+                    }
+                }
+                None => return Ok(cached.token.clone()),
+            }
+        }
+
+        let cached = f().await?;
+        let token = cached.token.clone();
+        *locked = Some((cached, Instant::now()));
+
+        Ok(token)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::client::token::{TemporaryToken, TokenCache};
+    use std::sync::atomic::{AtomicU32, Ordering};
+    use std::time::{Duration, Instant};
+
+    // Helper function to create a token with a specific expiry duration from now
+    fn create_token(expiry_duration: Option<Duration>) -> TemporaryToken<String> {
+        TemporaryToken {
+            token: "test_token".to_string(),
+            expiry: expiry_duration.map(|d| Instant::now() + d),
+        }
+    }
+
+    #[tokio::test]
+    async fn test_expired_token_is_refreshed() {
+        let cache = TokenCache::default();
+        static COUNTER: AtomicU32 = AtomicU32::new(0);
+
+        async fn get_token() -> Result<TemporaryToken<String>, String> {
+            COUNTER.fetch_add(1, Ordering::SeqCst);
+            Ok::<_, String>(create_token(Some(Duration::from_secs(0))))
+        }
+
+        // Should fetch initial token
+        let _ = cache.get_or_insert_with(get_token).await.unwrap();
+        assert_eq!(COUNTER.load(Ordering::SeqCst), 1);
+
+        tokio::time::sleep(Duration::from_millis(2)).await;
+
+        // Token is expired, so should fetch again
+        let _ = cache.get_or_insert_with(get_token).await.unwrap();
+        assert_eq!(COUNTER.load(Ordering::SeqCst), 2);
+    }
+
+    #[tokio::test]
+    async fn test_min_ttl_causes_refresh() {
+        let cache = TokenCache {
+            cache: Default::default(),
+            min_ttl: Duration::from_secs(1),
+            fetch_backoff: Duration::from_millis(1),
+        };
+
+        static COUNTER: AtomicU32 = AtomicU32::new(0);
+
+        async fn get_token() -> Result<TemporaryToken<String>, String> {
+            COUNTER.fetch_add(1, Ordering::SeqCst);
+            Ok::<_, String>(create_token(Some(Duration::from_millis(100))))
+        }
+
+        // Initial fetch
+        let _ = cache.get_or_insert_with(get_token).await.unwrap();
+        assert_eq!(COUNTER.load(Ordering::SeqCst), 1);
+
+        // Should not fetch again since not expired and within fetch_backoff
+        let _ = cache.get_or_insert_with(get_token).await.unwrap();
+        assert_eq!(COUNTER.load(Ordering::SeqCst), 1);
+
+        tokio::time::sleep(Duration::from_millis(2)).await;
+
+        // Should fetch, since we've passed fetch_backoff
+        let _ = cache.get_or_insert_with(get_token).await.unwrap();
+        assert_eq!(COUNTER.load(Ordering::SeqCst), 2);
+    }
+}
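+
+// A sketch of typical use (illustrative; `fetch` stands in for a real
+// credential exchange and is not defined in this file). The closure only runs
+// when the cached token is absent, expired, or within `min_ttl` of expiry:
+//
+//     let cache: TokenCache<String> = TokenCache::default();
+//     let token = cache
+//         .get_or_insert_with(|| async {
+//             let (token, valid_for) = fetch().await?;
+//             Ok::<_, String>(TemporaryToken {
+//                 token,
+//                 expiry: Some(Instant::now() + valid_for),
+//             })
+//         })
+//         .await?;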
diff --git a/rust/object_store/src/config.rs b/rust/object_store/src/config.rs
new file mode 100644
index 0000000000..29a389d4e3
--- /dev/null
+++ b/rust/object_store/src/config.rs
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::fmt::{Debug, Display, Formatter};
+use std::str::FromStr;
+use std::time::Duration;
+
+use humantime::{format_duration, parse_duration};
+use reqwest::header::HeaderValue;
+
+use crate::{Error, Result};
+
+/// Provides deferred parsing of a value
+///
+/// This allows builders to defer fallibility to build
+#[derive(Debug, Clone)]
+pub(crate) enum ConfigValue<T> {
+    Parsed(T),
+    Deferred(String),
+}
+
+impl<T: Display> Display for ConfigValue<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Parsed(v) => write!(f, "{v}"),
+            Self::Deferred(v) => write!(f, "{v}"),
+        }
+    }
+}
+
+impl<T> From<T> for ConfigValue<T> {
+    fn from(value: T) -> Self {
+        Self::Parsed(value)
+    }
+}
+
+impl<T: Parse + Clone> ConfigValue<T> {
+    pub(crate) fn parse(&mut self, v: impl Into<String>) {
+        *self = Self::Deferred(v.into())
+    }
+
+    pub(crate) fn get(&self) -> Result<T> {
+        match self {
+            Self::Parsed(v) => Ok(v.clone()),
+            Self::Deferred(v) => T::parse(v),
+        }
+    }
+}
+
+impl<T: Default> Default for ConfigValue<T> {
+    fn default() -> Self {
+        Self::Parsed(T::default())
+    }
+}
+
+/// A value that can be stored in [`ConfigValue`]
+pub(crate) trait Parse: Sized {
+    fn parse(v: &str) -> Result<Self>;
+}
+
+impl Parse for bool {
+    fn parse(v: &str) -> Result<Self> {
+        let lower = v.to_ascii_lowercase();
+        match lower.as_str() {
+            "1" | "true" | "on" | "yes" | "y" => Ok(true),
+            "0" | "false" | "off" | "no" | "n" => Ok(false),
+            _ => Err(Error::Generic {
+                store: "Config",
+                source: format!("failed to parse \"{v}\" as boolean").into(),
+            }),
+        }
+    }
+}
+
+impl Parse for Duration {
+    fn parse(v: &str) -> Result<Self> {
+        parse_duration(v).map_err(|_| Error::Generic {
+            store: "Config",
+            source: format!("failed to parse \"{v}\" as Duration").into(),
+        })
+    }
+}
+
+impl Parse for usize {
+    fn parse(v: &str) -> Result<Self> {
+        Self::from_str(v).map_err(|_| Error::Generic {
+            store: "Config",
+            source: format!("failed to parse \"{v}\" as usize").into(),
+        })
+    }
+}
+
+impl Parse for u32 {
+    fn parse(v: &str) -> Result<Self> {
+        Self::from_str(v).map_err(|_| Error::Generic {
+            store: "Config",
+            source: format!("failed to parse \"{v}\" as u32").into(),
+        })
+    }
+}
+
+impl Parse for HeaderValue {
+    fn parse(v: &str) -> Result<Self> {
+        Self::from_str(v).map_err(|_| Error::Generic {
+            store: "Config",
+            source: format!("failed to parse \"{v}\" as HeaderValue").into(),
+        })
+    }
+}
+
+pub(crate) fn fmt_duration(duration: &ConfigValue<Duration>) -> String {
+    match duration {
+        ConfigValue::Parsed(v) => format_duration(*v).to_string(),
+        ConfigValue::Deferred(v) => v.clone(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Duration;
+
+    #[test]
+    fn test_parse_duration() {
+        let duration = Duration::from_secs(60);
+        assert_eq!(Duration::parse("60 seconds").unwrap(), duration);
+        assert_eq!(Duration::parse("60 s").unwrap(), duration);
+        assert_eq!(Duration::parse("60s").unwrap(), duration)
+    }
+}
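+
+// A short sketch of the deferred-parsing flow used by the store builders
+// (illustrative only): `with_config` stores the raw string, and any parse
+// failure surfaces later, when `build` calls `get`:
+//
+//     let mut skip_signature: ConfigValue<bool> = ConfigValue::default(); // Parsed(false)
+//     skip_signature.parse("yes");                                        // Deferred("yes")
+//     assert!(skip_signature.get()?);                                     // parsed here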
diff --git a/rust/object_store/src/delimited.rs b/rust/object_store/src/delimited.rs
new file mode 100644
index 0000000000..0994b4f299
--- /dev/null
+++ b/rust/object_store/src/delimited.rs
@@ -0,0 +1,272 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Utility for streaming newline delimited files from object storage
+
+use std::collections::VecDeque;
+
+use bytes::Bytes;
+use futures::{Stream, StreamExt};
+
+use super::Result;
+
+#[derive(Debug, thiserror::Error)]
+enum Error {
+    #[error("encountered unterminated string")]
+    UnterminatedString,
+
+    #[error("encountered trailing escape character")]
+    TrailingEscape,
+}
+
+impl From<Error> for super::Error {
+    fn from(err: Error) -> Self {
+        Self::Generic {
+            store: "LineDelimiter",
+            source: Box::new(err),
+        }
+    }
+}
+
+/// The ASCII encoding of `"`
+const QUOTE: u8 = b'"';
+
+/// The ASCII encoding of `\n`
+const NEWLINE: u8 = b'\n';
+
+/// The ASCII encoding of `\`
+const ESCAPE: u8 = b'\\';
+
+/// [`LineDelimiter`] is provided with a stream of [`Bytes`] and returns an iterator
+/// of [`Bytes`] containing a whole number of new line delimited records
+#[derive(Debug, Default)]
+struct LineDelimiter {
+    /// Complete chunks of [`Bytes`]
+    complete: VecDeque<Bytes>,
+    /// Remainder bytes that form the next record
+    remainder: Vec<u8>,
+    /// True if the last character was the escape character
+    is_escape: bool,
+    /// True if currently processing a quoted string
+    is_quote: bool,
+}
+
+impl LineDelimiter {
+    /// Creates a new [`LineDelimiter`]
+    fn new() -> Self {
+        Self::default()
+    }
+
+    /// Adds the next set of [`Bytes`]
+    fn push(&mut self, val: impl Into<Bytes>) {
+        let val: Bytes = val.into();
+
+        let is_escape = &mut self.is_escape;
+        let is_quote = &mut self.is_quote;
+        let mut record_ends = val.iter().enumerate().filter_map(|(idx, v)| {
+            if *is_escape {
+                *is_escape = false;
+                None
+            } else if *v == ESCAPE {
+                *is_escape = true;
+                None
+            } else if *v == QUOTE {
+                *is_quote = !*is_quote;
+                None
+            } else if *is_quote {
+                None
+            } else {
+                (*v == NEWLINE).then_some(idx + 1)
+            }
+        });
+
+        let start_offset = match self.remainder.is_empty() {
+            true => 0,
+            false => match record_ends.next() {
+                Some(idx) => {
+                    self.remainder.extend_from_slice(&val[0..idx]);
+                    self.complete
+                        .push_back(Bytes::from(std::mem::take(&mut self.remainder)));
+                    idx
+                }
+                None => {
+                    self.remainder.extend_from_slice(&val);
+                    return;
+                }
+            },
+        };
+        let end_offset = record_ends.next_back().unwrap_or(start_offset);
+        if start_offset != end_offset {
+            self.complete.push_back(val.slice(start_offset..end_offset));
+        }
+
+        if end_offset != val.len() {
+            self.remainder.extend_from_slice(&val[end_offset..])
+        }
+    }
+
+    /// Marks the end of the stream, delimiting any remaining bytes
+    ///
+    /// Returns `true` if there is no remaining data to be read
+    fn finish(&mut self) -> Result<bool> {
+        if !self.remainder.is_empty() {
+            if self.is_quote {
+                Err(Error::UnterminatedString)?;
+            }
+            if self.is_escape {
+                Err(Error::TrailingEscape)?;
+            }
+
+            self.complete
+                .push_back(Bytes::from(std::mem::take(&mut self.remainder)))
+        }
+        Ok(self.complete.is_empty())
+    }
+}
+
+impl Iterator for LineDelimiter {
+    type Item = Bytes;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.complete.pop_front()
+    }
+}
+
+/// Given a [`Stream`] of [`Bytes`] returns a [`Stream`] where each
+/// yielded [`Bytes`] contains a whole number of new line delimited records
+/// accounting for `\` style escapes and `"` quotes
+pub fn newline_delimited_stream<S>(s: S) -> impl Stream<Item = Result<Bytes>>
+where
+    S: Stream<Item = Result<Bytes>> + Unpin,
+{
+    let delimiter = LineDelimiter::new();
+
+    futures::stream::unfold(
+        (s, delimiter, false),
+        |(mut s, mut delimiter, mut exhausted)| async move {
+            loop {
+                if let Some(next) = delimiter.next() {
+                    return Some((Ok(next), (s, delimiter, exhausted)));
+                } else if exhausted {
+                    return None;
+                }
+
+                match s.next().await {
+                    Some(Ok(bytes)) => delimiter.push(bytes),
+                    Some(Err(e)) => return Some((Err(e), (s, delimiter, exhausted))),
+                    None => {
+                        exhausted = true;
+                        match delimiter.finish() {
+                            Ok(true) => return None,
+                            Ok(false) => continue,
+                            Err(e) => return Some((Err(e), (s, delimiter, exhausted))),
+                        }
+                    }
+                }
+            }
+        },
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use futures::stream::{BoxStream, TryStreamExt};
+
+    use super::*;
+
+    #[test]
+    fn test_delimiter() {
+        let mut delimiter = LineDelimiter::new();
+        delimiter.push("hello\nworld");
+        delimiter.push("\n\n");
+
+        assert_eq!(delimiter.next().unwrap(), Bytes::from("hello\n"));
+        assert_eq!(delimiter.next().unwrap(), Bytes::from("world\n"));
+        assert_eq!(delimiter.next().unwrap(), Bytes::from("\n"));
+        assert!(delimiter.next().is_none());
+    }
+
+    #[test]
+    fn test_delimiter_escaped() {
+        let mut delimiter = LineDelimiter::new();
+        delimiter.push("");
+        delimiter.push("fo\\\n\"foo");
+        delimiter.push("bo\n\"bar\n");
+        delimiter.push("\"he");
+        delimiter.push("llo\"\n");
+        assert_eq!(
+            delimiter.next().unwrap(),
+            Bytes::from("fo\\\n\"foobo\n\"bar\n")
+        );
+        assert_eq!(delimiter.next().unwrap(), Bytes::from("\"hello\"\n"));
+        assert!(delimiter.next().is_none());
+
+        // Verify can push further data
+        delimiter.push("\"foo\nbar\",\"fiz\\\"inner\\\"\"\nhello");
+        assert!(!delimiter.finish().unwrap());
+
+        assert_eq!(
+            delimiter.next().unwrap(),
+            Bytes::from("\"foo\nbar\",\"fiz\\\"inner\\\"\"\n")
+        );
+        assert_eq!(delimiter.next().unwrap(), Bytes::from("hello"));
+        assert!(delimiter.finish().unwrap());
+        assert!(delimiter.next().is_none());
+    }
+
+    #[tokio::test]
+    async fn test_delimiter_stream() {
+        let input = vec!["hello\nworld\nbin", "go\ncup", "cakes"];
+        let input_stream = futures::stream::iter(input.into_iter().map(|s| Ok(Bytes::from(s))));
+        let stream = newline_delimited_stream(input_stream);
+
+        let results: Vec<_> = stream.try_collect().await.unwrap();
+        assert_eq!(
+            results,
+            vec![
+                Bytes::from("hello\nworld\n"),
+                Bytes::from("bingo\n"),
+                Bytes::from("cupcakes")
+            ]
+        )
+    }
+
+    #[tokio::test]
+    async fn test_delimiter_unfold_stream() {
+        let input_stream: BoxStream<'static, Result<Bytes>> = futures::stream::unfold(
+            VecDeque::from(["hello\nworld\nbin", "go\ncup", "cakes"]),
+            |mut input| async move {
+                if !input.is_empty() {
+                    Some((Ok(Bytes::from(input.pop_front().unwrap())), input))
+                } else {
+                    None
+                }
+            },
+        )
+        .boxed();
+        let stream = newline_delimited_stream(input_stream);
+
+        let results: Vec<_> = stream.try_collect().await.unwrap();
+        assert_eq!(
+            results,
+            vec![
+                Bytes::from("hello\nworld\n"),
+                Bytes::from("bingo\n"),
+                Bytes::from("cupcakes")
+            ]
+        )
+    }
+}
diff --git a/rust/object_store/src/gcp/builder.rs b/rust/object_store/src/gcp/builder.rs
new file mode 100644
index 0000000000..f22d66d2e8
--- /dev/null
+++ b/rust/object_store/src/gcp/builder.rs
@@ -0,0 +1,741 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::client::{http_connector, HttpConnector, TokenCredentialProvider};
+use crate::config::ConfigValue;
+use crate::gcp::client::{GoogleCloudStorageClient, GoogleCloudStorageConfig};
+use crate::gcp::credential::{
+    ApplicationDefaultCredentials, InstanceCredentialProvider, ServiceAccountCredentials,
+    DEFAULT_GCS_BASE_URL,
+};
+use crate::gcp::{
+    credential, GcpCredential, GcpCredentialProvider, GcpSigningCredential,
+    GcpSigningCredentialProvider, GoogleCloudStorage, STORE,
+};
+use crate::{ClientConfigKey, ClientOptions, Result, RetryConfig, StaticCredentialProvider};
+use serde::{Deserialize, Serialize};
+use std::str::FromStr;
+use std::sync::Arc;
+use std::time::Duration;
+use url::Url;
+
+use super::credential::{AuthorizedUserSigningCredentials, InstanceSigningCredentialProvider};
+
+const TOKEN_MIN_TTL: Duration = Duration::from_secs(4 * 60);
+
+#[derive(Debug, thiserror::Error)]
+enum Error {
+    #[error("Missing bucket name")]
+    MissingBucketName {},
+
+    #[error("One of service account path or service account key may be provided.")]
+    ServiceAccountPathAndKeyProvided,
+
+    #[error("Unable to parse source url. Url: {}, Error: {}", url, source)]
+    UnableToParseUrl {
+        source: url::ParseError,
+        url: String,
+    },
+
+    #[error(
+        "Unknown url scheme cannot be parsed into storage location: {}",
+        scheme
+    )]
+    UnknownUrlScheme { scheme: String },
+
+    #[error("URL did not match any known pattern for scheme: {}", url)]
+    UrlNotRecognised { url: String },
+
+    #[error("Configuration key: '{}' is not known.", key)]
+    UnknownConfigurationKey { key: String },
+
+    #[error("GCP credential error: {}", source)]
+    Credential { source: credential::Error },
+}
+
+impl From<Error> for crate::Error {
+    fn from(err: Error) -> Self {
+        match err {
+            Error::UnknownConfigurationKey { key } => {
+                Self::UnknownConfigurationKey { store: STORE, key }
+            }
+            _ => Self::Generic {
+                store: STORE,
+                source: Box::new(err),
+            },
+        }
+    }
+}
+
+/// Configure a connection to Google Cloud Storage.
+///
+/// If no credentials are explicitly provided, they will be sourced
+/// from the environment as documented [here](https://cloud.google.com/docs/authentication/application-default-credentials).
+///
+/// # Example
+/// ```
+/// # let BUCKET_NAME = "foo";
+/// # use object_store::gcp::GoogleCloudStorageBuilder;
+/// let gcs = GoogleCloudStorageBuilder::from_env().with_bucket_name(BUCKET_NAME).build();
+/// ```
+#[derive(Debug, Clone)]
+pub struct GoogleCloudStorageBuilder {
+    /// Bucket name
+    bucket_name: Option<String>,
+    /// Url
+    url: Option<String>,
+    /// Path to the service account file
+    service_account_path: Option<String>,
+    /// The serialized service account key
+    service_account_key: Option<String>,
+    /// Path to the application credentials file.
+    application_credentials_path: Option<String>,
+    /// Retry config
+    retry_config: RetryConfig,
+    /// Client options
+    client_options: ClientOptions,
+    /// Credentials
+    credentials: Option<GcpCredentialProvider>,
+    /// Skip signing requests
+    skip_signature: ConfigValue<bool>,
+    /// Credentials for sign url
+    signing_credentials: Option<GcpSigningCredentialProvider>,
+    /// The [`HttpConnector`] to use
+    http_connector: Option<Arc<dyn HttpConnector>>,
+}
+
+/// Configuration keys for [`GoogleCloudStorageBuilder`]
+///
+/// Configuration via keys can be done via [`GoogleCloudStorageBuilder::with_config`]
+///
+/// # Example
+/// ```
+/// # use object_store::gcp::{GoogleCloudStorageBuilder, GoogleConfigKey};
+/// let builder = GoogleCloudStorageBuilder::new()
+///     .with_config("google_service_account".parse().unwrap(), "my-service-account")
+///     .with_config(GoogleConfigKey::Bucket, "my-bucket");
+/// ```
+#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, Serialize, Deserialize)]
+#[non_exhaustive]
+pub enum GoogleConfigKey {
+    /// Path to the service account file
+    ///
+    /// Supported keys:
+    /// - `google_service_account`
+    /// - `service_account`
+    /// - `google_service_account_path`
+    /// - `service_account_path`
+    ServiceAccount,
+
+    /// The serialized service account key.
+    ///
+    /// Supported keys:
+    /// - `google_service_account_key`
+    /// - `service_account_key`
+    ServiceAccountKey,
+
+    /// Bucket name
+    ///
+    /// See [`GoogleCloudStorageBuilder::with_bucket_name`] for details.
+    ///
+    /// Supported keys:
+    /// - `google_bucket`
+    /// - `google_bucket_name`
+    /// - `bucket`
+    /// - `bucket_name`
+    Bucket,
+
+    /// Application credentials path
+    ///
+    /// See [`GoogleCloudStorageBuilder::with_application_credentials`].
+    ApplicationCredentials,
+
+    /// Skip signing request
+    SkipSignature,
+
+    /// Client options
+    Client(ClientConfigKey),
+}
+
+impl AsRef<str> for GoogleConfigKey {
+    fn as_ref(&self) -> &str {
+        match self {
+            Self::ServiceAccount => "google_service_account",
+            Self::ServiceAccountKey => "google_service_account_key",
+            Self::Bucket => "google_bucket",
+            Self::ApplicationCredentials => "google_application_credentials",
+            Self::SkipSignature => "google_skip_signature",
+            Self::Client(key) => key.as_ref(),
+        }
+    }
+}
+
+impl FromStr for GoogleConfigKey {
+    type Err = crate::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "google_service_account"
+            | "service_account"
+            | "google_service_account_path"
+            | "service_account_path" => Ok(Self::ServiceAccount),
+            "google_service_account_key" | "service_account_key" => Ok(Self::ServiceAccountKey),
+            "google_bucket" | "google_bucket_name" | "bucket" | "bucket_name" => Ok(Self::Bucket),
+            "google_application_credentials" | "application_credentials" => {
+                Ok(Self::ApplicationCredentials)
+            }
+            "google_skip_signature" | "skip_signature" => Ok(Self::SkipSignature),
+            _ => match s.strip_prefix("google_").unwrap_or(s).parse() {
+                Ok(key) => Ok(Self::Client(key)),
+                Err(_) => Err(Error::UnknownConfigurationKey { key: s.into() }.into()),
+            },
+        }
+    }
+}
+
+impl Default for GoogleCloudStorageBuilder {
+    fn default() -> Self {
+        Self {
+            bucket_name: None,
+            service_account_path: None,
+            service_account_key: None,
+            application_credentials_path: None,
+            retry_config: Default::default(),
+            client_options: ClientOptions::new().with_allow_http(true),
+            url: None,
+            credentials: None,
+            skip_signature: Default::default(),
+            signing_credentials: None,
+            http_connector: None,
+        }
+    }
+}
+
+impl GoogleCloudStorageBuilder {
+    /// Create a new [`GoogleCloudStorageBuilder`] with default values.
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Create an instance of [`GoogleCloudStorageBuilder`] with values pre-populated from environment variables.
+    ///
+    /// Variables extracted from environment:
+    /// * GOOGLE_SERVICE_ACCOUNT: location of service account file
+    /// * GOOGLE_SERVICE_ACCOUNT_PATH: (alias) location of service account file
+    /// * SERVICE_ACCOUNT: (alias) location of service account file
+    /// * GOOGLE_SERVICE_ACCOUNT_KEY: JSON serialized service account key
+    /// * GOOGLE_BUCKET: bucket name
+    /// * GOOGLE_BUCKET_NAME: (alias) bucket name
+    ///
+    /// # Example
+    /// ```
+    /// use object_store::gcp::GoogleCloudStorageBuilder;
+    ///
+    /// let gcs = GoogleCloudStorageBuilder::from_env()
+    ///     .with_bucket_name("foo")
+    ///     .build();
+    /// ```
+    pub fn from_env() -> Self {
+        let mut builder = Self::default();
+
+        if let Ok(service_account_path) = std::env::var("SERVICE_ACCOUNT") {
+            builder.service_account_path = Some(service_account_path);
+        }
+
+        for (os_key, os_value) in std::env::vars_os() {
+            if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) {
+                if key.starts_with("GOOGLE_") {
+                    if let Ok(config_key) = key.to_ascii_lowercase().parse() {
+                        builder = builder.with_config(config_key, value);
+                    }
+                }
+            }
+        }
+
+        builder
+    }
+
+    /// Parse available connection info from a well-known storage URL.
+    ///
+    /// The supported url schemes are:
+    ///
+    /// - `gs://<bucket>/<path>`
+    ///
+    /// Note: Settings derived from the URL will override any others set on this builder
+    ///
+    /// # Example
+    /// ```
+    /// use object_store::gcp::GoogleCloudStorageBuilder;
+    ///
+    /// let gcs = GoogleCloudStorageBuilder::from_env()
+    ///     .with_url("gs://bucket/path")
+    ///     .build();
+    /// ```
+    pub fn with_url(mut self, url: impl Into<String>) -> Self {
+        self.url = Some(url.into());
+        self
+    }
+
+    /// Set an option on the builder via a key - value pair.
+    pub fn with_config(mut self, key: GoogleConfigKey, value: impl Into<String>) -> Self {
+        match key {
+            GoogleConfigKey::ServiceAccount => self.service_account_path = Some(value.into()),
+            GoogleConfigKey::ServiceAccountKey => self.service_account_key = Some(value.into()),
+            GoogleConfigKey::Bucket => self.bucket_name = Some(value.into()),
+            GoogleConfigKey::ApplicationCredentials => {
+                self.application_credentials_path = Some(value.into())
+            }
+            GoogleConfigKey::SkipSignature => self.skip_signature.parse(value),
+            GoogleConfigKey::Client(key) => {
+                self.client_options = self.client_options.with_config(key, value)
+            }
+        };
+        self
+    }
+
+    /// Get config value via a [`GoogleConfigKey`].
+    ///
+    /// # Example
+    /// ```
+    /// use object_store::gcp::{GoogleCloudStorageBuilder, GoogleConfigKey};
+    ///
+    /// let builder = GoogleCloudStorageBuilder::from_env()
+    ///     .with_service_account_key("foo");
+    /// let service_account_key = builder.get_config_value(&GoogleConfigKey::ServiceAccountKey).unwrap_or_default();
+    /// assert_eq!("foo", &service_account_key);
+    /// ```
+    pub fn get_config_value(&self, key: &GoogleConfigKey) -> Option<String> {
+        match key {
+            GoogleConfigKey::ServiceAccount => self.service_account_path.clone(),
+            GoogleConfigKey::ServiceAccountKey => self.service_account_key.clone(),
+            GoogleConfigKey::Bucket => self.bucket_name.clone(),
+            GoogleConfigKey::ApplicationCredentials => self.application_credentials_path.clone(),
+            GoogleConfigKey::SkipSignature => Some(self.skip_signature.to_string()),
+            GoogleConfigKey::Client(key) => self.client_options.get_config_value(key),
+        }
+    }
+
+    /// Sets properties on this builder based on a URL
+    ///
+    /// This is a separate member function to allow fallible computation to
+    /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`]
+    fn parse_url(&mut self, url: &str) -> Result<()> {
+        let parsed = Url::parse(url).map_err(|source| Error::UnableToParseUrl {
+            source,
+            url: url.to_string(),
+        })?;
+
+        let host = parsed.host_str().ok_or_else(|| Error::UrlNotRecognised {
+            url: url.to_string(),
+        })?;
+
+        match parsed.scheme() {
+            "gs" => self.bucket_name = Some(host.to_string()),
+            scheme => {
+                let scheme = scheme.to_string();
+                return Err(Error::UnknownUrlScheme { scheme }.into());
+            }
+        }
+        Ok(())
+    }
+
+    /// Set the bucket name (required)
+    pub fn with_bucket_name(mut self, bucket_name: impl Into<String>) -> Self {
+        self.bucket_name = Some(bucket_name.into());
+        self
+    }
+
+    /// Set the path to the service account file.
+    ///
+    /// This or [`GoogleCloudStorageBuilder::with_service_account_key`] must be
+    /// set.
+    ///
+    /// Example `"/tmp/gcs.json"`.
+    ///
+    /// Example contents of `gcs.json`:
+    ///
+    /// ```json
+    /// {
+    ///    "gcs_base_url": "https://localhost:4443",
+    ///    "disable_oauth": true,
+    ///    "client_email": "",
+    ///    "private_key": ""
+    /// }
+    /// ```
+    pub fn with_service_account_path(mut self, service_account_path: impl Into<String>) -> Self {
+        self.service_account_path = Some(service_account_path.into());
+        self
+    }
+
+    /// Set the service account key. The service account must be in the JSON
+    /// format.
+    ///
+    /// This or [`GoogleCloudStorageBuilder::with_service_account_path`] must be
+    /// set.
+    pub fn with_service_account_key(mut self, service_account: impl Into<String>) -> Self {
+        self.service_account_key = Some(service_account.into());
+        self
+    }
+
+    /// Set the path to the application credentials file.
+    ///
+    /// <https://cloud.google.com/docs/authentication/provide-credentials-adc>
+    pub fn with_application_credentials(
+        mut self,
+        application_credentials_path: impl Into<String>,
+    ) -> Self {
+        self.application_credentials_path = Some(application_credentials_path.into());
+        self
+    }
+
+    /// If enabled, [`GoogleCloudStorage`] will not fetch credentials and will not sign requests.
+    ///
+    /// This can be useful when interacting with public GCS buckets that deny authorized requests.
+    pub fn with_skip_signature(mut self, skip_signature: bool) -> Self {
+        self.skip_signature = skip_signature.into();
+        self
+    }
+
+    /// Set the credential provider overriding any other options
+    pub fn with_credentials(mut self, credentials: GcpCredentialProvider) -> Self {
+        self.credentials = Some(credentials);
+        self
+    }
+
+    /// Set the retry configuration
+    pub fn with_retry(mut self, retry_config: RetryConfig) -> Self {
+        self.retry_config = retry_config;
+        self
+    }
+
+    /// Set the proxy_url to be used by the underlying client
+    pub fn with_proxy_url(mut self, proxy_url: impl Into<String>) -> Self {
+        self.client_options = self.client_options.with_proxy_url(proxy_url);
+        self
+    }
+
+    /// Set a trusted proxy CA certificate
+    pub fn with_proxy_ca_certificate(mut self, proxy_ca_certificate: impl Into<String>) -> Self {
+        self.client_options = self
+            .client_options
+            .with_proxy_ca_certificate(proxy_ca_certificate);
+        self
+    }
+
+    /// Set a list of hosts to exclude from proxy connections
+    pub fn with_proxy_excludes(mut self, proxy_excludes: impl Into<String>) -> Self {
+        self.client_options = self.client_options.with_proxy_excludes(proxy_excludes);
+        self
+    }
+
+    /// Sets the client options, overriding any already set
+    pub fn with_client_options(mut self, options: ClientOptions) -> Self {
+        self.client_options = options;
+        self
+    }
+
+    /// The [`HttpConnector`] to use
+    ///
+    /// On non-WASM32 platforms uses [`reqwest`] by default, on WASM32 platforms must be provided
+    pub fn with_http_connector<C: HttpConnector>(mut self, connector: C) -> Self {
+        self.http_connector = Some(Arc::new(connector));
+        self
+    }
+
+    /// Configure a connection to Google Cloud Storage, returning a
+    /// new [`GoogleCloudStorage`] and consuming `self`
+    pub fn build(mut self) -> Result<GoogleCloudStorage> {
+        if let Some(url) = self.url.take() {
+            self.parse_url(&url)?;
+        }
+
+        let bucket_name = self.bucket_name.ok_or(Error::MissingBucketName {})?;
+
+        let http = http_connector(self.http_connector)?;
+
+        // First try to initialize from the service account information.
+        let service_account_credentials =
+            match (self.service_account_path, self.service_account_key) {
+                (Some(path), None) => Some(
+                    ServiceAccountCredentials::from_file(path)
+                        .map_err(|source| Error::Credential { source })?,
+                ),
+                (None, Some(key)) => Some(
+                    ServiceAccountCredentials::from_key(&key)
+                        .map_err(|source| Error::Credential { source })?,
+                ),
+                (None, None) => None,
+                (Some(_), Some(_)) => return Err(Error::ServiceAccountPathAndKeyProvided.into()),
+            };
+
+        // Then try to initialize from the application credentials file, or the environment.
+        let application_default_credentials =
+            ApplicationDefaultCredentials::read(self.application_credentials_path.as_deref())?;
+
+        let disable_oauth = service_account_credentials
+            .as_ref()
+            .map(|c| c.disable_oauth)
+            .unwrap_or(false);
+
+        let gcs_base_url: String = service_account_credentials
+            .as_ref()
+            .and_then(|c| c.gcs_base_url.clone())
+            .unwrap_or_else(|| DEFAULT_GCS_BASE_URL.to_string());
+
+        let credentials = if let Some(credentials) = self.credentials {
+            credentials
+        } else if disable_oauth {
+            Arc::new(StaticCredentialProvider::new(GcpCredential {
+                bearer: "".to_string(),
+            })) as _
+        } else if let Some(credentials) = service_account_credentials.clone() {
+            Arc::new(TokenCredentialProvider::new(
+                credentials.token_provider()?,
+                http.connect(&self.client_options)?,
+                self.retry_config.clone(),
+            )) as _
+        } else if let Some(credentials) = application_default_credentials.clone() {
+            match credentials {
+                ApplicationDefaultCredentials::AuthorizedUser(token) => Arc::new(
+                    TokenCredentialProvider::new(
+                        token,
+                        http.connect(&self.client_options)?,
+                        self.retry_config.clone(),
+                    )
+                    .with_min_ttl(TOKEN_MIN_TTL),
+                ) as _,
+                ApplicationDefaultCredentials::ServiceAccount(token) => {
+                    Arc::new(TokenCredentialProvider::new(
+                        token.token_provider()?,
+                        http.connect(&self.client_options)?,
+                        self.retry_config.clone(),
+                    )) as _
+                }
+            }
+        } else {
+            Arc::new(
+                TokenCredentialProvider::new(
+                    InstanceCredentialProvider::default(),
+                    http.connect(&self.client_options.metadata_options())?,
+                    self.retry_config.clone(),
+                )
+                .with_min_ttl(TOKEN_MIN_TTL),
+            ) as _
+        };
+
+        let signing_credentials = if let Some(signing_credentials) = self.signing_credentials {
+            signing_credentials
+        } else if disable_oauth {
+            Arc::new(StaticCredentialProvider::new(GcpSigningCredential {
+                email: "".to_string(),
+                private_key: None,
+            })) as _
+        } else if let Some(credentials) = service_account_credentials.clone() {
+            credentials.signing_credentials()?
+        } else if let Some(credentials) = application_default_credentials.clone() {
+            match credentials {
+                ApplicationDefaultCredentials::AuthorizedUser(token) => {
+                    Arc::new(TokenCredentialProvider::new(
+                        AuthorizedUserSigningCredentials::from(token)?,
+                        http.connect(&self.client_options)?,
+                        self.retry_config.clone(),
+                    )) as _
+                }
+                ApplicationDefaultCredentials::ServiceAccount(token) => {
+                    token.signing_credentials()?
+                }
+            }
+        } else {
+            Arc::new(TokenCredentialProvider::new(
+                InstanceSigningCredentialProvider::default(),
+                http.connect(&self.client_options.metadata_options())?,
+                self.retry_config.clone(),
+            )) as _
+        };
+
+        let config = GoogleCloudStorageConfig {
+            base_url: gcs_base_url,
+            credentials,
+            signing_credentials,
+            bucket_name,
+            retry_config: self.retry_config,
+            client_options: self.client_options,
+            skip_signature: self.skip_signature.get()?,
+        };
+
+        let http_client = http.connect(&config.client_options)?;
+        Ok(GoogleCloudStorage {
+            client: Arc::new(GoogleCloudStorageClient::new(config, http_client)?),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::HashMap;
+    use std::io::Write;
+    use tempfile::NamedTempFile;
+
+    const FAKE_KEY: &str = r#"{"private_key": "private_key", "private_key_id": "private_key_id", "client_email":"client_email", "disable_oauth":true}"#;
+
+    #[test]
+    fn gcs_test_service_account_key_and_path() {
+        let mut tfile = NamedTempFile::new().unwrap();
+        write!(tfile, "{FAKE_KEY}").unwrap();
+        let _ = GoogleCloudStorageBuilder::new()
+            .with_service_account_key(FAKE_KEY)
+            .with_service_account_path(tfile.path().to_str().unwrap())
+            .with_bucket_name("foo")
+            .build()
+            .unwrap_err();
+    }
+
+    #[test]
+    fn gcs_test_config_from_map() {
+        let google_service_account = "object_store:fake_service_account".to_string();
+        let google_bucket_name = "object_store:fake_bucket".to_string();
+        let options = HashMap::from([
+            ("google_service_account", google_service_account.clone()),
+            ("google_bucket_name", google_bucket_name.clone()),
+        ]);
+
+        let builder = options
+            .iter()
+            .fold(GoogleCloudStorageBuilder::new(), |builder, (key, value)| {
+                builder.with_config(key.parse().unwrap(), value)
+            });
+
+        assert_eq!(
+            builder.service_account_path.unwrap(),
+            google_service_account.as_str()
+        );
+        assert_eq!(builder.bucket_name.unwrap(), google_bucket_name.as_str());
+    }
+
+    #[test]
+    fn gcs_test_config_aliases() {
+        // Service account path
+        for alias in [
+            "google_service_account",
+            "service_account",
+            "google_service_account_path",
+            "service_account_path",
+        ] {
+            let builder = GoogleCloudStorageBuilder::new()
+                .with_config(alias.parse().unwrap(), "/fake/path.json");
+            assert_eq!("/fake/path.json", builder.service_account_path.unwrap());
+        }
+
+        // Service account key
+        for alias in ["google_service_account_key", "service_account_key"] {
+            let builder =
+                GoogleCloudStorageBuilder::new().with_config(alias.parse().unwrap(), FAKE_KEY);
+            assert_eq!(FAKE_KEY, builder.service_account_key.unwrap());
+        }
+
+        // Bucket name
+        for alias in [
+            "google_bucket",
+            "google_bucket_name",
+            "bucket",
+            "bucket_name",
+        ] {
+            let builder =
+                GoogleCloudStorageBuilder::new().with_config(alias.parse().unwrap(), "fake_bucket");
+            assert_eq!("fake_bucket", builder.bucket_name.unwrap());
+        }
+    }
+
+    #[tokio::test]
+    async fn gcs_test_proxy_url() {
+        let mut tfile = NamedTempFile::new().unwrap();
+        write!(tfile, "{FAKE_KEY}").unwrap();
+        let service_account_path = tfile.path();
+        let gcs = GoogleCloudStorageBuilder::new()
+            .with_service_account_path(service_account_path.to_str().unwrap())
+            .with_bucket_name("foo")
+            .with_proxy_url("https://example.com")
+            .build();
+        assert!(gcs.is_ok());
+
+        let err = GoogleCloudStorageBuilder::new()
+            .with_service_account_path(service_account_path.to_str().unwrap())
+            .with_bucket_name("foo")
+            // use invalid url
+            .with_proxy_url("dxx:ddd\\example.com")
+            .build()
+            .unwrap_err()
+            .to_string();
+
+        assert_eq!("Generic HTTP client error: builder error", err);
+    }
+
+    #[test]
+    fn gcs_test_urls() {
+        let mut builder = GoogleCloudStorageBuilder::new();
+        builder.parse_url("gs://bucket/path").unwrap();
+        assert_eq!(builder.bucket_name.as_deref(), Some("bucket"));
+
+        builder.parse_url("gs://bucket.mydomain/path").unwrap();
+        assert_eq!(builder.bucket_name.as_deref(), Some("bucket.mydomain"));
+
+        builder.parse_url("mailto://bucket/path").unwrap_err();
+    }
+
+    #[test]
+    fn gcs_test_service_account_key_only() {
+        let _ = GoogleCloudStorageBuilder::new()
+            .with_service_account_key(FAKE_KEY)
+            .with_bucket_name("foo")
+            .build()
+            .unwrap();
+    }
+
+    #[test]
+    fn gcs_test_config_get_value() {
+        let google_service_account = "object_store:fake_service_account".to_string();
+        let google_bucket_name = "object_store:fake_bucket".to_string();
+        let builder = GoogleCloudStorageBuilder::new()
+            .with_config(GoogleConfigKey::ServiceAccount, &google_service_account)
+            .with_config(GoogleConfigKey::Bucket, &google_bucket_name);
+
+        assert_eq!(
+            builder
+                .get_config_value(&GoogleConfigKey::ServiceAccount)
+                .unwrap(),
+            google_service_account
+        );
+        assert_eq!(
+            builder.get_config_value(&GoogleConfigKey::Bucket).unwrap(),
+            google_bucket_name
+        );
+    }
+
+    #[test]
+    fn gcp_test_client_opts() {
+        let key = "GOOGLE_PROXY_URL";
+        if let Ok(config_key) = key.to_ascii_lowercase().parse() {
+            assert_eq!(
+                GoogleConfigKey::Client(ClientConfigKey::ProxyUrl),
+                config_key
+            );
+        } else {
+            panic!("{key} not propagated as ClientConfigKey");
+        }
+    }
+}
diff --git a/rust/object_store/src/gcp/client.rs b/rust/object_store/src/gcp/client.rs
new file mode 100644
index 0000000000..47af709dfd
--- /dev/null
+++ b/rust/object_store/src/gcp/client.rs
@@ -0,0 +1,723 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::client::builder::HttpRequestBuilder;
+use crate::client::get::GetClient;
+use crate::client::header::{get_put_result, get_version, HeaderConfig};
+use crate::client::list::ListClient;
+use crate::client::retry::{RetryContext, RetryExt};
+use crate::client::s3::{
+    CompleteMultipartUpload, CompleteMultipartUploadResult, InitiateMultipartUploadResult,
+    ListResponse,
+};
+use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpResponse};
+use crate::gcp::credential::CredentialExt;
+use crate::gcp::{GcpCredential, GcpCredentialProvider, GcpSigningCredentialProvider, STORE};
+use crate::list::{PaginatedListOptions, PaginatedListResult};
+use crate::multipart::PartId;
+use crate::path::Path;
+use crate::util::hex_encode;
+use crate::{
+    Attribute, Attributes, ClientOptions, GetOptions, MultipartId, PutMode, PutMultipartOptions,
+    PutOptions, PutPayload, PutResult, Result, RetryConfig,
+};
+use async_trait::async_trait;
+use base64::prelude::BASE64_STANDARD;
+use base64::Engine;
+use bytes::Buf;
+use http::header::{
+    CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH,
+    CONTENT_TYPE,
+};
+use http::{HeaderName, Method, StatusCode};
+use percent_encoding::{percent_encode, utf8_percent_encode, NON_ALPHANUMERIC};
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+
+const VERSION_HEADER: &str = "x-goog-generation";
+const DEFAULT_CONTENT_TYPE: &str = "application/octet-stream";
+const USER_DEFINED_METADATA_HEADER_PREFIX: &str = "x-goog-meta-";
+const STORAGE_CLASS: &str = "x-goog-storage-class";
+
+static VERSION_MATCH: HeaderName = HeaderName::from_static("x-goog-if-generation-match");
+
+#[derive(Debug, thiserror::Error)]
+enum Error {
+    #[error("Error performing list request: {}", source)]
+    ListRequest {
+        source: crate::client::retry::RetryError,
+    },
+
+    #[error("Error getting list response body: {}", source)]
+    ListResponseBody { source: HttpError },
+
+    #[error("Got invalid list response: {}", source)]
+    InvalidListResponse { source: quick_xml::de::DeError },
+
+    #[error("Error performing get request {}: {}", path, source)]
+    GetRequest {
+        source: crate::client::retry::RetryError,
+        path: String,
+    },
+
+    #[error("Error performing request {}: {}", path, source)]
+    Request {
+        source: crate::client::retry::RetryError,
+        path: String,
+    },
+
+    #[error("Error getting put response body: {}", source)]
+    PutResponseBody { source: HttpError },
+
+    #[error("Got invalid put request: {}", source)]
+    InvalidPutRequest { source: quick_xml::se::SeError },
+
+    #[error("Got invalid put response: {}", source)]
+    InvalidPutResponse { source: quick_xml::de::DeError },
+
+    #[error("Unable to extract metadata from headers: {}", source)]
+    Metadata {
+        source: crate::client::header::Error,
+    },
+
+    #[error("Version required for conditional update")]
+    MissingVersion,
+
+    #[error("Error performing complete multipart request: {}", source)]
+    CompleteMultipartRequest {
+        source: crate::client::retry::RetryError,
+    },
+
+    #[error("Error getting complete multipart response body: {}", source)]
+    CompleteMultipartResponseBody { source: HttpError },
+
+    #[error("Got invalid multipart response: {}", source)]
+    InvalidMultipartResponse { source: quick_xml::de::DeError },
+
+    #[error("Error signing blob: {}", source)]
+    SignBlobRequest {
+        source: crate::client::retry::RetryError,
+    },
+
+    #[error("Got invalid signing blob response: {}", source)]
+    InvalidSignBlobResponse { source: HttpError },
+
+    #[error("Got invalid signing blob signature: {}", source)]
+ InvalidSignBlobSignature { source: base64::DecodeError }, +} + +impl From for crate::Error { + fn from(err: Error) -> Self { + match err { + Error::GetRequest { source, path } | Error::Request { source, path } => { + source.error(STORE, path) + } + _ => Self::Generic { + store: STORE, + source: Box::new(err), + }, + } + } +} + +#[derive(Debug)] +pub(crate) struct GoogleCloudStorageConfig { + pub base_url: String, + + pub credentials: GcpCredentialProvider, + + pub signing_credentials: GcpSigningCredentialProvider, + + pub bucket_name: String, + + pub retry_config: RetryConfig, + + pub client_options: ClientOptions, + + pub skip_signature: bool, +} + +impl GoogleCloudStorageConfig { + pub(crate) fn path_url(&self, path: &Path) -> String { + format!("{}/{}/{}", self.base_url, self.bucket_name, path) + } + + pub(crate) async fn get_credential(&self) -> Result>> { + Ok(match self.skip_signature { + false => Some(self.credentials.get_credential().await?), + true => None, + }) + } +} + +/// A builder for a put request allowing customisation of the headers and query string +pub(crate) struct Request<'a> { + path: &'a Path, + config: &'a GoogleCloudStorageConfig, + payload: Option, + builder: HttpRequestBuilder, + idempotent: bool, +} + +impl Request<'_> { + fn header(self, k: &HeaderName, v: &str) -> Self { + let builder = self.builder.header(k, v); + Self { builder, ..self } + } + + fn query(self, query: &T) -> Self { + let builder = self.builder.query(query); + Self { builder, ..self } + } + + fn idempotent(mut self, idempotent: bool) -> Self { + self.idempotent = idempotent; + self + } + + fn with_attributes(self, attributes: Attributes) -> Self { + let mut builder = self.builder; + let mut has_content_type = false; + for (k, v) in &attributes { + builder = match k { + Attribute::CacheControl => builder.header(CACHE_CONTROL, v.as_ref()), + Attribute::ContentDisposition => builder.header(CONTENT_DISPOSITION, v.as_ref()), + Attribute::ContentEncoding => builder.header(CONTENT_ENCODING, v.as_ref()), + Attribute::ContentLanguage => builder.header(CONTENT_LANGUAGE, v.as_ref()), + Attribute::ContentType => { + has_content_type = true; + builder.header(CONTENT_TYPE, v.as_ref()) + } + Attribute::StorageClass => builder.header(STORAGE_CLASS, v.as_ref()), + Attribute::Metadata(k_suffix) => builder.header( + &format!("{USER_DEFINED_METADATA_HEADER_PREFIX}{k_suffix}"), + v.as_ref(), + ), + }; + } + + if !has_content_type { + let value = self.config.client_options.get_content_type(self.path); + builder = builder.header(CONTENT_TYPE, value.unwrap_or(DEFAULT_CONTENT_TYPE)) + } + Self { builder, ..self } + } + + fn with_payload(self, payload: PutPayload) -> Self { + let content_length = payload.content_length(); + Self { + builder: self.builder.header(CONTENT_LENGTH, content_length), + payload: Some(payload), + ..self + } + } + + fn with_extensions(self, extensions: ::http::Extensions) -> Self { + let builder = self.builder.extensions(extensions); + Self { builder, ..self } + } + + async fn send(self) -> Result { + let credential = self.config.credentials.get_credential().await?; + let resp = self + .builder + .bearer_auth(&credential.bearer) + .retryable(&self.config.retry_config) + .idempotent(self.idempotent) + .payload(self.payload) + .send() + .await + .map_err(|source| { + let path = self.path.as_ref().into(); + Error::Request { source, path } + })?; + Ok(resp) + } + + async fn do_put(self) -> Result { + let response = self.send().await?; + Ok(get_put_result(response.headers(), VERSION_HEADER) + 
.map_err(|source| Error::Metadata { source })?) + } +} + +/// Sign Blob Request Body +#[derive(Debug, Serialize)] +struct SignBlobBody { + /// The payload to sign + payload: String, +} + +/// Sign Blob Response +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +struct SignBlobResponse { + /// The signature for the payload + signed_blob: String, +} + +#[derive(Debug)] +pub(crate) struct GoogleCloudStorageClient { + config: GoogleCloudStorageConfig, + + client: HttpClient, + + bucket_name_encoded: String, + + // TODO: Hook this up in tests + max_list_results: Option, +} + +impl GoogleCloudStorageClient { + pub(crate) fn new(config: GoogleCloudStorageConfig, client: HttpClient) -> Result { + let bucket_name_encoded = + percent_encode(config.bucket_name.as_bytes(), NON_ALPHANUMERIC).to_string(); + + Ok(Self { + config, + client, + bucket_name_encoded, + max_list_results: None, + }) + } + + pub(crate) fn config(&self) -> &GoogleCloudStorageConfig { + &self.config + } + + async fn get_credential(&self) -> Result>> { + self.config.get_credential().await + } + + /// Create a signature from a string-to-sign using Google Cloud signBlob method. + /// form like: + /// ```plaintext + /// curl -X POST --data-binary @JSON_FILE_NAME \ + /// -H "Authorization: Bearer OAUTH2_TOKEN" \ + /// -H "Content-Type: application/json" \ + /// "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/SERVICE_ACCOUNT_EMAIL:signBlob" + /// ``` + /// + /// 'JSON_FILE_NAME' is a file containing the following JSON object: + /// ```plaintext + /// { + /// "payload": "REQUEST_INFORMATION" + /// } + /// ``` + pub(crate) async fn sign_blob( + &self, + string_to_sign: &str, + client_email: &str, + ) -> Result { + let credential = self.get_credential().await?; + let body = SignBlobBody { + payload: BASE64_STANDARD.encode(string_to_sign), + }; + + let url = format!( + "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/{client_email}:signBlob" + ); + + let response = self + .client + .post(&url) + .with_bearer_auth(credential.as_deref()) + .json(&body) + .retryable(&self.config.retry_config) + .idempotent(true) + .send() + .await + .map_err(|source| Error::SignBlobRequest { source })? 
+ .into_body() + .json::() + .await + .map_err(|source| Error::InvalidSignBlobResponse { source })?; + + let signed_blob = BASE64_STANDARD + .decode(response.signed_blob) + .map_err(|source| Error::InvalidSignBlobSignature { source })?; + + Ok(hex_encode(&signed_blob)) + } + + pub(crate) fn object_url(&self, path: &Path) -> String { + let encoded = utf8_percent_encode(path.as_ref(), NON_ALPHANUMERIC); + format!( + "{}/{}/{}", + self.config.base_url, self.bucket_name_encoded, encoded + ) + } + + /// Perform a put request + /// + /// Returns the new ETag + pub(crate) fn request<'a>(&'a self, method: Method, path: &'a Path) -> Request<'a> { + let builder = self.client.request(method, self.object_url(path)); + + Request { + path, + builder, + payload: None, + config: &self.config, + idempotent: false, + } + } + + pub(crate) async fn put( + &self, + path: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result { + let PutOptions { + mode, + // not supported by GCP + tags: _, + attributes, + extensions, + } = opts; + + let builder = self + .request(Method::PUT, path) + .with_payload(payload) + .with_attributes(attributes) + .with_extensions(extensions); + + let builder = match &mode { + PutMode::Overwrite => builder.idempotent(true), + PutMode::Create => builder.header(&VERSION_MATCH, "0"), + PutMode::Update(v) => { + let etag = v.version.as_ref().ok_or(Error::MissingVersion)?; + builder.header(&VERSION_MATCH, etag) + } + }; + + match (mode, builder.do_put().await) { + (PutMode::Create, Err(crate::Error::Precondition { path, source })) => { + Err(crate::Error::AlreadyExists { path, source }) + } + (_, r) => r, + } + } + + /// Perform a put part request + /// + /// Returns the new [`PartId`] + pub(crate) async fn put_part( + &self, + path: &Path, + upload_id: &MultipartId, + part_idx: usize, + data: PutPayload, + ) -> Result { + let query = &[ + ("partNumber", &format!("{}", part_idx + 1)), + ("uploadId", upload_id), + ]; + let result = self + .request(Method::PUT, path) + .with_payload(data) + .query(query) + .idempotent(true) + .do_put() + .await?; + + Ok(PartId { + content_id: result.e_tag.unwrap(), + }) + } + + /// Initiate a multipart upload + pub(crate) async fn multipart_initiate( + &self, + path: &Path, + opts: PutMultipartOptions, + ) -> Result { + let PutMultipartOptions { + // not supported by GCP + tags: _, + attributes, + extensions, + } = opts; + + let response = self + .request(Method::POST, path) + .with_attributes(attributes) + .with_extensions(extensions) + .header(&CONTENT_LENGTH, "0") + .query(&[("uploads", "")]) + .send() + .await?; + + let data = response + .into_body() + .bytes() + .await + .map_err(|source| Error::PutResponseBody { source })?; + + let result: InitiateMultipartUploadResult = + quick_xml::de::from_reader(data.as_ref().reader()) + .map_err(|source| Error::InvalidPutResponse { source })?; + + Ok(result.upload_id) + } + + /// Cleanup unused parts + pub(crate) async fn multipart_cleanup( + &self, + path: &Path, + multipart_id: &MultipartId, + ) -> Result<()> { + let credential = self.get_credential().await?; + let url = self.object_url(path); + + self.client + .request(Method::DELETE, &url) + .with_bearer_auth(credential.as_deref()) + .header(CONTENT_TYPE, "application/octet-stream") + .header(CONTENT_LENGTH, "0") + .query(&[("uploadId", multipart_id)]) + .send_retry(&self.config.retry_config) + .await + .map_err(|source| { + let path = path.as_ref().into(); + Error::Request { source, path } + })?; + + Ok(()) + } + + pub(crate) async fn 
multipart_complete( + &self, + path: &Path, + multipart_id: &MultipartId, + completed_parts: Vec, + ) -> Result { + if completed_parts.is_empty() { + // GCS doesn't allow empty multipart uploads, so fallback to regular upload. + self.multipart_cleanup(path, multipart_id).await?; + let result = self + .put(path, PutPayload::new(), Default::default()) + .await?; + return Ok(result); + } + + let upload_id = multipart_id.clone(); + let url = self.object_url(path); + + let upload_info = CompleteMultipartUpload::from(completed_parts); + let credential = self.get_credential().await?; + + let data = quick_xml::se::to_string(&upload_info) + .map_err(|source| Error::InvalidPutRequest { source })? + // We cannot disable the escaping that transforms "/" to ""e;" :( + // https://github.com/tafia/quick-xml/issues/362 + // https://github.com/tafia/quick-xml/issues/350 + .replace(""", "\""); + + let response = self + .client + .request(Method::POST, &url) + .with_bearer_auth(credential.as_deref()) + .query(&[("uploadId", upload_id)]) + .body(data) + .retryable(&self.config.retry_config) + .idempotent(true) + .send() + .await + .map_err(|source| Error::CompleteMultipartRequest { source })?; + + let version = get_version(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?; + + let data = response + .into_body() + .bytes() + .await + .map_err(|source| Error::CompleteMultipartResponseBody { source })?; + + let response: CompleteMultipartUploadResult = quick_xml::de::from_reader(data.reader()) + .map_err(|source| Error::InvalidMultipartResponse { source })?; + + Ok(PutResult { + e_tag: Some(response.e_tag), + version, + }) + } + + /// Perform a delete request + pub(crate) async fn delete_request(&self, path: &Path) -> Result<()> { + self.request(Method::DELETE, path).send().await?; + Ok(()) + } + + /// Perform a copy request + pub(crate) async fn copy_request( + &self, + from: &Path, + to: &Path, + if_not_exists: bool, + ) -> Result<()> { + let credential = self.get_credential().await?; + let url = self.object_url(to); + + let from = utf8_percent_encode(from.as_ref(), NON_ALPHANUMERIC); + let source = format!("{}/{}", self.bucket_name_encoded, from); + + let mut builder = self + .client + .request(Method::PUT, url) + .header("x-goog-copy-source", source); + + if if_not_exists { + builder = builder.header(&VERSION_MATCH, 0); + } + + builder + .with_bearer_auth(credential.as_deref()) + // Needed if reqwest is compiled with native-tls instead of rustls-tls + // See https://github.com/apache/arrow-rs/pull/3921 + .header(CONTENT_LENGTH, 0) + .retryable(&self.config.retry_config) + .idempotent(!if_not_exists) + .send() + .await + .map_err(|err| match err.status() { + Some(StatusCode::PRECONDITION_FAILED) => crate::Error::AlreadyExists { + source: Box::new(err), + path: to.to_string(), + }, + _ => err.error(STORE, from.to_string()), + })?; + + Ok(()) + } +} + +#[async_trait] +impl GetClient for GoogleCloudStorageClient { + const STORE: &'static str = STORE; + const HEADER_CONFIG: HeaderConfig = HeaderConfig { + etag_required: true, + last_modified_required: true, + version_header: Some(VERSION_HEADER), + user_defined_metadata_prefix: Some(USER_DEFINED_METADATA_HEADER_PREFIX), + }; + + fn retry_config(&self) -> &RetryConfig { + &self.config.retry_config + } + + /// Perform a get request + async fn get_request( + &self, + ctx: &mut RetryContext, + path: &Path, + options: GetOptions, + ) -> Result { + let credential = self.get_credential().await?; + let url = self.object_url(path); + + 
let method = match options.head { + true => Method::HEAD, + false => Method::GET, + }; + + let mut request = self.client.request(method, url); + + if let Some(version) = &options.version { + request = request.query(&[("generation", version)]); + } + + let response = request + .with_bearer_auth(credential.as_deref()) + .with_get_options(options) + .retryable_request() + .send(ctx) + .await + .map_err(|source| { + let path = path.as_ref().into(); + Error::GetRequest { source, path } + })?; + + Ok(response) + } +} + +#[async_trait] +impl ListClient for Arc { + /// Perform a list request + async fn list_request( + &self, + prefix: Option<&str>, + opts: PaginatedListOptions, + ) -> Result { + let credential = self.get_credential().await?; + let url = format!("{}/{}", self.config.base_url, self.bucket_name_encoded); + + let mut query = Vec::with_capacity(5); + query.push(("list-type", "2")); + if let Some(delimiter) = &opts.delimiter { + query.push(("delimiter", delimiter.as_ref())) + } + + if let Some(prefix) = prefix { + query.push(("prefix", prefix)) + } + + if let Some(page_token) = &opts.page_token { + query.push(("continuation-token", page_token)) + } + + if let Some(max_results) = &self.max_list_results { + query.push(("max-keys", max_results)) + } + + if let Some(offset) = &opts.offset { + query.push(("start-after", offset.as_ref())) + } + + let max_keys_str; + if let Some(max_keys) = &opts.max_keys { + max_keys_str = max_keys.to_string(); + query.push(("max-keys", max_keys_str.as_ref())) + } + + let response = self + .client + .request(Method::GET, url) + .extensions(opts.extensions) + .query(&query) + .with_bearer_auth(credential.as_deref()) + .send_retry(&self.config.retry_config) + .await + .map_err(|source| Error::ListRequest { source })? + .into_body() + .bytes() + .await + .map_err(|source| Error::ListResponseBody { source })?; + + let mut response: ListResponse = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidListResponse { source })?; + + let token = response.next_continuation_token.take(); + Ok(PaginatedListResult { + result: response.try_into()?, + page_token: token, + }) + } +} diff --git a/rust/object_store/src/gcp/credential.rs b/rust/object_store/src/gcp/credential.rs new file mode 100644 index 0000000000..1e067f5329 --- /dev/null +++ b/rust/object_store/src/gcp/credential.rs @@ -0,0 +1,964 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
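Editor's note: before moving into `credential.rs`, it may help to see the encoding behaviour of `object_url` above in isolation. `NON_ALPHANUMERIC` escapes every byte outside `[0-9A-Za-z]`, including the path delimiter `/`, which is why bucket and object names are encoded separately. The following is a standalone sketch, not part of the patch; the bucket and object names are made up for illustration:

```rust
// Standalone illustration of the percent-encoding used by `object_url` above.
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};

fn main() {
    let bucket = "my-bucket";
    let object = "dir/file name.txt";
    // NON_ALPHANUMERIC escapes everything outside [0-9A-Za-z], so the
    // delimiter "/", the space, and even "." and "-" are percent-escaped.
    let encoded_bucket = utf8_percent_encode(bucket, NON_ALPHANUMERIC).to_string();
    let encoded_object = utf8_percent_encode(object, NON_ALPHANUMERIC).to_string();
    assert_eq!(encoded_object, "dir%2Ffile%20name%2Etxt");
    println!("https://storage.googleapis.com/{encoded_bucket}/{encoded_object}");
}
```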
+
+use super::client::GoogleCloudStorageClient;
+use crate::client::builder::HttpRequestBuilder;
+use crate::client::retry::RetryExt;
+use crate::client::token::TemporaryToken;
+use crate::client::{HttpClient, HttpError, TokenProvider};
+use crate::gcp::{GcpSigningCredentialProvider, STORE};
+use crate::util::{hex_digest, hex_encode, STRICT_ENCODE_SET};
+use crate::{RetryConfig, StaticCredentialProvider};
+use async_trait::async_trait;
+use base64::prelude::BASE64_URL_SAFE_NO_PAD;
+use base64::Engine;
+use chrono::{DateTime, Utc};
+use futures::TryFutureExt;
+use http::{HeaderMap, Method};
+use itertools::Itertools;
+use percent_encoding::utf8_percent_encode;
+use ring::signature::RsaKeyPair;
+use serde::Deserialize;
+use std::collections::BTreeMap;
+use std::env;
+use std::fs::File;
+use std::io::BufReader;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+use tracing::info;
+use url::Url;
+
+pub(crate) const DEFAULT_SCOPE: &str = "https://www.googleapis.com/auth/cloud-platform";
+
+pub(crate) const DEFAULT_GCS_BASE_URL: &str = "https://storage.googleapis.com";
+
+const DEFAULT_GCS_PLAYLOAD_STRING: &str = "UNSIGNED-PAYLOAD";
+const DEFAULT_GCS_SIGN_BLOB_HOST: &str = "storage.googleapis.com";
+
+const DEFAULT_METADATA_HOST: &str = "metadata.google.internal";
+const DEFAULT_METADATA_IP: &str = "169.254.169.254";
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("Unable to open service account file from {}: {}", path.display(), source)]
+    OpenCredentials {
+        source: std::io::Error,
+        path: PathBuf,
+    },
+
+    #[error("Unable to decode service account file: {}", source)]
+    DecodeCredentials { source: serde_json::Error },
+
+    #[error("No RSA key found in pem file")]
+    MissingKey,
+
+    #[error("Invalid RSA key: {}", source)]
+    InvalidKey {
+        #[from]
+        source: ring::error::KeyRejected,
+    },
+
+    #[error("Error signing: {}", source)]
+    Sign { source: ring::error::Unspecified },
+
+    #[error("Error encoding jwt payload: {}", source)]
+    Encode { source: serde_json::Error },
+
+    #[error("Unsupported key encoding: {}", encoding)]
+    UnsupportedKey { encoding: String },
+
+    #[error("Error performing token request: {}", source)]
+    TokenRequest {
+        source: crate::client::retry::RetryError,
+    },
+
+    #[error("Error getting token response body: {}", source)]
+    TokenResponseBody { source: HttpError },
+
+    #[error("Error reading pem file: {}", source)]
+    ReadPem { source: std::io::Error },
+}
+
+impl From<Error> for crate::Error {
+    fn from(value: Error) -> Self {
+        Self::Generic {
+            store: STORE,
+            source: Box::new(value),
+        }
+    }
+}
+
+/// A Google Cloud Storage Credential for signing
+#[derive(Debug)]
+pub struct GcpSigningCredential {
+    /// The email of the service account
+    pub email: String,
+
+    /// An optional RSA private key
+    ///
+    /// If provided this will be used to sign the URL, otherwise a call will be made to
+    /// [`iam.serviceAccounts.signBlob`]. This allows supporting credential sources
+    /// that don't expose the service account private key, e.g. [IMDS].
+    ///
+    /// [IMDS]: https://cloud.google.com/docs/authentication/get-id-token#metadata-server
+    /// [`iam.serviceAccounts.signBlob`]: https://cloud.google.com/storage/docs/authentication/creating-signatures
+    pub private_key: Option<ServiceAccountKey>,
+}
+
+/// A private RSA key for a service account
+#[derive(Debug)]
+pub struct ServiceAccountKey(RsaKeyPair);
+
+impl ServiceAccountKey {
+    /// Parses a pem-encoded RSA key
+    pub fn from_pem(encoded: &[u8]) -> Result<Self> {
+        use rustls_pemfile::Item;
+        use std::io::Cursor;
+
+        let mut cursor = Cursor::new(encoded);
+        let mut reader = BufReader::new(&mut cursor);
+
+        match rustls_pemfile::read_one(&mut reader) {
+            Ok(item) => match item {
+                Some(Item::Pkcs8Key(key)) => Self::from_pkcs8(key.secret_pkcs8_der()),
+                Some(Item::Pkcs1Key(key)) => Self::from_der(key.secret_pkcs1_der()),
+                _ => Err(Error::MissingKey),
+            },
+            Err(e) => Err(Error::ReadPem { source: e }),
+        }
+    }
+
+    /// Parses an unencrypted PKCS#8-encoded RSA private key.
+    pub fn from_pkcs8(key: &[u8]) -> Result<Self> {
+        Ok(Self(RsaKeyPair::from_pkcs8(key)?))
+    }
+
+    /// Parses an unencrypted PKCS#1 DER-encoded RSA private key.
+    pub fn from_der(key: &[u8]) -> Result<Self> {
+        Ok(Self(RsaKeyPair::from_der(key)?))
+    }
+
+    fn sign(&self, string_to_sign: &str) -> Result<String> {
+        let mut signature = vec![0; self.0.public().modulus_len()];
+        self.0
+            .sign(
+                &ring::signature::RSA_PKCS1_SHA256,
+                &ring::rand::SystemRandom::new(),
+                string_to_sign.as_bytes(),
+                &mut signature,
+            )
+            .map_err(|source| Error::Sign { source })?;
+
+        Ok(hex_encode(&signature))
+    }
+}
+
+/// A Google Cloud Storage Credential
+#[derive(Debug, Eq, PartialEq)]
+pub struct GcpCredential {
+    /// An HTTP bearer token
+    pub bearer: String,
+}
+
+pub(crate) type Result<T, E = Error> = std::result::Result<T, E>;
+
+#[derive(Debug, Default, serde::Serialize)]
+pub(crate) struct JwtHeader<'a> {
+    /// The type of JWS: it can only be "JWT" here
+    ///
+    /// Defined in [RFC7515#4.1.9](https://tools.ietf.org/html/rfc7515#section-4.1.9).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub typ: Option<&'a str>,
+    /// The algorithm used
+    ///
+    /// Defined in [RFC7515#4.1.1](https://tools.ietf.org/html/rfc7515#section-4.1.1).
+    pub alg: &'a str,
+    /// Content type
+    ///
+    /// Defined in [RFC7519#5.2](https://tools.ietf.org/html/rfc7519#section-5.2).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub cty: Option<&'a str>,
+    /// JSON Key URL
+    ///
+    /// Defined in [RFC7515#4.1.2](https://tools.ietf.org/html/rfc7515#section-4.1.2).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub jku: Option<&'a str>,
+    /// Key ID
+    ///
+    /// Defined in [RFC7515#4.1.4](https://tools.ietf.org/html/rfc7515#section-4.1.4).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub kid: Option<&'a str>,
+    /// X.509 URL
+    ///
+    /// Defined in [RFC7515#4.1.5](https://tools.ietf.org/html/rfc7515#section-4.1.5).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub x5u: Option<&'a str>,
+    /// X.509 certificate thumbprint
+    ///
+    /// Defined in [RFC7515#4.1.7](https://tools.ietf.org/html/rfc7515#section-4.1.7).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub x5t: Option<&'a str>,
+}
+
+#[derive(serde::Serialize)]
+struct TokenClaims<'a> {
+    iss: &'a str,
+    sub: &'a str,
+    scope: &'a str,
+    exp: u64,
+    iat: u64,
+}
+
+#[derive(serde::Deserialize, Debug)]
+struct TokenResponse {
+    access_token: String,
+    expires_in: u64,
+    id_token: Option<String>,
+}
+
+/// Self-signed JWT (JSON Web Token).
+///
+/// # References
+/// -
+#[derive(Debug)]
+pub(crate) struct SelfSignedJwt {
+    issuer: String,
+    scope: String,
+    private_key: ServiceAccountKey,
+    key_id: String,
+}
+
+impl SelfSignedJwt {
+    /// Create a new [`SelfSignedJwt`]
+    pub(crate) fn new(
+        key_id: String,
+        issuer: String,
+        private_key: ServiceAccountKey,
+        scope: String,
+    ) -> Result<Self> {
+        Ok(Self {
+            issuer,
+            scope,
+            private_key,
+            key_id,
+        })
+    }
+}
+
+#[async_trait]
+impl TokenProvider for SelfSignedJwt {
+    type Credential = GcpCredential;
+
+    /// Fetch a fresh token
+    async fn fetch_token(
+        &self,
+        _client: &HttpClient,
+        _retry: &RetryConfig,
+    ) -> crate::Result<TemporaryToken<Arc<GcpCredential>>> {
+        let now = seconds_since_epoch();
+        let exp = now + 3600;
+
+        let claims = TokenClaims {
+            iss: &self.issuer,
+            sub: &self.issuer,
+            scope: &self.scope,
+            iat: now,
+            exp,
+        };
+
+        let jwt_header = b64_encode_obj(&JwtHeader {
+            alg: "RS256",
+            typ: Some("JWT"),
+            kid: Some(&self.key_id),
+            ..Default::default()
+        })?;
+
+        let claim_str = b64_encode_obj(&claims)?;
+        let message = [jwt_header.as_ref(), claim_str.as_ref()].join(".");
+        let mut sig_bytes = vec![0; self.private_key.0.public().modulus_len()];
+        self.private_key
+            .0
+            .sign(
+                &ring::signature::RSA_PKCS1_SHA256,
+                &ring::rand::SystemRandom::new(),
+                message.as_bytes(),
+                &mut sig_bytes,
+            )
+            .map_err(|source| Error::Sign { source })?;
+
+        let signature = BASE64_URL_SAFE_NO_PAD.encode(sig_bytes);
+        let bearer = [message, signature].join(".");
+
+        Ok(TemporaryToken {
+            token: Arc::new(GcpCredential { bearer }),
+            expiry: Some(Instant::now() + Duration::from_secs(3600)),
+        })
+    }
+}
+
+fn read_credentials_file<T>(service_account_path: impl AsRef<Path>) -> Result<T>
+where
+    T: serde::de::DeserializeOwned,
+{
+    let file = File::open(&service_account_path).map_err(|source| {
+        let path = service_account_path.as_ref().to_owned();
+        Error::OpenCredentials { source, path }
+    })?;
+    let reader = BufReader::new(file);
+    serde_json::from_reader(reader).map_err(|source| Error::DecodeCredentials { source })
+}
+
+/// A deserialized `service-account-********.json`-file.
+#[derive(serde::Deserialize, Debug, Clone)]
+pub(crate) struct ServiceAccountCredentials {
+    /// The private key in RSA format.
+    pub private_key: String,
+
+    /// The private key ID
+    pub private_key_id: String,
+
+    /// The email address associated with the service account.
+    pub client_email: String,
+
+    /// Base URL for GCS
+    #[serde(default)]
+    pub gcs_base_url: Option<String>,
+
+    /// Disable oauth and use empty tokens.
+    #[serde(default)]
+    pub disable_oauth: bool,
+}
+
+impl ServiceAccountCredentials {
+    /// Create a new [`ServiceAccountCredentials`] from a file.
+    pub(crate) fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
+        read_credentials_file(path)
+    }
+
+    /// Create a new [`ServiceAccountCredentials`] from a string.
+    pub(crate) fn from_key(key: &str) -> Result<Self> {
+        serde_json::from_str(key).map_err(|source| Error::DecodeCredentials { source })
+    }
+
+    /// Create a [`SelfSignedJwt`] from this credentials struct.
+    ///
+    /// We use a scope of [`DEFAULT_SCOPE`] as opposed to an audience
+    /// as GCS appears to not support audience
+    ///
+    /// # References
+    /// -
+    /// -
+    pub(crate) fn token_provider(self) -> crate::Result<SelfSignedJwt> {
+        Ok(SelfSignedJwt::new(
+            self.private_key_id,
+            self.client_email,
+            ServiceAccountKey::from_pem(self.private_key.as_bytes())?,
+            DEFAULT_SCOPE.to_string(),
+        )?)
+    }
+
+    pub(crate) fn signing_credentials(self) -> crate::Result<GcpSigningCredentialProvider> {
+        Ok(Arc::new(StaticCredentialProvider::new(
+            GcpSigningCredential {
+                email: self.client_email,
+                private_key: Some(ServiceAccountKey::from_pem(self.private_key.as_bytes())?),
+            },
+        )))
+    }
+}
+
+/// Returns the number of seconds since unix epoch
+fn seconds_since_epoch() -> u64 {
+    std::time::SystemTime::now()
+        .duration_since(std::time::SystemTime::UNIX_EPOCH)
+        .unwrap()
+        .as_secs()
+}
+
+fn b64_encode_obj<T: serde::Serialize>(obj: &T) -> Result<String> {
+    let string = serde_json::to_string(obj).map_err(|source| Error::Encode { source })?;
+    Ok(BASE64_URL_SAFE_NO_PAD.encode(string))
+}
+
+/// A provider that uses the Google Cloud Platform metadata server to fetch a token.
+///
+///
+#[derive(Debug, Default)]
+pub(crate) struct InstanceCredentialProvider {}
+
+/// Make a request to the metadata server to fetch a token, using a given hostname.
+async fn make_metadata_request(
+    client: &HttpClient,
+    hostname: &str,
+    retry: &RetryConfig,
+) -> crate::Result<TokenResponse> {
+    let url =
+        format!("http://{hostname}/computeMetadata/v1/instance/service-accounts/default/token");
+    let response: TokenResponse = client
+        .get(url)
+        .header("Metadata-Flavor", "Google")
+        .query(&[("audience", "https://www.googleapis.com/oauth2/v4/token")])
+        .send_retry(retry)
+        .await
+        .map_err(|source| Error::TokenRequest { source })?
+        .into_body()
+        .json()
+        .await
+        .map_err(|source| Error::TokenResponseBody { source })?;
+    Ok(response)
+}
+
+#[async_trait]
+impl TokenProvider for InstanceCredentialProvider {
+    type Credential = GcpCredential;
+
+    /// Fetch a token from the metadata server.
+    /// Since the connection is local we need to enable http access and don't actually use the client object passed in.
+    /// Respects the `GCE_METADATA_HOST`, `GCE_METADATA_ROOT`, and `GCE_METADATA_IP`
+    /// environment variables.
+    ///
+    /// References:
+    async fn fetch_token(
+        &self,
+        client: &HttpClient,
+        retry: &RetryConfig,
+    ) -> crate::Result<TemporaryToken<Arc<GcpCredential>>> {
+        let metadata_host = if let Ok(host) = env::var("GCE_METADATA_HOST") {
+            host
+        } else if let Ok(host) = env::var("GCE_METADATA_ROOT") {
+            host
+        } else {
+            DEFAULT_METADATA_HOST.to_string()
+        };
+        let metadata_ip = if let Ok(ip) = env::var("GCE_METADATA_IP") {
+            ip
+        } else {
+            DEFAULT_METADATA_IP.to_string()
+        };
+
+        info!("fetching token from metadata server");
+        let response = make_metadata_request(client, &metadata_host, retry)
+            .or_else(|_| make_metadata_request(client, &metadata_ip, retry))
+            .await?;
+
+        let token = TemporaryToken {
+            token: Arc::new(GcpCredential {
+                bearer: response.access_token,
+            }),
+            expiry: Some(Instant::now() + Duration::from_secs(response.expires_in)),
+        };
+        Ok(token)
+    }
+}
+
+/// Make a request to the metadata server to fetch the client email, using a given hostname.
+async fn make_metadata_request_for_email(
+    client: &HttpClient,
+    hostname: &str,
+    retry: &RetryConfig,
+) -> crate::Result<String> {
+    let url =
+        format!("http://{hostname}/computeMetadata/v1/instance/service-accounts/default/email",);
+    let response = client
+        .get(url)
+        .header("Metadata-Flavor", "Google")
+        .send_retry(retry)
+        .await
+        .map_err(|source| Error::TokenRequest { source })?
+        .into_body()
+        .text()
+        .await
+        .map_err(|source| Error::TokenResponseBody { source })?;
+    Ok(response)
+}
+
+/// A provider that uses the Google Cloud Platform metadata server to fetch an email for signing.
+///
+///
+#[derive(Debug, Default)]
+pub(crate) struct InstanceSigningCredentialProvider {}
+
+#[async_trait]
+impl TokenProvider for InstanceSigningCredentialProvider {
+    type Credential = GcpSigningCredential;
+
+    /// Fetch a token from the metadata server.
+    /// Since the connection is local we need to enable http access and don't actually use the client object passed in.
+    /// Respects the `GCE_METADATA_HOST`, `GCE_METADATA_ROOT`, and `GCE_METADATA_IP`
+    /// environment variables.
+    ///
+    /// References:
+    async fn fetch_token(
+        &self,
+        client: &HttpClient,
+        retry: &RetryConfig,
+    ) -> crate::Result<TemporaryToken<Arc<GcpSigningCredential>>> {
+        let metadata_host = if let Ok(host) = env::var("GCE_METADATA_HOST") {
+            host
+        } else if let Ok(host) = env::var("GCE_METADATA_ROOT") {
+            host
+        } else {
+            DEFAULT_METADATA_HOST.to_string()
+        };
+
+        let metadata_ip = if let Ok(ip) = env::var("GCE_METADATA_IP") {
+            ip
+        } else {
+            DEFAULT_METADATA_IP.to_string()
+        };
+
+        info!("fetching token from metadata server");
+
+        let email = make_metadata_request_for_email(client, &metadata_host, retry)
+            .or_else(|_| make_metadata_request_for_email(client, &metadata_ip, retry))
+            .await?;
+
+        let token = TemporaryToken {
+            token: Arc::new(GcpSigningCredential {
+                email,
+                private_key: None,
+            }),
+            expiry: None,
+        };
+        Ok(token)
+    }
+}
+
+/// A deserialized `application_default_credentials.json`-file.
+///
+/// # References
+/// -
+/// -
+#[derive(serde::Deserialize, Clone)]
+#[serde(tag = "type")]
+pub(crate) enum ApplicationDefaultCredentials {
+    /// Service Account.
+    ///
+    /// # References
+    /// -
+    #[serde(rename = "service_account")]
+    ServiceAccount(ServiceAccountCredentials),
+    /// Authorized user via "gcloud CLI Integration".
+    ///
+    /// # References
+    /// -
+    #[serde(rename = "authorized_user")]
+    AuthorizedUser(AuthorizedUserCredentials),
+}
+
+impl ApplicationDefaultCredentials {
+    const CREDENTIALS_PATH: &'static str = if cfg!(windows) {
+        "gcloud/application_default_credentials.json"
+    } else {
+        ".config/gcloud/application_default_credentials.json"
+    };
+
+    // Create a new application default credential in the following situations:
+    //  1. a file is passed in and the type matches.
+    //  2. without argument if the well-known configuration file is present.
+    pub(crate) fn read(path: Option<&str>) -> Result<Option<Self>, Error> {
+        if let Some(path) = path {
+            return read_credentials_file::<Self>(path).map(Some);
+        }
+
+        let home_var = if cfg!(windows) { "APPDATA" } else { "HOME" };
+        if let Some(home) = env::var_os(home_var) {
+            let path = Path::new(&home).join(Self::CREDENTIALS_PATH);
+
+            // It's expected for this file to not exist unless it has been explicitly configured by the user.
+            if path.exists() {
+                return read_credentials_file::<Self>(path).map(Some);
+            }
+        }
+        Ok(None)
+    }
+}
+
+const DEFAULT_TOKEN_GCP_URI: &str = "https://accounts.google.com/o/oauth2/token";
+
+///
+#[derive(Debug, Deserialize, Clone)]
+pub(crate) struct AuthorizedUserCredentials {
+    client_id: String,
+    client_secret: String,
+    refresh_token: String,
+}
+
+#[derive(Debug, Deserialize)]
+pub(crate) struct AuthorizedUserSigningCredentials {
+    credential: AuthorizedUserCredentials,
+}
+
+///
+#[derive(Debug, Deserialize)]
+struct EmailResponse {
+    email: String,
+}
+
+#[derive(Debug, Deserialize)]
+struct IdTokenClaims {
+    email: String,
+}
+
+async fn get_token_response(
+    client_id: &str,
+    client_secret: &str,
+    refresh_token: &str,
+    client: &HttpClient,
+    retry: &RetryConfig,
+) -> Result<TokenResponse> {
+    client
+        .post(DEFAULT_TOKEN_GCP_URI)
+        .form([
+            ("grant_type", "refresh_token"),
+            ("client_id", client_id),
+            ("client_secret", client_secret),
+            ("refresh_token", refresh_token),
+        ])
+        .retryable(retry)
+        .idempotent(true)
+        .send()
+        .await
+        .map_err(|source| Error::TokenRequest { source })?
+        .into_body()
+        .json::<TokenResponse>()
+        .await
+        .map_err(|source| Error::TokenResponseBody { source })
+}
+
+impl AuthorizedUserSigningCredentials {
+    pub(crate) fn from(credential: AuthorizedUserCredentials) -> crate::Result<Self> {
+        Ok(Self { credential })
+    }
+
+    async fn client_email(
+        &self,
+        client: &HttpClient,
+        retry: &RetryConfig,
+    ) -> crate::Result<String> {
+        let response = get_token_response(
+            &self.credential.client_id,
+            &self.credential.client_secret,
+            &self.credential.refresh_token,
+            client,
+            retry,
+        )
+        .await?;
+
+        // Extract email from id_token if available
+        if let Some(id_token) = response.id_token {
+            // Split the JWT string by dots to get the payload section
+            let parts: Vec<&str> = id_token.split('.').collect();
+            if parts.len() == 3 {
+                // Decode the base64-encoded payload (middle part)
+                if let Ok(payload) = BASE64_URL_SAFE_NO_PAD.decode(parts[1]) {
+                    // Parse the payload as JSON and extract the email
+                    if let Ok(claims) = serde_json::from_slice::<IdTokenClaims>(&payload) {
+                        return Ok(claims.email);
+                    }
+                }
+                // If any of the parsing steps fail, fallback to other method
+            }
+        }
+
+        // Fallback to the original method if id_token is not available or invalid
+        let response = client
+            .get("https://oauth2.googleapis.com/tokeninfo")
+            .query(&[("access_token", response.access_token)])
+            .send_retry(retry)
+            .await
+            .map_err(|source| Error::TokenRequest { source })?
+            .into_body()
+            .json::<EmailResponse>()
+            .await
+            .map_err(|source: HttpError| Error::TokenResponseBody { source })?;
+
+        Ok(response.email)
+    }
+}
+
+#[async_trait]
+impl TokenProvider for AuthorizedUserSigningCredentials {
+    type Credential = GcpSigningCredential;
+
+    async fn fetch_token(
+        &self,
+        client: &HttpClient,
+        retry: &RetryConfig,
+    ) -> crate::Result<TemporaryToken<Arc<GcpSigningCredential>>> {
+        let email = self.client_email(client, retry).await?;
+        Ok(TemporaryToken {
+            token: Arc::new(GcpSigningCredential {
+                email,
+                private_key: None,
+            }),
+            expiry: None,
+        })
+    }
+}
+
+#[async_trait]
+impl TokenProvider for AuthorizedUserCredentials {
+    type Credential = GcpCredential;
+
+    async fn fetch_token(
+        &self,
+        client: &HttpClient,
+        retry: &RetryConfig,
+    ) -> crate::Result<TemporaryToken<Arc<GcpCredential>>> {
+        let response = get_token_response(
+            &self.client_id,
+            &self.client_secret,
+            &self.refresh_token,
+            client,
+            retry,
+        )
+        .await?;
+
+        Ok(TemporaryToken {
+            token: Arc::new(GcpCredential {
+                bearer: response.access_token,
+            }),
+            expiry: Some(Instant::now() + Duration::from_secs(response.expires_in)),
+        })
+    }
+}
+
+/// Trim whitespace from header values
+fn trim_header_value(value: &str) -> String {
+    let mut ret = value.to_string();
+    ret.retain(|c| !c.is_whitespace());
+    ret
+}
+
+/// A Google Cloud Storage Authorizer for generating signed URLs using [Google SigV4]
+///
+/// [Google SigV4]: https://cloud.google.com/storage/docs/access-control/signed-urls
+#[derive(Debug)]
+pub(crate) struct GCSAuthorizer {
+    date: Option<DateTime<Utc>>,
+    credential: Arc<GcpSigningCredential>,
+}
+
+impl GCSAuthorizer {
+    /// Create a new [`GCSAuthorizer`]
+    pub(crate) fn new(credential: Arc<GcpSigningCredential>) -> Self {
+        Self {
+            date: None,
+            credential,
+        }
+    }
+
+    pub(crate) async fn sign(
+        &self,
+        method: Method,
+        url: &mut Url,
+        expires_in: Duration,
+        client: &GoogleCloudStorageClient,
+    ) -> crate::Result<()> {
+        let email = &self.credential.email;
+        let date = self.date.unwrap_or_else(Utc::now);
+        let scope = self.scope(date);
+        let credential_with_scope = format!("{email}/{scope}");
+
+        let mut headers = HeaderMap::new();
+        headers.insert("host", DEFAULT_GCS_SIGN_BLOB_HOST.parse().unwrap());
+
+        let (_, signed_headers) = Self::canonicalize_headers(&headers);
+
+        url.query_pairs_mut()
+            .append_pair("X-Goog-Algorithm", "GOOG4-RSA-SHA256")
+            .append_pair("X-Goog-Credential", &credential_with_scope)
+            .append_pair("X-Goog-Date", &date.format("%Y%m%dT%H%M%SZ").to_string())
+            .append_pair("X-Goog-Expires", &expires_in.as_secs().to_string())
+            .append_pair("X-Goog-SignedHeaders", &signed_headers);
+
+        let string_to_sign = self.string_to_sign(date, &method, url, &headers);
+        let signature = match &self.credential.private_key {
+            Some(key) => key.sign(&string_to_sign)?,
+            None => client.sign_blob(&string_to_sign, email).await?,
+        };
+
+        url.query_pairs_mut()
+            .append_pair("X-Goog-Signature", &signature);
+        Ok(())
+    }
+
+    /// Get scope for the request
+    ///
+    ///
+    fn scope(&self, date: DateTime<Utc>) -> String {
+        format!("{}/auto/storage/goog4_request", date.format("%Y%m%d"),)
+    }
+
+    /// Canonicalizes the request into the GCP canonical form
+    /// form like:
+    ///```plaintext
+    ///HTTP_VERB
+    ///PATH_TO_RESOURCE
+    ///CANONICAL_QUERY_STRING
+    ///CANONICAL_HEADERS
+    ///
+    ///SIGNED_HEADERS
+    ///PAYLOAD
+    ///```
+    ///
+    ///
+    fn canonicalize_request(url: &Url, method: &Method, headers: &HeaderMap) -> String {
+        let verb = method.as_str();
+        let path = url.path();
+        let query = Self::canonicalize_query(url);
+        let (canonical_headers, signed_headers) = Self::canonicalize_headers(headers);
+
+        format!(
+            "{verb}\n{path}\n{query}\n{canonical_headers}\n\n{signed_headers}\n{DEFAULT_GCS_PLAYLOAD_STRING}"
+        )
+    }
+
+    /// Canonicalizes query parameters into the GCP canonical form
+    /// form like `max-keys=2&prefix=object`
+    ///
+    ///
+    fn canonicalize_query(url: &Url) -> String {
+        url.query_pairs()
+            .sorted_unstable_by(|a, b| a.0.cmp(&b.0))
+            .map(|(k, v)| {
+                format!(
+                    "{}={}",
+                    utf8_percent_encode(k.as_ref(), &STRICT_ENCODE_SET),
+                    utf8_percent_encode(v.as_ref(), &STRICT_ENCODE_SET)
+                )
+            })
+            .join("&")
+    }
+
+    /// Canonicalizes headers into the GCP canonical form
+    ///
+    ///
+    fn canonicalize_headers(header_map: &HeaderMap) -> (String, String) {
+        // FIXME add error handling for invalid header values
+        let mut headers = BTreeMap::<String, Vec<&str>>::new();
+        for (k, v) in header_map {
+            headers
+                .entry(k.as_str().to_lowercase())
+                .or_default()
+                .push(std::str::from_utf8(v.as_bytes()).unwrap());
+        }
+
+        let canonicalize_headers = headers
+            .iter()
+            .map(|(k, v)| {
+                format!(
+                    "{}:{}",
+                    k.trim(),
+                    v.iter().map(|v| trim_header_value(v)).join(",")
+                )
+            })
+            .join("\n");
+
+        let signed_headers = headers.keys().join(";");
+
+        (canonicalize_headers, signed_headers)
+    }
+
+    /// Construct the string to sign
+    /// form like:
+    ///```plaintext
+    ///SIGNING_ALGORITHM
+    ///ACTIVE_DATETIME
+    ///CREDENTIAL_SCOPE
+    ///HASHED_CANONICAL_REQUEST
+    ///```
+    ///`ACTIVE_DATETIME` format:`YYYYMMDD'T'HHMMSS'Z'`
+    ///
+    pub(crate) fn string_to_sign(
+        &self,
+        date: DateTime<Utc>,
+        request_method: &Method,
+        url: &Url,
+        headers: &HeaderMap,
+    ) -> String {
+        let canonical_request = Self::canonicalize_request(url, request_method, headers);
+        let hashed_canonical_req = hex_digest(canonical_request.as_bytes());
+        let scope = self.scope(date);
+
+        format!(
+            "{}\n{}\n{}\n{}",
+            "GOOG4-RSA-SHA256",
+            date.format("%Y%m%dT%H%M%SZ"),
+            scope,
+            hashed_canonical_req
+        )
+    }
+}
+
+pub(crate) trait CredentialExt {
+    /// Apply bearer authentication to the request if the credential is not None
+    fn with_bearer_auth(self, credential: Option<&GcpCredential>) -> Self;
+}
+
+impl CredentialExt for HttpRequestBuilder {
+    fn with_bearer_auth(self, credential: Option<&GcpCredential>) -> Self {
+        match credential {
+            Some(credential) => {
+                if credential.bearer.is_empty() {
+                    self
+                } else {
+                    self.bearer_auth(&credential.bearer)
+                }
+            }
+            None => self,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_canonicalize_headers() {
+        let mut input_header = HeaderMap::new();
+        input_header.insert("content-type", "text/plain".parse().unwrap());
+        input_header.insert("host", "storage.googleapis.com".parse().unwrap());
+        input_header.insert("x-goog-meta-reviewer", "jane".parse().unwrap());
+        input_header.append("x-goog-meta-reviewer", "john".parse().unwrap());
+        assert_eq!(
+            GCSAuthorizer::canonicalize_headers(&input_header),
+            (
+                "content-type:text/plain
+host:storage.googleapis.com
+x-goog-meta-reviewer:jane,john"
+                    .into(),
+                "content-type;host;x-goog-meta-reviewer".to_string()
+            )
+        );
+    }
+
+    #[test]
+    fn test_canonicalize_query() {
+        let mut url = Url::parse("https://storage.googleapis.com/bucket/object").unwrap();
+        url.query_pairs_mut()
+            .append_pair("max-keys", "2")
+            .append_pair("prefix", "object");
+        assert_eq!(
+            GCSAuthorizer::canonicalize_query(&url),
+            "max-keys=2&prefix=object".to_string()
+        );
+    }
+}
diff --git a/rust/object_store/src/gcp/mod.rs b/rust/object_store/src/gcp/mod.rs
new file mode 100644
index 0000000000..442b24fece
--- /dev/null
+++ b/rust/object_store/src/gcp/mod.rs
@@ -0,0 +1,435 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! An object store implementation for Google Cloud Storage
+//!
+//! ## Multipart uploads
+//!
+//! [Multipart uploads](https://cloud.google.com/storage/docs/multipart-uploads)
+//! can be initiated with the [ObjectStore::put_multipart] method. If neither
+//! [`MultipartUpload::complete`] nor [`MultipartUpload::abort`] is invoked, you may
+//! be left with parts uploaded to GCS that are never used and still incur storage
+//! charges. It is recommended you configure a [lifecycle rule] to abort incomplete
+//! multipart uploads after a certain period of time to avoid being charged for
+//! storing partial uploads.
+//!
+//! ## Using HTTP/2
+//!
+//! Google Cloud Storage supports both HTTP/2 and HTTP/1. HTTP/1 is used by default
+//! because it allows much higher throughput in our benchmarks (see
+//! [#5194](https://github.com/apache/arrow-rs/issues/5194)). HTTP/2 can be
+//! enabled by setting [crate::ClientConfigKey::Http1Only] to false.
+//!
+//! [lifecycle rule]: https://cloud.google.com/storage/docs/lifecycle#abort-mpu
+use std::sync::Arc;
+use std::time::Duration;
+
+use crate::client::CredentialProvider;
+use crate::gcp::credential::GCSAuthorizer;
+use crate::signer::Signer;
+use crate::{
+    multipart::PartId, path::Path, GetOptions, GetResult, ListResult, MultipartId, MultipartUpload,
+    ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result,
+    UploadPart,
+};
+use async_trait::async_trait;
+use client::GoogleCloudStorageClient;
+use futures::stream::BoxStream;
+use http::Method;
+use url::Url;
+
+use crate::client::get::GetClientExt;
+use crate::client::list::{ListClient, ListClientExt};
+use crate::client::parts::Parts;
+use crate::list::{PaginatedListOptions, PaginatedListResult, PaginatedListStore};
+use crate::multipart::MultipartStore;
+pub use builder::{GoogleCloudStorageBuilder, GoogleConfigKey};
+pub use credential::{GcpCredential, GcpSigningCredential, ServiceAccountKey};
+
+mod builder;
+mod client;
+mod credential;
+
+const STORE: &str = "GCS";
+
+/// [`CredentialProvider`] for [`GoogleCloudStorage`]
+pub type GcpCredentialProvider = Arc<dyn CredentialProvider<Credential = GcpCredential>>;
+
+/// [`GcpSigningCredential`] for [`GoogleCloudStorage`]
+pub type GcpSigningCredentialProvider =
+    Arc<dyn CredentialProvider<Credential = GcpSigningCredential>>;
+
+/// Interface for [Google Cloud Storage](https://cloud.google.com/storage/).
+#[derive(Debug, Clone)]
+pub struct GoogleCloudStorage {
+    client: Arc<GoogleCloudStorageClient>,
+}
+
+impl std::fmt::Display for GoogleCloudStorage {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "GoogleCloudStorage({})",
+            self.client.config().bucket_name
+        )
+    }
+}
+
+impl GoogleCloudStorage {
+    /// Returns the [`GcpCredentialProvider`] used by [`GoogleCloudStorage`]
+    pub fn credentials(&self) -> &GcpCredentialProvider {
+        &self.client.config().credentials
+    }
+
+    /// Returns the [`GcpSigningCredentialProvider`] used by [`GoogleCloudStorage`]
+    pub fn signing_credentials(&self) -> &GcpSigningCredentialProvider {
+        &self.client.config().signing_credentials
+    }
+}
+
+#[derive(Debug)]
+struct GCSMultipartUpload {
+    state: Arc<UploadState>,
+    part_idx: usize,
+}
+
+#[derive(Debug)]
+struct UploadState {
+    client: Arc<GoogleCloudStorageClient>,
+    path: Path,
+    multipart_id: MultipartId,
+    parts: Parts,
+}
+
+#[async_trait]
+impl MultipartUpload for GCSMultipartUpload {
+    fn put_part(&mut self, payload: PutPayload) -> UploadPart {
+        let idx = self.part_idx;
+        self.part_idx += 1;
+        let state = Arc::clone(&self.state);
+        Box::pin(async move {
+            let part = state
+                .client
+                .put_part(&state.path, &state.multipart_id, idx, payload)
+                .await?;
+            state.parts.put(idx, part);
+            Ok(())
+        })
+    }
+
+    async fn complete(&mut self) -> Result<PutResult> {
+        let parts = self.state.parts.finish(self.part_idx)?;
+
+        self.state
+            .client
+            .multipart_complete(&self.state.path, &self.state.multipart_id, parts)
+            .await
+    }
+
+    async fn abort(&mut self) -> Result<()> {
+        self.state
+            .client
+            .multipart_cleanup(&self.state.path, &self.state.multipart_id)
+            .await
+    }
+}
+
+#[async_trait]
+impl ObjectStore for GoogleCloudStorage {
+    async fn put_opts(
+        &self,
+        location: &Path,
+        payload: PutPayload,
+        opts: PutOptions,
+    ) -> Result<PutResult> {
+        self.client.put(location, payload, opts).await
+    }
+
+    async fn put_multipart_opts(
+        &self,
+        location: &Path,
+        opts: PutMultipartOptions,
+    ) -> Result<Box<dyn MultipartUpload>> {
+        let upload_id = self.client.multipart_initiate(location, opts).await?;
+
+        Ok(Box::new(GCSMultipartUpload {
+            part_idx: 0,
+            state: Arc::new(UploadState {
+                client: Arc::clone(&self.client),
+                path: location.clone(),
+                multipart_id: upload_id.clone(),
+                parts: Default::default(),
+            }),
+        }))
+    }
+
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
+        self.client.get_opts(location, options).await
+    }
+
+    async fn delete(&self, location: &Path) -> Result<()> {
+        self.client.delete_request(location).await
+    }
+
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
+        self.client.list(prefix)
+    }
+
+    fn list_with_offset(
+        &self,
+        prefix: Option<&Path>,
+        offset: &Path,
+    ) -> BoxStream<'static, Result<ObjectMeta>> {
+        self.client.list_with_offset(prefix, offset)
+    }
+
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
+        self.client.list_with_delimiter(prefix).await
+    }
+
+    async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
+        self.client.copy_request(from, to, false).await
+    }
+
+    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
+        self.client.copy_request(from, to, true).await
+    }
+}
+
+#[async_trait]
+impl MultipartStore for GoogleCloudStorage {
+    async fn create_multipart(&self, path: &Path) -> Result<MultipartId> {
+        self.client
+            .multipart_initiate(path, PutMultipartOptions::default())
+            .await
+    }
+
+    async fn put_part(
+        &self,
+        path: &Path,
+        id: &MultipartId,
+        part_idx: usize,
+        payload: PutPayload,
+    ) -> Result<PartId> {
+        self.client.put_part(path, id, part_idx, payload).await
+    }
+
+    async fn complete_multipart(
+        &self,
+        path: &Path,
+        id: &MultipartId,
+        parts: Vec<PartId>,
+    ) -> Result<PutResult> {
+        self.client.multipart_complete(path, id, parts).await
+    }
+
+    async fn abort_multipart(&self, path: &Path, id: &MultipartId) -> Result<()> {
+        self.client.multipart_cleanup(path, id).await
+    }
+}
+
+#[async_trait]
+impl Signer for GoogleCloudStorage {
+    async fn signed_url(&self, method: Method, path: &Path, expires_in: Duration) -> Result<Url> {
+        if expires_in.as_secs() > 604800 {
+            return Err(crate::Error::Generic {
+                store: STORE,
+                source: "Expiration Time can't be longer than 604800 seconds (7 days).".into(),
+            });
+        }
+
+        let config = self.client.config();
+        let path_url = config.path_url(path);
+        let mut url = Url::parse(&path_url).map_err(|e| crate::Error::Generic {
+            store: STORE,
+            source: format!("Unable to parse url {path_url}: {e}").into(),
+        })?;
+
+        let signing_credentials = self.signing_credentials().get_credential().await?;
+        let authorizer = GCSAuthorizer::new(signing_credentials);
+
+        authorizer
+            .sign(method, &mut url, expires_in, &self.client)
+            .await?;
+
+        Ok(url)
+    }
+}
+
+#[async_trait]
+impl PaginatedListStore for GoogleCloudStorage {
+    async fn list_paginated(
+        &self,
+        prefix: Option<&str>,
+        opts: PaginatedListOptions,
+    ) -> Result<PaginatedListResult> {
+        self.client.list_request(prefix, opts).await
+    }
+}
+
+#[cfg(test)]
+mod test {
+
+    use credential::DEFAULT_GCS_BASE_URL;
+
+    use crate::integration::*;
+    use crate::tests::*;
+
+    use super::*;
+
+    const NON_EXISTENT_NAME: &str = "nonexistentname";
+
+    #[tokio::test]
+    async fn gcs_test() {
+        maybe_skip_integration!();
+        let integration = GoogleCloudStorageBuilder::from_env().build().unwrap();
+
+        put_get_delete_list(&integration).await;
+        list_uses_directories_correctly(&integration).await;
+        list_with_delimiter(&integration).await;
+        rename_and_copy(&integration).await;
+        if integration.client.config().base_url == DEFAULT_GCS_BASE_URL {
+            // Fake GCS server doesn't currently honor ifGenerationMatch
+            // https://github.com/fsouza/fake-gcs-server/issues/994
+            copy_if_not_exists(&integration).await;
+            // Fake GCS server does not yet implement XML Multipart uploads
+            // https://github.com/fsouza/fake-gcs-server/issues/852
+            stream_get(&integration).await;
+            multipart(&integration, &integration).await;
+            multipart_race_condition(&integration, true).await;
+            multipart_out_of_order(&integration).await;
+            list_paginated(&integration, &integration).await;
+            // Fake GCS server doesn't currently honor preconditions
+            get_opts(&integration).await;
+            put_opts(&integration, true).await;
+            // Fake GCS server doesn't currently support attributes
+            put_get_attributes(&integration).await;
+        }
+    }
+
+    #[tokio::test]
+    #[ignore]
+    async fn gcs_test_sign() {
+        maybe_skip_integration!();
+        let integration = GoogleCloudStorageBuilder::from_env().build().unwrap();
+
+        let client = reqwest::Client::new();
+
+        let path = Path::from("test_sign");
+        let url = integration
+            .signed_url(Method::PUT, &path, Duration::from_secs(3600))
+            .await
+            .unwrap();
+        println!("PUT {url}");
+
+        let resp = client.put(url).body("data").send().await.unwrap();
+        resp.error_for_status().unwrap();
+
+        let url = integration
+            .signed_url(Method::GET, &path, Duration::from_secs(3600))
+            .await
+            .unwrap();
+        println!("GET {url}");
+
+        let resp = client.get(url).send().await.unwrap();
+        let resp = resp.error_for_status().unwrap();
+        let data = resp.bytes().await.unwrap();
+        assert_eq!(data.as_ref(), b"data");
+    }
+
+    #[tokio::test]
+    async fn gcs_test_get_nonexistent_location() {
+        maybe_skip_integration!();
+        let integration = GoogleCloudStorageBuilder::from_env().build().unwrap();
+
+        let location = Path::from_iter([NON_EXISTENT_NAME]);
+
+        let err = integration.get(&location).await.unwrap_err();
+
+        assert!(
+            matches!(err, crate::Error::NotFound { .. }),
+            "unexpected error type: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn gcs_test_get_nonexistent_bucket() {
+        maybe_skip_integration!();
+        let config = GoogleCloudStorageBuilder::from_env();
+        let integration = config.with_bucket_name(NON_EXISTENT_NAME).build().unwrap();
+
+        let location = Path::from_iter([NON_EXISTENT_NAME]);
+
+        let err = get_nonexistent_object(&integration, Some(location))
+            .await
+            .unwrap_err();
+
+        assert!(
+            matches!(err, crate::Error::NotFound { .. }),
+            "unexpected error type: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn gcs_test_delete_nonexistent_location() {
+        maybe_skip_integration!();
+        let integration = GoogleCloudStorageBuilder::from_env().build().unwrap();
+
+        let location = Path::from_iter([NON_EXISTENT_NAME]);
+
+        let err = integration.delete(&location).await.unwrap_err();
+        assert!(
+            matches!(err, crate::Error::NotFound { .. }),
+            "unexpected error type: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn gcs_test_delete_nonexistent_bucket() {
+        maybe_skip_integration!();
+        let config = GoogleCloudStorageBuilder::from_env();
+        let integration = config.with_bucket_name(NON_EXISTENT_NAME).build().unwrap();
+
+        let location = Path::from_iter([NON_EXISTENT_NAME]);
+
+        let err = integration.delete(&location).await.unwrap_err();
+        assert!(
+            matches!(err, crate::Error::NotFound { .. }),
+            "unexpected error type: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn gcs_test_put_nonexistent_bucket() {
+        maybe_skip_integration!();
+        let config = GoogleCloudStorageBuilder::from_env();
+        let integration = config.with_bucket_name(NON_EXISTENT_NAME).build().unwrap();
+
+        let location = Path::from_iter([NON_EXISTENT_NAME]);
+        let data = PutPayload::from("arbitrary data");
+
+        let err = integration
+            .put(&location, data)
+            .await
+            .unwrap_err()
+            .to_string();
+        assert!(
+            err.contains("Server returned non-2xx status code: 404 Not Found"),
+            "{}",
+            err
+        )
+    }
+}
diff --git a/rust/object_store/src/http/client.rs b/rust/object_store/src/http/client.rs
new file mode 100644
index 0000000000..d08e9faf93
--- /dev/null
+++ b/rust/object_store/src/http/client.rs
@@ -0,0 +1,516 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
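Editor's note: before the WebDAV-backed HTTP client below, here is a hedged usage sketch for the `Signer` implementation added in `gcp/mod.rs` above. It assumes the vendored crate is consumed under the upstream `object_store` crate name with the same public API, that `tokio` and `http` are available, and the bucket name is a placeholder; it is an illustration, not part of the patch:

```rust
// Hypothetical caller of the `Signer` impl added above. The bucket name and
// env-based configuration are placeholders, not part of this patch.
use std::time::Duration;

use object_store::gcp::GoogleCloudStorageBuilder;
use object_store::path::Path;
use object_store::signer::Signer;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = GoogleCloudStorageBuilder::from_env()
        .with_bucket_name("example-bucket")
        .build()?;

    // One hour is well under the 604800-second (7 day) cap that
    // `signed_url` enforces; longer durations return a Generic error.
    let url = store
        .signed_url(
            http::Method::GET,
            &Path::from("data/file.parquet"),
            Duration::from_secs(3600),
        )
        .await?;
    println!("{url}");
    Ok(())
}
```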
+ +use super::STORE; +use crate::client::get::GetClient; +use crate::client::header::HeaderConfig; +use crate::client::retry::{self, RetryConfig, RetryContext, RetryExt}; +use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpResponse}; +use crate::path::{Path, DELIMITER}; +use crate::util::deserialize_rfc1123; +use crate::{Attribute, Attributes, ClientOptions, GetOptions, ObjectMeta, PutPayload, Result}; +use async_trait::async_trait; +use bytes::Buf; +use chrono::{DateTime, Utc}; +use http::header::{ + CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, CONTENT_LENGTH, + CONTENT_TYPE, +}; +use percent_encoding::percent_decode_str; +use reqwest::{Method, StatusCode}; +use serde::Deserialize; +use url::Url; + +#[derive(Debug, thiserror::Error)] +enum Error { + #[error("Request error: {}", source)] + Request { + source: retry::RetryError, + path: String, + }, + + #[error("Request error: {}", source)] + Reqwest { source: HttpError }, + + #[error("Range request not supported by {}", href)] + RangeNotSupported { href: String }, + + #[error("Error decoding PROPFIND response: {}", source)] + InvalidPropFind { source: quick_xml::de::DeError }, + + #[error("Missing content size for {}", href)] + MissingSize { href: String }, + + #[error("Error getting properties of \"{}\" got \"{}\"", href, status)] + PropStatus { href: String, status: String }, + + #[error("Failed to parse href \"{}\": {}", href, source)] + InvalidHref { + href: String, + source: url::ParseError, + }, + + #[error("Path \"{}\" contained non-unicode characters: {}", path, source)] + NonUnicode { + path: String, + source: std::str::Utf8Error, + }, + + #[error("Encountered invalid path \"{}\": {}", path, source)] + InvalidPath { + path: String, + source: crate::path::Error, + }, +} + +impl From for crate::Error { + fn from(err: Error) -> Self { + match err { + Error::Request { source, path } => source.error(STORE, path), + _ => Self::Generic { + store: STORE, + source: Box::new(err), + }, + } + } +} + +/// Internal client for HttpStore +#[derive(Debug)] +pub(crate) struct Client { + url: Url, + client: HttpClient, + retry_config: RetryConfig, + client_options: ClientOptions, +} + +impl Client { + pub(crate) fn new( + url: Url, + client: HttpClient, + client_options: ClientOptions, + retry_config: RetryConfig, + ) -> Self { + Self { + url, + retry_config, + client_options, + client, + } + } + + pub(crate) fn base_url(&self) -> &Url { + &self.url + } + + fn path_url(&self, location: &Path) -> String { + let mut url = self.url.clone(); + url.path_segments_mut().unwrap().extend(location.parts()); + url.to_string() + } + + /// Create a directory with `path` using MKCOL + async fn make_directory(&self, path: &str) -> Result<(), Error> { + let method = Method::from_bytes(b"MKCOL").unwrap(); + let mut url = self.url.clone(); + url.path_segments_mut() + .unwrap() + .extend(path.split(DELIMITER)); + + self.client + .request(method, String::from(url)) + .send_retry(&self.retry_config) + .await + .map_err(|source| Error::Request { + source, + path: path.to_string(), + })?; + + Ok(()) + } + + /// Recursively create parent directories + async fn create_parent_directories(&self, location: &Path) -> Result<()> { + let mut stack = vec![]; + + // Walk backwards until a request succeeds + let mut last_prefix = location.as_ref(); + while let Some((prefix, _)) = last_prefix.rsplit_once(DELIMITER) { + last_prefix = prefix; + + match self.make_directory(prefix).await { + Ok(_) => break, + Err(Error::Request { source, path: _ }) 
+                    if matches!(source.status(), Some(StatusCode::CONFLICT)) =>
+                {
+                    // Need to create parent
+                    stack.push(prefix)
+                }
+                Err(e) => return Err(e.into()),
+            }
+        }
+
+        // Retry the failed requests, which should now succeed
+        for prefix in stack.into_iter().rev() {
+            self.make_directory(prefix).await?;
+        }
+
+        Ok(())
+    }
+
+    pub(crate) async fn put(
+        &self,
+        location: &Path,
+        payload: PutPayload,
+        attributes: Attributes,
+    ) -> Result<HttpResponse> {
+        let mut retry = false;
+        loop {
+            let url = self.path_url(location);
+            let mut builder = self.client.put(url);
+
+            let mut has_content_type = false;
+            for (k, v) in &attributes {
+                builder = match k {
+                    Attribute::CacheControl => builder.header(CACHE_CONTROL, v.as_ref()),
+                    Attribute::ContentDisposition => {
+                        builder.header(CONTENT_DISPOSITION, v.as_ref())
+                    }
+                    Attribute::ContentEncoding => builder.header(CONTENT_ENCODING, v.as_ref()),
+                    Attribute::ContentLanguage => builder.header(CONTENT_LANGUAGE, v.as_ref()),
+                    Attribute::ContentType => {
+                        has_content_type = true;
+                        builder.header(CONTENT_TYPE, v.as_ref())
+                    }
+                    Attribute::StorageClass => {
+                        tracing::warn!("StorageClass attribute not supported on HTTP client as header key is unknown");
+                        builder
+                    }
+                    // Ignore metadata attributes
+                    Attribute::Metadata(_) => builder,
+                };
+            }
+
+            if !has_content_type {
+                if let Some(value) = self.client_options.get_content_type(location) {
+                    builder = builder.header(CONTENT_TYPE, value);
+                }
+            }
+
+            let resp = builder
+                .header(CONTENT_LENGTH, payload.content_length())
+                .retryable(&self.retry_config)
+                .idempotent(true)
+                .payload(Some(payload.clone()))
+                .send()
+                .await;
+
+            match resp {
+                Ok(response) => return Ok(response),
+                Err(source) => match source.status() {
+                    // Some implementations return 404 instead of 409
+                    Some(StatusCode::CONFLICT | StatusCode::NOT_FOUND) if !retry => {
+                        retry = true;
+                        self.create_parent_directories(location).await?
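+                        // Loop round and retry the PUT now that the missing
+                        // parent collections have been created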
+                    }
+                    _ => {
+                        return Err(Error::Request {
+                            source,
+                            path: location.to_string(),
+                        }
+                        .into())
+                    }
+                },
+            }
+        }
+    }
+
+    pub(crate) async fn list(&self, location: Option<&Path>, depth: &str) -> Result<MultiStatus> {
+        let url = location
+            .map(|path| self.path_url(path))
+            .unwrap_or_else(|| self.url.to_string());
+
+        let method = Method::from_bytes(b"PROPFIND").unwrap();
+        let result = self
+            .client
+            .request(method, url)
+            .header("Depth", depth)
+            .retryable(&self.retry_config)
+            .idempotent(true)
+            .send()
+            .await;
+
+        let response = match result {
+            Ok(result) => result
+                .into_body()
+                .bytes()
+                .await
+                .map_err(|source| Error::Reqwest { source })?,
+            Err(e) if matches!(e.status(), Some(StatusCode::NOT_FOUND)) => {
+                return match depth {
+                    "0" => {
+                        let path = location.map(|x| x.as_ref()).unwrap_or("");
+                        Err(crate::Error::NotFound {
+                            path: path.to_string(),
+                            source: Box::new(e),
+                        })
+                    }
+                    _ => {
+                        // If prefix not found, return empty result set
+                        Ok(Default::default())
+                    }
+                };
+            }
+            Err(source) => {
+                return Err(Error::Request {
+                    source,
+                    path: location.map(|x| x.to_string()).unwrap_or_default(),
+                }
+                .into())
+            }
+        };
+
+        let status = quick_xml::de::from_reader(response.reader())
+            .map_err(|source| Error::InvalidPropFind { source })?;
+
+        Ok(status)
+    }
+
+    pub(crate) async fn delete(&self, path: &Path) -> Result<()> {
+        let url = self.path_url(path);
+        self.client
+            .delete(url)
+            .send_retry(&self.retry_config)
+            .await
+            .map_err(|source| source.error(STORE, path.to_string()))?;
+        Ok(())
+    }
+
+    pub(crate) async fn copy(&self, from: &Path, to: &Path, overwrite: bool) -> Result<()> {
+        let mut retry = false;
+        loop {
+            let method = Method::from_bytes(b"COPY").unwrap();
+
+            let mut builder = self
+                .client
+                .request(method, self.path_url(from))
+                .header("Destination", self.path_url(to).as_str());
+
+            if !overwrite {
+                // While the Overwrite header appears to duplicate
+                // the functionality of the If-Match: * header of HTTP/1.1, If-Match
+                // applies only to the Request-URI, and not to the Destination of a COPY
+                // or MOVE.
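+                // See RFC 2518 section 9.6: sending "Overwrite: F" makes a
+                // compliant server fail the COPY with 412 Precondition Failed
+                // rather than replacing an existing destination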
+                builder = builder.header("Overwrite", "F");
+            }
+
+            return match builder.send_retry(&self.retry_config).await {
+                Ok(_) => Ok(()),
+                Err(source) => Err(match source.status() {
+                    Some(StatusCode::PRECONDITION_FAILED) if !overwrite => {
+                        crate::Error::AlreadyExists {
+                            path: to.to_string(),
+                            source: Box::new(source),
+                        }
+                    }
+                    // Some implementations return 404 instead of 409
+                    Some(StatusCode::CONFLICT | StatusCode::NOT_FOUND) if !retry => {
+                        retry = true;
+                        self.create_parent_directories(to).await?;
+                        continue;
+                    }
+                    _ => Error::Request {
+                        source,
+                        path: from.to_string(),
+                    }
+                    .into(),
+                }),
+            };
+        }
+    }
+}
+
+#[async_trait]
+impl GetClient for Client {
+    const STORE: &'static str = STORE;
+
+    /// Override the [`HeaderConfig`] to be less strict to support a
+    /// broader range of HTTP servers (#4831)
+    const HEADER_CONFIG: HeaderConfig = HeaderConfig {
+        etag_required: false,
+        last_modified_required: false,
+        version_header: None,
+        user_defined_metadata_prefix: None,
+    };
+
+    fn retry_config(&self) -> &RetryConfig {
+        &self.retry_config
+    }
+
+    async fn get_request(
+        &self,
+        ctx: &mut RetryContext,
+        path: &Path,
+        options: GetOptions,
+    ) -> Result<HttpResponse> {
+        let url = self.path_url(path);
+        let method = match options.head {
+            true => Method::HEAD,
+            false => Method::GET,
+        };
+        let has_range = options.range.is_some();
+        let builder = self.client.request(method, url);
+
+        let res = builder
+            .with_get_options(options)
+            .retryable_request()
+            .send(ctx)
+            .await
+            .map_err(|source| match source.status() {
+                // Some stores return METHOD_NOT_ALLOWED for get on directories
+                Some(StatusCode::NOT_FOUND | StatusCode::METHOD_NOT_ALLOWED) => {
+                    crate::Error::NotFound {
+                        source: Box::new(source),
+                        path: path.to_string(),
+                    }
+                }
+                _ => Error::Request {
+                    source,
+                    path: path.to_string(),
+                }
+                .into(),
+            })?;
+
+        // We expect a 206 Partial Content response if a range was requested
+        // a 200 OK response would indicate the server did not fulfill the request
+        if has_range && res.status() != StatusCode::PARTIAL_CONTENT {
+            return Err(crate::Error::NotSupported {
+                source: Box::new(Error::RangeNotSupported {
+                    href: path.to_string(),
+                }),
+            });
+        }
+
+        Ok(res)
+    }
+}
+
+/// The response returned by a PROPFIND request, i.e. list
+#[derive(Deserialize, Default)]
+pub(crate) struct MultiStatus {
+    pub response: Vec<MultiStatusResponse>,
+}
+
+#[derive(Deserialize)]
+pub(crate) struct MultiStatusResponse {
+    href: String,
+    #[serde(rename = "propstat")]
+    prop_stat: PropStat,
+}
+
+impl MultiStatusResponse {
+    /// Returns an error if this response is not OK
+    pub(crate) fn check_ok(&self) -> Result<()> {
+        match self.prop_stat.status.contains("200 OK") {
+            true => Ok(()),
+            false => Err(Error::PropStatus {
+                href: self.href.clone(),
+                status: self.prop_stat.status.clone(),
+            }
+            .into()),
+        }
+    }
+
+    /// Returns the resolved path of this element relative to `base_url`
+    pub(crate) fn path(&self, base_url: &Url) -> Result<Path> {
+        let url = Url::options()
+            .base_url(Some(base_url))
+            .parse(&self.href)
+            .map_err(|source| Error::InvalidHref {
+                href: self.href.clone(),
+                source,
+            })?;
+
+        // Reverse any percent encoding
+        let path = percent_decode_str(url.path())
+            .decode_utf8()
+            .map_err(|source| Error::NonUnicode {
+                path: url.path().into(),
+                source,
+            })?;
+
+        Ok(Path::parse(path.as_ref()).map_err(|source| {
+            let path = path.into();
+            Error::InvalidPath { path, source }
+        })?)
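+        // NB: hrefs in a PROPFIND response may be absolute or relative;
+        // resolving against `base_url` above handles both forms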
+    }
+
+    fn size(&self) -> Result<u64> {
+        let size = self
+            .prop_stat
+            .prop
+            .content_length
+            .ok_or_else(|| Error::MissingSize {
+                href: self.href.clone(),
+            })?;
+
+        Ok(size)
+    }
+
+    /// Returns this object's metadata as [`ObjectMeta`]
+    pub(crate) fn object_meta(&self, base_url: &Url) -> Result<ObjectMeta> {
+        let last_modified = self.prop_stat.prop.last_modified;
+        Ok(ObjectMeta {
+            location: self.path(base_url)?,
+            last_modified,
+            size: self.size()?,
+            e_tag: self.prop_stat.prop.e_tag.clone(),
+            version: None,
+        })
+    }
+
+    /// Returns true if this is a directory / collection
+    pub(crate) fn is_dir(&self) -> bool {
+        self.prop_stat.prop.resource_type.collection.is_some()
+    }
+}
+
+#[derive(Deserialize)]
+pub(crate) struct PropStat {
+    prop: Prop,
+    status: String,
+}
+
+#[derive(Deserialize)]
+pub(crate) struct Prop {
+    #[serde(deserialize_with = "deserialize_rfc1123", rename = "getlastmodified")]
+    last_modified: DateTime<Utc>,
+
+    #[serde(rename = "getcontentlength")]
+    content_length: Option<u64>,
+
+    #[serde(rename = "resourcetype")]
+    resource_type: ResourceType,
+
+    #[serde(rename = "getetag")]
+    e_tag: Option<String>,
+}
+
+#[derive(Deserialize)]
+pub(crate) struct ResourceType {
+    collection: Option<()>,
+}
diff --git a/rust/object_store/src/http/mod.rs b/rust/object_store/src/http/mod.rs
new file mode 100644
index 0000000000..8581f92372
--- /dev/null
+++ b/rust/object_store/src/http/mod.rs
@@ -0,0 +1,293 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! An object store implementation for generic HTTP servers
+//!
+//! This follows [rfc2518] commonly known as [WebDAV]
+//!
+//! Basic get support will work out of the box with most HTTP servers,
+//! even those that don't explicitly support [rfc2518]
+//!
+//! Other operations such as list, delete, copy, etc... will likely
+//! require server-side configuration. A list of HTTP servers with support
+//! can be found [here](https://wiki.archlinux.org/title/WebDAV#Server)
+//!
+//! Multipart uploads are not currently supported
+//!
+//! [rfc2518]: https://datatracker.ietf.org/doc/html/rfc2518
+//! [WebDAV]: https://en.wikipedia.org/wiki/WebDAV
+
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use futures::stream::BoxStream;
+use futures::{StreamExt, TryStreamExt};
+use itertools::Itertools;
+use url::Url;
+
+use crate::client::get::GetClientExt;
+use crate::client::header::get_etag;
+use crate::client::{http_connector, HttpConnector};
+use crate::http::client::Client;
+use crate::path::Path;
+use crate::{
+    ClientConfigKey, ClientOptions, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta,
+    ObjectStore, PutMode, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result,
+    RetryConfig,
+};
+
+mod client;
+
+const STORE: &str = "HTTP";
+
+#[derive(Debug, thiserror::Error)]
+enum Error {
+    #[error("Must specify a URL")]
+    MissingUrl,
+
+    #[error("Unable to parse source url. Url: {}, Error: {}", url, source)]
+    UnableToParseUrl {
+        source: url::ParseError,
+        url: String,
+    },
+
+    #[error("Unable to extract metadata from headers: {}", source)]
+    Metadata {
+        source: crate::client::header::Error,
+    },
+}
+
+impl From<Error> for crate::Error {
+    fn from(err: Error) -> Self {
+        Self::Generic {
+            store: STORE,
+            source: Box::new(err),
+        }
+    }
+}
+
+/// An [`ObjectStore`] implementation for generic HTTP servers
+///
+/// See [`crate::http`] for more information
+#[derive(Debug)]
+pub struct HttpStore {
+    client: Arc<Client>,
+}
+
+impl std::fmt::Display for HttpStore {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "HttpStore")
+    }
+}
+
+#[async_trait]
+impl ObjectStore for HttpStore {
+    async fn put_opts(
+        &self,
+        location: &Path,
+        payload: PutPayload,
+        opts: PutOptions,
+    ) -> Result<PutResult> {
+        if opts.mode != PutMode::Overwrite {
+            // TODO: Add support for If header - https://datatracker.ietf.org/doc/html/rfc2518#section-9.4
+            return Err(crate::Error::NotImplemented);
+        }
+
+        let response = self.client.put(location, payload, opts.attributes).await?;
+        let e_tag = match get_etag(response.headers()) {
+            Ok(e_tag) => Some(e_tag),
+            Err(crate::client::header::Error::MissingEtag) => None,
+            Err(source) => return Err(Error::Metadata { source }.into()),
+        };
+
+        Ok(PutResult {
+            e_tag,
+            version: None,
+        })
+    }
+
+    async fn put_multipart_opts(
+        &self,
+        _location: &Path,
+        _opts: PutMultipartOptions,
+    ) -> Result<Box<dyn MultipartUpload>> {
+        Err(crate::Error::NotImplemented)
+    }
+
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
+        self.client.get_opts(location, options).await
+    }
+
+    async fn delete(&self, location: &Path) -> Result<()> {
+        self.client.delete(location).await
+    }
+
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
+        let prefix_len = prefix.map(|p| p.as_ref().len()).unwrap_or_default();
+        let prefix = prefix.cloned();
+        let client = Arc::clone(&self.client);
+        futures::stream::once(async move {
+            let status = client.list(prefix.as_ref(), "infinity").await?;
+
+            let iter = status
+                .response
+                .into_iter()
+                .filter(|r| !r.is_dir())
+                .map(move |response| {
+                    response.check_ok()?;
+                    response.object_meta(client.base_url())
+                })
+                // Filter out exact prefix matches
+                .filter_ok(move |r| r.location.as_ref().len() > prefix_len);
+
+            Ok::<_, crate::Error>(futures::stream::iter(iter))
+        })
+        .try_flatten()
+        .boxed()
+    }
+
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
+        let status = self.client.list(prefix, "1").await?;
+        let prefix_len = prefix.map(|p| p.as_ref().len()).unwrap_or(0);
+
+        let mut objects: Vec<ObjectMeta> = Vec::with_capacity(status.response.len());
+        let mut common_prefixes =
Vec::with_capacity(status.response.len()); + for response in status.response { + response.check_ok()?; + match response.is_dir() { + false => { + let meta = response.object_meta(self.client.base_url())?; + // Filter out exact prefix matches + if meta.location.as_ref().len() > prefix_len { + objects.push(meta); + } + } + true => { + let path = response.path(self.client.base_url())?; + // Exclude the current object + if path.as_ref().len() > prefix_len { + common_prefixes.push(path); + } + } + } + } + + Ok(ListResult { + common_prefixes, + objects, + }) + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + self.client.copy(from, to, true).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + self.client.copy(from, to, false).await + } +} + +/// Configure a connection to a generic HTTP server +#[derive(Debug, Default, Clone)] +pub struct HttpBuilder { + url: Option, + client_options: ClientOptions, + retry_config: RetryConfig, + http_connector: Option>, +} + +impl HttpBuilder { + /// Create a new [`HttpBuilder`] with default values. + pub fn new() -> Self { + Default::default() + } + + /// Set the URL + pub fn with_url(mut self, url: impl Into) -> Self { + self.url = Some(url.into()); + self + } + + /// Set the retry configuration + pub fn with_retry(mut self, retry_config: RetryConfig) -> Self { + self.retry_config = retry_config; + self + } + + /// Set individual client configuration without overriding the entire config + pub fn with_config(mut self, key: ClientConfigKey, value: impl Into) -> Self { + self.client_options = self.client_options.with_config(key, value); + self + } + + /// Sets the client options, overriding any already set + pub fn with_client_options(mut self, options: ClientOptions) -> Self { + self.client_options = options; + self + } + + /// The [`HttpConnector`] to use + /// + /// On non-WASM32 platforms uses [`reqwest`] by default, on WASM32 platforms must be provided + pub fn with_http_connector(mut self, connector: C) -> Self { + self.http_connector = Some(Arc::new(connector)); + self + } + + /// Build an [`HttpStore`] with the configured options + pub fn build(self) -> Result { + let url = self.url.ok_or(Error::MissingUrl)?; + let parsed = Url::parse(&url).map_err(|source| Error::UnableToParseUrl { url, source })?; + + let client = http_connector(self.http_connector)?.connect(&self.client_options)?; + + Ok(HttpStore { + client: Arc::new(Client::new( + parsed, + client, + self.client_options, + self.retry_config, + )), + }) + } +} + +#[cfg(test)] +mod tests { + use crate::integration::*; + use crate::tests::*; + + use super::*; + + #[tokio::test] + async fn http_test() { + maybe_skip_integration!(); + let url = std::env::var("HTTP_URL").expect("HTTP_URL must be set"); + let options = ClientOptions::new().with_allow_http(true); + let integration = HttpBuilder::new() + .with_url(url) + .with_client_options(options) + .build() + .unwrap(); + + put_get_delete_list(&integration).await; + list_uses_directories_correctly(&integration).await; + list_with_delimiter(&integration).await; + rename_and_copy(&integration).await; + copy_if_not_exists(&integration).await; + } +} diff --git a/rust/object_store/src/integration.rs b/rust/object_store/src/integration.rs new file mode 100644 index 0000000000..99ee86da40 --- /dev/null +++ b/rust/object_store/src/integration.rs @@ -0,0 +1,1324 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Integration tests for custom object store implementations +//! +//! NB: These tests will delete everything present in the provided [`DynObjectStore`]. +//! +//! These tests are not a stable part of the public API and breaking changes may be made +//! in patch releases. +//! +//! They are intended solely for testing purposes. + +use crate::list::{PaginatedListOptions, PaginatedListStore}; +use crate::multipart::MultipartStore; +use crate::path::Path; +use crate::{ + Attribute, Attributes, DynObjectStore, Error, GetOptions, GetRange, MultipartUpload, + ObjectStore, PutMode, PutPayload, UpdateVersion, WriteMultipart, +}; +use bytes::Bytes; +use futures::stream::FuturesUnordered; +use futures::{StreamExt, TryStreamExt}; +use rand::distr::Alphanumeric; +use rand::{rng, Rng}; +use std::collections::HashSet; +use std::slice; + +pub(crate) async fn flatten_list_stream( + storage: &DynObjectStore, + prefix: Option<&Path>, +) -> crate::Result> { + storage + .list(prefix) + .map_ok(|meta| meta.location) + .try_collect::>() + .await +} + +/// Tests basic read/write and listing operations +pub async fn put_get_delete_list(storage: &DynObjectStore) { + delete_fixtures(storage).await; + + let content_list = flatten_list_stream(storage, None).await.unwrap(); + assert!( + content_list.is_empty(), + "Expected list to be empty; found: {content_list:?}" + ); + + let location = Path::from("test_dir/test_file.json"); + + let data = Bytes::from("arbitrary data"); + storage.put(&location, data.clone().into()).await.unwrap(); + + let root = Path::from("/"); + + // List everything + let content_list = flatten_list_stream(storage, None).await.unwrap(); + assert_eq!(content_list, slice::from_ref(&location)); + + // Should behave the same as no prefix + let content_list = flatten_list_stream(storage, Some(&root)).await.unwrap(); + assert_eq!(content_list, slice::from_ref(&location)); + + // List with delimiter + let result = storage.list_with_delimiter(None).await.unwrap(); + assert_eq!(&result.objects, &[]); + assert_eq!(result.common_prefixes.len(), 1); + assert_eq!(result.common_prefixes[0], Path::from("test_dir")); + + // Should behave the same as no prefix + let result = storage.list_with_delimiter(Some(&root)).await.unwrap(); + assert!(result.objects.is_empty()); + assert_eq!(result.common_prefixes.len(), 1); + assert_eq!(result.common_prefixes[0], Path::from("test_dir")); + + // Should return not found + let err = storage.get(&Path::from("test_dir")).await.unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err); + + // Should return not found + let err = storage.head(&Path::from("test_dir")).await.unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. 
}), "{}", err); + + // List everything starting with a prefix that should return results + let prefix = Path::from("test_dir"); + let content_list = flatten_list_stream(storage, Some(&prefix)).await.unwrap(); + assert_eq!(content_list, slice::from_ref(&location)); + + // List everything starting with a prefix that shouldn't return results + let prefix = Path::from("something"); + let content_list = flatten_list_stream(storage, Some(&prefix)).await.unwrap(); + assert!(content_list.is_empty()); + + let read_data = storage.get(&location).await.unwrap().bytes().await.unwrap(); + assert_eq!(&*read_data, data); + + // Test range request + let range = 3..7; + let range_result = storage.get_range(&location, range.clone()).await; + + let bytes = range_result.unwrap(); + assert_eq!(bytes, data.slice(range.start as usize..range.end as usize)); + + let opts = GetOptions { + range: Some(GetRange::Bounded(2..5)), + ..Default::default() + }; + let result = storage.get_opts(&location, opts).await.unwrap(); + // Data is `"arbitrary data"`, length 14 bytes + assert_eq!(result.meta.size, 14); // Should return full object size (#5272) + assert_eq!(result.range, 2..5); + let bytes = result.bytes().await.unwrap(); + assert_eq!(bytes, b"bit".as_ref()); + + let out_of_range = 200..300; + let out_of_range_result = storage.get_range(&location, out_of_range).await; + + // Should be a non-fatal error + out_of_range_result.unwrap_err(); + + let opts = GetOptions { + range: Some(GetRange::Bounded(2..100)), + ..Default::default() + }; + let result = storage.get_opts(&location, opts).await.unwrap(); + assert_eq!(result.range, 2..14); + assert_eq!(result.meta.size, 14); + let bytes = result.bytes().await.unwrap(); + assert_eq!(bytes, b"bitrary data".as_ref()); + + let opts = GetOptions { + range: Some(GetRange::Suffix(2)), + ..Default::default() + }; + match storage.get_opts(&location, opts).await { + Ok(result) => { + assert_eq!(result.range, 12..14); + assert_eq!(result.meta.size, 14); + let bytes = result.bytes().await.unwrap(); + assert_eq!(bytes, b"ta".as_ref()); + } + Err(Error::NotSupported { .. }) => {} + Err(e) => panic!("{e}"), + } + + let opts = GetOptions { + range: Some(GetRange::Suffix(100)), + ..Default::default() + }; + match storage.get_opts(&location, opts).await { + Ok(result) => { + assert_eq!(result.range, 0..14); + assert_eq!(result.meta.size, 14); + let bytes = result.bytes().await.unwrap(); + assert_eq!(bytes, b"arbitrary data".as_ref()); + } + Err(Error::NotSupported { .. 
}) => {} + Err(e) => panic!("{e}"), + } + + let opts = GetOptions { + range: Some(GetRange::Offset(3)), + ..Default::default() + }; + let result = storage.get_opts(&location, opts).await.unwrap(); + assert_eq!(result.range, 3..14); + assert_eq!(result.meta.size, 14); + let bytes = result.bytes().await.unwrap(); + assert_eq!(bytes, b"itrary data".as_ref()); + + let opts = GetOptions { + range: Some(GetRange::Offset(100)), + ..Default::default() + }; + storage.get_opts(&location, opts).await.unwrap_err(); + + let ranges = vec![0..1, 2..3, 0..5]; + let bytes = storage.get_ranges(&location, &ranges).await.unwrap(); + for (range, bytes) in ranges.iter().zip(bytes) { + assert_eq!(bytes, data.slice(range.start as usize..range.end as usize)); + } + + let head = storage.head(&location).await.unwrap(); + assert_eq!(head.size, data.len() as u64); + + storage.delete(&location).await.unwrap(); + + let content_list = flatten_list_stream(storage, None).await.unwrap(); + assert!(content_list.is_empty()); + + let err = storage.get(&location).await.unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err); + + let err = storage.head(&location).await.unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err); + + // Test handling of paths containing an encoded delimiter + + let file_with_delimiter = Path::from_iter(["a", "b/c", "foo.file"]); + storage + .put(&file_with_delimiter, "arbitrary".into()) + .await + .unwrap(); + + let files = flatten_list_stream(storage, None).await.unwrap(); + assert_eq!(files, vec![file_with_delimiter.clone()]); + + let files = flatten_list_stream(storage, Some(&Path::from("a/b"))) + .await + .unwrap(); + assert!(files.is_empty()); + + let files = storage + .list_with_delimiter(Some(&Path::from("a/b"))) + .await + .unwrap(); + assert!(files.common_prefixes.is_empty()); + assert!(files.objects.is_empty()); + + let files = storage + .list_with_delimiter(Some(&Path::from("a"))) + .await + .unwrap(); + assert_eq!(files.common_prefixes, vec![Path::from_iter(["a", "b/c"])]); + assert!(files.objects.is_empty()); + + let files = storage + .list_with_delimiter(Some(&Path::from_iter(["a", "b/c"]))) + .await + .unwrap(); + assert!(files.common_prefixes.is_empty()); + assert_eq!(files.objects.len(), 1); + assert_eq!(files.objects[0].location, file_with_delimiter); + + storage.delete(&file_with_delimiter).await.unwrap(); + + // Test handling of paths containing non-ASCII characters, e.g. 
emoji + + let emoji_prefix = Path::from("🙀"); + let emoji_file = Path::from("🙀/😀.parquet"); + storage.put(&emoji_file, "arbitrary".into()).await.unwrap(); + + storage.head(&emoji_file).await.unwrap(); + storage + .get(&emoji_file) + .await + .unwrap() + .bytes() + .await + .unwrap(); + + let files = flatten_list_stream(storage, Some(&emoji_prefix)) + .await + .unwrap(); + + assert_eq!(files, vec![emoji_file.clone()]); + + let dst = Path::from("foo.parquet"); + storage.copy(&emoji_file, &dst).await.unwrap(); + let mut files = flatten_list_stream(storage, None).await.unwrap(); + files.sort_unstable(); + assert_eq!(files, vec![emoji_file.clone(), dst.clone()]); + + let dst2 = Path::from("new/nested/foo.parquet"); + storage.copy(&emoji_file, &dst2).await.unwrap(); + let mut files = flatten_list_stream(storage, None).await.unwrap(); + files.sort_unstable(); + assert_eq!(files, vec![emoji_file.clone(), dst.clone(), dst2.clone()]); + + let dst3 = Path::from("new/nested2/bar.parquet"); + storage.rename(&dst, &dst3).await.unwrap(); + let mut files = flatten_list_stream(storage, None).await.unwrap(); + files.sort_unstable(); + assert_eq!(files, vec![emoji_file.clone(), dst2.clone(), dst3.clone()]); + + let err = storage.head(&dst).await.unwrap_err(); + assert!(matches!(err, Error::NotFound { .. })); + + storage.delete(&emoji_file).await.unwrap(); + storage.delete(&dst3).await.unwrap(); + storage.delete(&dst2).await.unwrap(); + let files = flatten_list_stream(storage, Some(&emoji_prefix)) + .await + .unwrap(); + assert!(files.is_empty()); + + // Test handling of paths containing percent-encoded sequences + + // "HELLO" percent encoded + let hello_prefix = Path::parse("%48%45%4C%4C%4F").unwrap(); + let path = hello_prefix.child("foo.parquet"); + + storage.put(&path, vec![0, 1].into()).await.unwrap(); + let files = flatten_list_stream(storage, Some(&hello_prefix)) + .await + .unwrap(); + assert_eq!(files, vec![path.clone()]); + + // Cannot list by decoded representation + let files = flatten_list_stream(storage, Some(&Path::from("HELLO"))) + .await + .unwrap(); + assert!(files.is_empty()); + + // Cannot access by decoded representation + let err = storage + .head(&Path::from("HELLO/foo.parquet")) + .await + .unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. 
}), "{}", err); + + storage.delete(&path).await.unwrap(); + + // Test handling of unicode paths + let path = Path::parse("🇦🇺/$shenanigans@@~.txt").unwrap(); + storage.put(&path, "test".into()).await.unwrap(); + + let r = storage.get(&path).await.unwrap(); + assert_eq!(r.bytes().await.unwrap(), "test"); + + let dir = Path::parse("🇦🇺").unwrap(); + let r = storage.list_with_delimiter(None).await.unwrap(); + assert!(r.common_prefixes.contains(&dir)); + + let r = storage.list_with_delimiter(Some(&dir)).await.unwrap(); + assert_eq!(r.objects.len(), 1); + assert_eq!(r.objects[0].location, path); + + storage.delete(&path).await.unwrap(); + + // Can also write non-percent encoded sequences + let path = Path::parse("%Q.parquet").unwrap(); + storage.put(&path, vec![0, 1].into()).await.unwrap(); + + let files = flatten_list_stream(storage, None).await.unwrap(); + assert_eq!(files, vec![path.clone()]); + + storage.delete(&path).await.unwrap(); + + let path = Path::parse("foo bar/I contain spaces.parquet").unwrap(); + storage.put(&path, vec![0, 1].into()).await.unwrap(); + storage.head(&path).await.unwrap(); + + let files = flatten_list_stream(storage, Some(&Path::from("foo bar"))) + .await + .unwrap(); + assert_eq!(files, vec![path.clone()]); + + storage.delete(&path).await.unwrap(); + + let files = flatten_list_stream(storage, None).await.unwrap(); + assert!(files.is_empty(), "{files:?}"); + + // Test list order + let files = vec![ + Path::from("a a/b.file"), + Path::parse("a%2Fa.file").unwrap(), + Path::from("a/😀.file"), + Path::from("a/a file"), + Path::parse("a/a%2F.file").unwrap(), + Path::from("a/a.file"), + Path::from("a/a/b.file"), + Path::from("a/b.file"), + Path::from("aa/a.file"), + Path::from("ab/a.file"), + ]; + + for file in &files { + storage.put(file, "foo".into()).await.unwrap(); + } + + let cases = [ + (None, Path::from("a")), + (None, Path::from("a/a file")), + (None, Path::from("a/a/b.file")), + (None, Path::from("ab/a.file")), + (None, Path::from("a%2Fa.file")), + (None, Path::from("a/😀.file")), + (Some(Path::from("a")), Path::from("")), + (Some(Path::from("a")), Path::from("a")), + (Some(Path::from("a")), Path::from("a/😀")), + (Some(Path::from("a")), Path::from("a/😀.file")), + (Some(Path::from("a")), Path::from("a/b")), + (Some(Path::from("a")), Path::from("a/a/b.file")), + ]; + + for (prefix, offset) in cases { + let s = storage.list_with_offset(prefix.as_ref(), &offset); + let mut actual: Vec<_> = s.map_ok(|x| x.location).try_collect().await.unwrap(); + + actual.sort_unstable(); + + let expected: Vec<_> = files + .iter() + .filter(|x| { + let prefix_match = prefix.as_ref().map(|p| x.prefix_matches(p)).unwrap_or(true); + prefix_match && *x > &offset + }) + .cloned() + .collect(); + + assert_eq!(actual, expected, "{prefix:?} - {offset:?}"); + } + + // Test bulk delete + let paths = vec![ + Path::from("a/a.file"), + Path::from("a/a/b.file"), + Path::from("aa/a.file"), + Path::from("does_not_exist"), + Path::from("I'm a < & weird path"), + Path::from("ab/a.file"), + Path::from("a/😀.file"), + ]; + + storage.put(&paths[4], "foo".into()).await.unwrap(); + + let out_paths = storage + .delete_stream(futures::stream::iter(paths.clone()).map(Ok).boxed()) + .collect::>() + .await; + + assert_eq!(out_paths.len(), paths.len()); + + let expect_errors = [3]; + + for (i, input_path) in paths.iter().enumerate() { + let err = storage.head(input_path).await.unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. 
}), "{}", err); + + if expect_errors.contains(&i) { + // Some object stores will report NotFound, but others (such as S3) will + // report success regardless. + match &out_paths[i] { + Err(Error::NotFound { path: out_path, .. }) => { + assert!(out_path.ends_with(&input_path.to_string())); + } + Ok(out_path) => { + assert_eq!(out_path, input_path); + } + _ => panic!("unexpected error"), + } + } else { + assert_eq!(out_paths[i].as_ref().unwrap(), input_path); + } + } + + delete_fixtures(storage).await; + + let path = Path::from("empty"); + storage.put(&path, PutPayload::default()).await.unwrap(); + let meta = storage.head(&path).await.unwrap(); + assert_eq!(meta.size, 0); + let data = storage.get(&path).await.unwrap().bytes().await.unwrap(); + assert_eq!(data.len(), 0); + + storage.delete(&path).await.unwrap(); +} + +/// Tests the ability to read and write [`Attributes`] +pub async fn put_get_attributes(integration: &dyn ObjectStore) { + // Test handling of attributes + let attributes = Attributes::from_iter([ + (Attribute::CacheControl, "max-age=604800"), + ( + Attribute::ContentDisposition, + r#"attachment; filename="test.html""#, + ), + (Attribute::ContentEncoding, "gzip"), + (Attribute::ContentLanguage, "en-US"), + (Attribute::ContentType, "text/html; charset=utf-8"), + (Attribute::Metadata("test_key".into()), "test_value"), + ]); + + let path = Path::from("attributes"); + let opts = attributes.clone().into(); + match integration.put_opts(&path, "foo".into(), opts).await { + Ok(_) => { + let r = integration.get(&path).await.unwrap(); + assert_eq!(r.attributes, attributes); + } + Err(Error::NotImplemented) => {} + Err(e) => panic!("{e}"), + } + + let opts = attributes.clone().into(); + match integration.put_multipart_opts(&path, opts).await { + Ok(mut w) => { + w.put_part("foo".into()).await.unwrap(); + w.complete().await.unwrap(); + + let r = integration.get(&path).await.unwrap(); + assert_eq!(r.attributes, attributes); + } + Err(Error::NotImplemented) => {} + Err(e) => panic!("{e}"), + } +} + +/// Tests conditional read requests +pub async fn get_opts(storage: &dyn ObjectStore) { + let path = Path::from("test"); + storage.put(&path, "foo".into()).await.unwrap(); + let meta = storage.head(&path).await.unwrap(); + + let options = GetOptions { + if_unmodified_since: Some(meta.last_modified), + ..GetOptions::default() + }; + match storage.get_opts(&path, options).await { + Ok(_) | Err(Error::NotSupported { .. }) => {} + Err(e) => panic!("{e}"), + } + + let options = GetOptions { + if_unmodified_since: Some(meta.last_modified + chrono::Duration::try_hours(10).unwrap()), + ..GetOptions::default() + }; + match storage.get_opts(&path, options).await { + Ok(_) | Err(Error::NotSupported { .. }) => {} + Err(e) => panic!("{e}"), + } + + let options = GetOptions { + if_unmodified_since: Some(meta.last_modified - chrono::Duration::try_hours(10).unwrap()), + ..GetOptions::default() + }; + match storage.get_opts(&path, options).await { + Err(Error::Precondition { .. } | Error::NotSupported { .. }) => {} + d => panic!("{d:?}"), + } + + let options = GetOptions { + if_modified_since: Some(meta.last_modified), + ..GetOptions::default() + }; + match storage.get_opts(&path, options).await { + Err(Error::NotModified { .. } | Error::NotSupported { .. 
}) => {} + d => panic!("{d:?}"), + } + + let options = GetOptions { + if_modified_since: Some(meta.last_modified - chrono::Duration::try_hours(10).unwrap()), + ..GetOptions::default() + }; + match storage.get_opts(&path, options).await { + Ok(_) | Err(Error::NotSupported { .. }) => {} + Err(e) => panic!("{e}"), + } + + let tag = meta.e_tag.unwrap(); + let options = GetOptions { + if_match: Some(tag.clone()), + ..GetOptions::default() + }; + storage.get_opts(&path, options).await.unwrap(); + + let options = GetOptions { + if_match: Some("invalid".to_string()), + ..GetOptions::default() + }; + let err = storage.get_opts(&path, options).await.unwrap_err(); + assert!(matches!(err, Error::Precondition { .. }), "{err}"); + + let options = GetOptions { + if_none_match: Some(tag.clone()), + ..GetOptions::default() + }; + let err = storage.get_opts(&path, options).await.unwrap_err(); + assert!(matches!(err, Error::NotModified { .. }), "{err}"); + + let options = GetOptions { + if_none_match: Some("invalid".to_string()), + ..GetOptions::default() + }; + storage.get_opts(&path, options).await.unwrap(); + + let result = storage.put(&path, "test".into()).await.unwrap(); + let new_tag = result.e_tag.unwrap(); + assert_ne!(tag, new_tag); + + let meta = storage.head(&path).await.unwrap(); + assert_eq!(meta.e_tag.unwrap(), new_tag); + + let options = GetOptions { + if_match: Some(new_tag), + ..GetOptions::default() + }; + storage.get_opts(&path, options).await.unwrap(); + + let options = GetOptions { + if_match: Some(tag), + ..GetOptions::default() + }; + let err = storage.get_opts(&path, options).await.unwrap_err(); + assert!(matches!(err, Error::Precondition { .. }), "{err}"); + + if let Some(version) = meta.version { + storage.put(&path, "bar".into()).await.unwrap(); + + let options = GetOptions { + version: Some(version), + ..GetOptions::default() + }; + + // Can retrieve previous version + let get_opts = storage.get_opts(&path, options).await.unwrap(); + let old = get_opts.bytes().await.unwrap(); + assert_eq!(old, b"test".as_slice()); + + // Current version contains the updated data + let current = storage.get(&path).await.unwrap().bytes().await.unwrap(); + assert_eq!(¤t, b"bar".as_slice()); + } +} + +/// Tests conditional writes +pub async fn put_opts(storage: &dyn ObjectStore, supports_update: bool) { + // When using DynamoCommit repeated runs of this test will produce the same sequence of records in DynamoDB + // As a result each conditional operation will need to wait for the lease to timeout before proceeding + // One solution would be to clear DynamoDB before each test, but this would require non-trivial additional code + // so we instead just generate a random suffix for the filenames + let rng = rng(); + let suffix = String::from_utf8(rng.sample_iter(Alphanumeric).take(32).collect()).unwrap(); + + delete_fixtures(storage).await; + let path = Path::from(format!("put_opts_{suffix}")); + let v1 = storage + .put_opts(&path, "a".into(), PutMode::Create.into()) + .await + .unwrap(); + + let err = storage + .put_opts(&path, "b".into(), PutMode::Create.into()) + .await + .unwrap_err(); + assert!(matches!(err, Error::AlreadyExists { .. 
}), "{err}"); + + let b = storage.get(&path).await.unwrap().bytes().await.unwrap(); + assert_eq!(b.as_ref(), b"a"); + + if !supports_update { + let err = storage + .put_opts(&path, "c".into(), PutMode::Update(v1.clone().into()).into()) + .await + .unwrap_err(); + assert!(matches!(err, Error::NotImplemented), "{err}"); + + return; + } + + let v2 = storage + .put_opts(&path, "c".into(), PutMode::Update(v1.clone().into()).into()) + .await + .unwrap(); + + let b = storage.get(&path).await.unwrap().bytes().await.unwrap(); + assert_eq!(b.as_ref(), b"c"); + + let err = storage + .put_opts(&path, "d".into(), PutMode::Update(v1.into()).into()) + .await + .unwrap_err(); + assert!(matches!(err, Error::Precondition { .. }), "{err}"); + + storage + .put_opts(&path, "e".into(), PutMode::Update(v2.clone().into()).into()) + .await + .unwrap(); + + let b = storage.get(&path).await.unwrap().bytes().await.unwrap(); + assert_eq!(b.as_ref(), b"e"); + + // Update not exists + let path = Path::from("I don't exist"); + let err = storage + .put_opts(&path, "e".into(), PutMode::Update(v2.into()).into()) + .await + .unwrap_err(); + assert!(matches!(err, Error::Precondition { .. }), "{err}"); + + const NUM_WORKERS: usize = 5; + const NUM_INCREMENTS: usize = 10; + + let path = Path::from(format!("RACE-{suffix}")); + let mut futures: FuturesUnordered<_> = (0..NUM_WORKERS) + .map(|_| async { + for _ in 0..NUM_INCREMENTS { + loop { + match storage.get(&path).await { + Ok(r) => { + let mode = PutMode::Update(UpdateVersion { + e_tag: r.meta.e_tag.clone(), + version: r.meta.version.clone(), + }); + + let b = r.bytes().await.unwrap(); + let v: usize = std::str::from_utf8(&b).unwrap().parse().unwrap(); + let new = (v + 1).to_string(); + + match storage.put_opts(&path, new.into(), mode.into()).await { + Ok(_) => break, + Err(Error::Precondition { .. }) => continue, + Err(e) => return Err(e), + } + } + Err(Error::NotFound { .. }) => { + let mode = PutMode::Create; + match storage.put_opts(&path, "1".into(), mode.into()).await { + Ok(_) => break, + Err(Error::AlreadyExists { .. 
}) => continue, + Err(e) => return Err(e), + } + } + Err(e) => return Err(e), + } + } + } + Ok(()) + }) + .collect(); + + while futures.next().await.transpose().unwrap().is_some() {} + let b = storage.get(&path).await.unwrap().bytes().await.unwrap(); + let v = std::str::from_utf8(&b).unwrap().parse::().unwrap(); + assert_eq!(v, NUM_WORKERS * NUM_INCREMENTS); +} + +/// Returns a chunk of length `chunk_length` +fn get_chunk(chunk_length: usize) -> Bytes { + let mut data = vec![0_u8; chunk_length]; + let mut rng = rng(); + // Set a random selection of bytes + for _ in 0..1000 { + data[rng.random_range(0..chunk_length)] = rng.random(); + } + data.into() +} + +/// Returns `num_chunks` of length `chunks` +fn get_chunks(chunk_length: usize, num_chunks: usize) -> Vec { + (0..num_chunks).map(|_| get_chunk(chunk_length)).collect() +} + +/// Tests the ability to perform multipart writes +pub async fn stream_get(storage: &DynObjectStore) { + let location = Path::from("test_dir/test_upload_file.txt"); + + // Can write to storage + let data = get_chunks(5 * 1024 * 1024, 3); + let bytes_expected = data.concat(); + let mut upload = storage.put_multipart(&location).await.unwrap(); + let uploads = data.into_iter().map(|x| upload.put_part(x.into())); + futures::future::try_join_all(uploads).await.unwrap(); + + // Object should not yet exist in store + let meta_res = storage.head(&location).await; + assert!(meta_res.is_err()); + assert!(matches!( + meta_res.unwrap_err(), + crate::Error::NotFound { .. } + )); + + let files = flatten_list_stream(storage, None).await.unwrap(); + assert_eq!(&files, &[]); + + let result = storage.list_with_delimiter(None).await.unwrap(); + assert_eq!(&result.objects, &[]); + + upload.complete().await.unwrap(); + + let bytes_written = storage.get(&location).await.unwrap().bytes().await.unwrap(); + assert_eq!(bytes_expected, bytes_written); + + // Can overwrite some storage + // Sizes chosen to ensure we write three parts + let data = get_chunks(3_200_000, 7); + let bytes_expected = data.concat(); + let upload = storage.put_multipart(&location).await.unwrap(); + let mut writer = WriteMultipart::new(upload); + for chunk in &data { + writer.write(chunk) + } + writer.finish().await.unwrap(); + let bytes_written = storage.get(&location).await.unwrap().bytes().await.unwrap(); + assert_eq!(bytes_expected, bytes_written); + + let location = Path::from("test_dir/test_put_part.txt"); + let upload = storage.put_multipart(&location).await.unwrap(); + let mut write = WriteMultipart::new(upload); + write.put(vec![0; 2].into()); + write.put(vec![3; 4].into()); + write.finish().await.unwrap(); + + let meta = storage.head(&location).await.unwrap(); + assert_eq!(meta.size, 6); + + let location = Path::from("test_dir/test_put_part_mixed.txt"); + let upload = storage.put_multipart(&location).await.unwrap(); + let mut write = WriteMultipart::new(upload); + write.put(vec![0; 2].into()); + write.write(&[1, 2, 3]); + write.put(vec![4, 5, 6, 7].into()); + write.finish().await.unwrap(); + + let r = storage.get(&location).await.unwrap(); + let r = r.bytes().await.unwrap(); + assert_eq!(r.as_ref(), &[0, 0, 1, 2, 3, 4, 5, 6, 7]); + + // We can abort an empty write + let location = Path::from("test_dir/test_abort_upload.txt"); + let mut upload = storage.put_multipart(&location).await.unwrap(); + upload.abort().await.unwrap(); + let get_res = storage.get(&location).await; + assert!(get_res.is_err()); + assert!(matches!( + get_res.unwrap_err(), + crate::Error::NotFound { .. 
} + )); + + // We can abort an in-progress write + let mut upload = storage.put_multipart(&location).await.unwrap(); + upload + .put_part(data.first().unwrap().clone().into()) + .await + .unwrap(); + + upload.abort().await.unwrap(); + let get_res = storage.get(&location).await; + assert!(get_res.is_err()); + assert!(matches!(get_res.unwrap_err(), Error::NotFound { .. })); +} + +/// Tests that directories are transparent +pub async fn list_uses_directories_correctly(storage: &DynObjectStore) { + delete_fixtures(storage).await; + + let content_list = flatten_list_stream(storage, None).await.unwrap(); + assert!( + content_list.is_empty(), + "Expected list to be empty; found: {content_list:?}" + ); + + let location1 = Path::from("foo/x.json"); + let location2 = Path::from("foo.bar/y.json"); + + let data = PutPayload::from("arbitrary data"); + storage.put(&location1, data.clone()).await.unwrap(); + storage.put(&location2, data).await.unwrap(); + + let prefix = Path::from("foo"); + let content_list = flatten_list_stream(storage, Some(&prefix)).await.unwrap(); + assert_eq!(content_list, slice::from_ref(&location1)); + + let result = storage.list_with_delimiter(Some(&prefix)).await.unwrap(); + assert_eq!(result.objects.len(), 1); + assert_eq!(result.objects[0].location, location1); + assert_eq!(result.common_prefixes, &[]); + + // Listing an existing path (file) should return an empty list: + // https://github.com/apache/arrow-rs/issues/3712 + let content_list = flatten_list_stream(storage, Some(&location1)) + .await + .unwrap(); + assert_eq!(content_list, &[]); + + let list = storage.list_with_delimiter(Some(&location1)).await.unwrap(); + assert_eq!(list.objects, &[]); + assert_eq!(list.common_prefixes, &[]); + + let prefix = Path::from("foo/x"); + let content_list = flatten_list_stream(storage, Some(&prefix)).await.unwrap(); + assert_eq!(content_list, &[]); + + let list = storage.list_with_delimiter(Some(&prefix)).await.unwrap(); + assert_eq!(list.objects, &[]); + assert_eq!(list.common_prefixes, &[]); +} + +/// Tests listing with delimiter +pub async fn list_with_delimiter(storage: &DynObjectStore) { + delete_fixtures(storage).await; + + // ==================== check: store is empty ==================== + let content_list = flatten_list_stream(storage, None).await.unwrap(); + assert!(content_list.is_empty()); + + // ==================== do: create files ==================== + let data = Bytes::from("arbitrary data"); + + let files: Vec<_> = [ + "test_file", + "mydb/wb/000/000/000.segment", + "mydb/wb/000/000/001.segment", + "mydb/wb/000/000/002.segment", + "mydb/wb/001/001/000.segment", + "mydb/wb/foo.json", + "mydb/wbwbwb/111/222/333.segment", + "mydb/data/whatevs", + ] + .iter() + .map(|&s| Path::from(s)) + .collect(); + + for f in &files { + storage.put(f, data.clone().into()).await.unwrap(); + } + + // ==================== check: prefix-list `mydb/wb` (directory) ==================== + let prefix = Path::from("mydb/wb"); + + let expected_000 = Path::from("mydb/wb/000"); + let expected_001 = Path::from("mydb/wb/001"); + let expected_location = Path::from("mydb/wb/foo.json"); + + let result = storage.list_with_delimiter(Some(&prefix)).await.unwrap(); + + assert_eq!(result.common_prefixes, vec![expected_000, expected_001]); + assert_eq!(result.objects.len(), 1); + + let object = &result.objects[0]; + + assert_eq!(object.location, expected_location); + assert_eq!(object.size, data.len() as u64); + + // ==================== check: prefix-list `mydb/wb/000/000/001` (partial filename doesn't 
match) ==================== + let prefix = Path::from("mydb/wb/000/000/001"); + + let result = storage.list_with_delimiter(Some(&prefix)).await.unwrap(); + assert!(result.common_prefixes.is_empty()); + assert_eq!(result.objects.len(), 0); + + // ==================== check: prefix-list `not_there` (non-existing prefix) ==================== + let prefix = Path::from("not_there"); + + let result = storage.list_with_delimiter(Some(&prefix)).await.unwrap(); + assert!(result.common_prefixes.is_empty()); + assert!(result.objects.is_empty()); + + // ==================== do: remove all files ==================== + for f in &files { + storage.delete(f).await.unwrap(); + } + + // ==================== check: store is empty ==================== + let content_list = flatten_list_stream(storage, None).await.unwrap(); + assert!(content_list.is_empty()); +} + +/// Tests fetching a non-existent object returns a not found error +pub async fn get_nonexistent_object( + storage: &DynObjectStore, + location: Option, +) -> crate::Result { + let location = location.unwrap_or_else(|| Path::from("this_file_should_not_exist")); + + let err = storage.head(&location).await.unwrap_err(); + assert!(matches!(err, Error::NotFound { .. })); + + storage.get(&location).await?.bytes().await +} + +/// Tests copying +pub async fn rename_and_copy(storage: &DynObjectStore) { + // Create two objects + let path1 = Path::from("test1"); + let path2 = Path::from("test2"); + let contents1 = Bytes::from("cats"); + let contents2 = Bytes::from("dogs"); + + // copy() make both objects identical + storage.put(&path1, contents1.clone().into()).await.unwrap(); + storage.put(&path2, contents2.clone().into()).await.unwrap(); + storage.copy(&path1, &path2).await.unwrap(); + let new_contents = storage.get(&path2).await.unwrap().bytes().await.unwrap(); + assert_eq!(&new_contents, &contents1); + + // rename() copies contents and deletes original + storage.put(&path1, contents1.clone().into()).await.unwrap(); + storage.put(&path2, contents2.clone().into()).await.unwrap(); + storage.rename(&path1, &path2).await.unwrap(); + let new_contents = storage.get(&path2).await.unwrap().bytes().await.unwrap(); + assert_eq!(&new_contents, &contents1); + let result = storage.get(&path1).await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), Error::NotFound { .. })); + + // Clean up + storage.delete(&path2).await.unwrap(); +} + +/// Tests copy if not exists +pub async fn copy_if_not_exists(storage: &DynObjectStore) { + // Create two objects + let path1 = Path::from("test1"); + let path2 = Path::from("not_exists_nested/test2"); + let contents1 = Bytes::from("cats"); + let contents2 = Bytes::from("dogs"); + + // copy_if_not_exists() errors if destination already exists + storage.put(&path1, contents1.clone().into()).await.unwrap(); + storage.put(&path2, contents2.clone().into()).await.unwrap(); + let result = storage.copy_if_not_exists(&path1, &path2).await; + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + crate::Error::AlreadyExists { .. } + )); + + // copy_if_not_exists() copies contents and allows deleting original + storage.delete(&path2).await.unwrap(); + storage.copy_if_not_exists(&path1, &path2).await.unwrap(); + storage.delete(&path1).await.unwrap(); + let new_contents = storage.get(&path2).await.unwrap().bytes().await.unwrap(); + assert_eq!(&new_contents, &contents1); + let result = storage.get(&path1).await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), crate::Error::NotFound { .. 
})); + + // Clean up + storage.delete(&path2).await.unwrap(); +} + +/// Tests copy and renaming behaviour of non-existent objects +pub async fn copy_rename_nonexistent_object(storage: &DynObjectStore) { + // Create empty source object + let path1 = Path::from("test1"); + + // Create destination object + let path2 = Path::from("test2"); + storage.put(&path2, "hello".into()).await.unwrap(); + + // copy() errors if source does not exist + let result = storage.copy(&path1, &path2).await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), crate::Error::NotFound { .. })); + + // rename() errors if source does not exist + let result = storage.rename(&path1, &path2).await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), crate::Error::NotFound { .. })); + + // copy_if_not_exists() errors if source does not exist + let result = storage.copy_if_not_exists(&path1, &path2).await; + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), crate::Error::NotFound { .. })); + + // Clean up + storage.delete(&path2).await.unwrap(); +} + +/// Tests [`MultipartStore`] +pub async fn multipart(storage: &dyn ObjectStore, multipart: &dyn MultipartStore) { + let path = Path::from("test_multipart"); + let chunk_size = 5 * 1024 * 1024; + + let chunks = get_chunks(chunk_size, 2); + + let id = multipart.create_multipart(&path).await.unwrap(); + + let parts: Vec<_> = futures::stream::iter(chunks) + .enumerate() + .map(|(idx, b)| multipart.put_part(&path, &id, idx, b.into())) + .buffered(2) + .try_collect() + .await + .unwrap(); + + multipart + .complete_multipart(&path, &id, parts) + .await + .unwrap(); + + let meta = storage.head(&path).await.unwrap(); + assert_eq!(meta.size, chunk_size as u64 * 2); + + // Empty case + let path = Path::from("test_empty_multipart"); + + let id = multipart.create_multipart(&path).await.unwrap(); + + let parts = vec![]; + + multipart + .complete_multipart(&path, &id, parts) + .await + .unwrap(); + + let meta = storage.head(&path).await.unwrap(); + assert_eq!(meta.size, 0); +} + +async fn delete_fixtures(storage: &DynObjectStore) { + let paths = storage.list(None).map_ok(|meta| meta.location).boxed(); + storage + .delete_stream(paths) + .try_collect::>() + .await + .unwrap(); +} + +/// Tests a race condition where 2 threads are performing multipart writes to the same path +pub async fn multipart_race_condition(storage: &dyn ObjectStore, last_writer_wins: bool) { + let path = Path::from("test_multipart_race_condition"); + + let mut multipart_upload_1 = storage.put_multipart(&path).await.unwrap(); + let mut multipart_upload_2 = storage.put_multipart(&path).await.unwrap(); + + /// Create a string like `"1:"` followed by `part` padded to 5,300,000 places + /// + /// equivalent of format!("{prefix}:{part:05300000}"), which is no longer supported + /// + /// See: + fn make_payload(prefix: u8, part: u8) -> Vec { + // prefix = 1 byte + // ':' = 1 byte + let mut payload = vec![b'0'; 5_300_002]; + payload[0] = prefix; + payload[1] = b':'; + payload[2] = part; + payload + } + + // Upload parts interleaved + multipart_upload_1 + .put_part(Bytes::from(make_payload(b'1', 0)).into()) + .await + .unwrap(); + multipart_upload_2 + .put_part(Bytes::from(make_payload(b'2', 0)).into()) + .await + .unwrap(); + + multipart_upload_2 + .put_part(Bytes::from(make_payload(b'2', 1)).into()) + .await + .unwrap(); + multipart_upload_1 + .put_part(Bytes::from(make_payload(b'1', 1)).into()) + .await + .unwrap(); + + multipart_upload_1 + 
.put_part(Bytes::from(make_payload(b'1', 2)).into()) + .await + .unwrap(); + multipart_upload_2 + .put_part(Bytes::from(make_payload(b'2', 2)).into()) + .await + .unwrap(); + + multipart_upload_2 + .put_part(Bytes::from(make_payload(b'2', 3)).into()) + .await + .unwrap(); + multipart_upload_1 + .put_part(Bytes::from(make_payload(b'1', 3)).into()) + .await + .unwrap(); + + multipart_upload_1 + .put_part(Bytes::from(make_payload(b'1', 4)).into()) + .await + .unwrap(); + multipart_upload_2 + .put_part(Bytes::from(make_payload(b'2', 4)).into()) + .await + .unwrap(); + + multipart_upload_1.complete().await.unwrap(); + + if last_writer_wins { + multipart_upload_2.complete().await.unwrap(); + } else { + let err = multipart_upload_2.complete().await.unwrap_err(); + + assert!(matches!(err, crate::Error::Generic { .. }), "{err}"); + } + + let get_result = storage.get(&path).await.unwrap(); + let result_bytes = get_result.bytes().await.unwrap(); + + let expected_writer_prefix = if last_writer_wins { b'2' } else { b'1' }; + let mut expected_writer_contents = vec![]; + for part in 0..5 { + expected_writer_contents.append(&mut make_payload(expected_writer_prefix, part)); + } + + assert!(result_bytes.starts_with(&expected_writer_contents)); +} + +/// Tests performing out of order multipart uploads +pub async fn multipart_out_of_order(storage: &dyn ObjectStore) { + let path = Path::from("test_multipart_out_of_order"); + let mut multipart_upload = storage.put_multipart(&path).await.unwrap(); + + let part1 = std::iter::repeat(b'1') + .take(5 * 1024 * 1024) + .collect::(); + let part2 = std::iter::repeat(b'2') + .take(5 * 1024 * 1024) + .collect::(); + let part3 = std::iter::repeat(b'3') + .take(5 * 1024 * 1024) + .collect::(); + let full = [part1.as_ref(), part2.as_ref(), part3.as_ref()].concat(); + + let fut1 = multipart_upload.put_part(part1.into()); + let fut2 = multipart_upload.put_part(part2.into()); + let fut3 = multipart_upload.put_part(part3.into()); + // note order is 2,3,1 , different than the parts were created in + fut2.await.unwrap(); + fut3.await.unwrap(); + fut1.await.unwrap(); + + multipart_upload.complete().await.unwrap(); + + let result = storage.get(&path).await.unwrap(); + let bytes = result.bytes().await.unwrap(); + assert_eq!(bytes, full); +} + +/// Tests [`PaginatedListStore`] +pub async fn list_paginated(storage: &dyn ObjectStore, list: &dyn PaginatedListStore) { + delete_fixtures(storage).await; + + let r = list.list_paginated(None, Default::default()).await.unwrap(); + assert_eq!(r.page_token, None); + assert_eq!(r.result.objects, vec![]); + assert_eq!(r.result.common_prefixes, vec![]); + + let p1 = Path::from("foo/bar"); + let p2 = Path::from("foo/bax"); + let p3 = Path::from("foo/baz/bar"); + let p4 = Path::from("foo/baz/banana"); + let p5 = Path::from("fob/banana"); + let p6 = Path::from("fongle/banana"); + + let paths = HashSet::from_iter([&p1, &p2, &p3, &p4, &p5, &p6]); + + for path in &paths { + storage.put(path, vec![1].into()).await.unwrap(); + } + + // Test basic listing + + let mut listed = HashSet::new(); + let mut opts = PaginatedListOptions { + max_keys: Some(5), + ..Default::default() + }; + let ret = list.list_paginated(None, opts.clone()).await.unwrap(); + assert_eq!(ret.result.objects.len(), 5); + listed.extend(ret.result.objects.iter().map(|x| &x.location)); + + opts.page_token = Some(ret.page_token.unwrap()); + let ret = list.list_paginated(None, opts.clone()).await.unwrap(); + assert_eq!(ret.result.objects.len(), 1); + 
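+    // Second page: the single remaining object; both pages together
+    // must yield exactly the six paths created above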
listed.extend(ret.result.objects.iter().map(|x| &x.location)); + + assert_eq!(listed, paths); + + // List with prefix + let prefix = Some("foo/"); + opts.page_token = None; + let ret = list.list_paginated(prefix, opts.clone()).await.unwrap(); + assert_eq!(ret.result.objects.len(), 4); + assert!(ret.page_token.is_none()); + + let actual = HashSet::from_iter(ret.result.objects.iter().map(|x| &x.location)); + assert_eq!(actual, HashSet::<&Path>::from_iter([&p1, &p2, &p3, &p4])); + + // List with partial prefix + let prefix = Some("fo"); + opts.page_token = None; + let ret = list.list_paginated(prefix, opts.clone()).await.unwrap(); + assert_eq!(ret.result.objects.len(), 5); + listed.extend(ret.result.objects.iter().map(|x| &x.location)); + + opts.page_token = Some(ret.page_token.unwrap()); + let ret = list.list_paginated(prefix, opts.clone()).await.unwrap(); + assert_eq!(ret.result.objects.len(), 1); + listed.extend(ret.result.objects.iter().map(|x| &x.location)); + + assert_eq!(listed, paths); + + // List with prefix and delimiter + let prefix = Some("foo/"); + opts.page_token = None; + opts.delimiter = Some("/".into()); + let ret = list.list_paginated(prefix, opts.clone()).await.unwrap(); + assert_eq!(ret.result.objects.len(), 2); + assert_eq!(ret.result.common_prefixes, vec![Path::from("foo/baz")]); + assert!(ret.page_token.is_none()); + + let actual = HashSet::from_iter(ret.result.objects.iter().map(|x| &x.location)); + assert_eq!(actual, HashSet::<&Path>::from_iter([&p1, &p2])); + + // List with partial prefix and delimiter + let prefix = Some("fo"); + opts.page_token = None; + opts.delimiter = Some("/".into()); + let ret = list.list_paginated(prefix, opts.clone()).await.unwrap(); + assert_eq!(ret.result.objects.len(), 0); + assert_eq!( + HashSet::::from_iter(ret.result.common_prefixes), + HashSet::from_iter([Path::from("foo"), Path::from("fob"), Path::from("fongle")]) + ); + assert!(ret.page_token.is_none()); +} diff --git a/rust/object_store/src/lib.rs b/rust/object_store/src/lib.rs new file mode 100644 index 0000000000..bb9f8b1012 --- /dev/null +++ b/rust/object_store/src/lib.rs @@ -0,0 +1,1673 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![cfg_attr(docsrs, feature(doc_auto_cfg))] +#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)] +#![warn( + missing_copy_implementations, + missing_debug_implementations, + missing_docs, + clippy::explicit_iter_loop, + clippy::future_not_send, + clippy::use_self, + clippy::clone_on_ref_ptr, + unreachable_pub +)] + +//! # object_store +//! +//! This crate provides a uniform API for interacting with object +//! storage services and local files via the [`ObjectStore`] +//! trait. +//! +//! Using this crate, the same binary and code can run in multiple +//! 
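As a sketch of how these shared conformance helpers compose (the test name, the `integration` feature gate, and the tokio test attribute are assumptions for illustration, not part of this patch), a backend can be exercised like so:

```rust
// Hypothetical consumer of the shared helpers above, run against the
// built-in in-memory store so no cloud credentials are required.
use object_store::integration::{copy_rename_nonexistent_object, multipart_out_of_order};
use object_store::memory::InMemory;

#[tokio::test]
async fn in_memory_conformance() {
    let store = InMemory::new();
    // Each helper asserts the behaviour every ObjectStore must share
    copy_rename_nonexistent_object(&store).await;
    multipart_out_of_order(&store).await;
}
```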
diff --git a/rust/object_store/src/lib.rs b/rust/object_store/src/lib.rs
new file mode 100644
index 0000000000..bb9f8b1012
--- /dev/null
+++ b/rust/object_store/src/lib.rs
@@ -0,0 +1,1673 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
+#![warn(
+    missing_copy_implementations,
+    missing_debug_implementations,
+    missing_docs,
+    clippy::explicit_iter_loop,
+    clippy::future_not_send,
+    clippy::use_self,
+    clippy::clone_on_ref_ptr,
+    unreachable_pub
+)]
+
+//! # object_store
+//!
+//! This crate provides a uniform API for interacting with object
+//! storage services and local files via the [`ObjectStore`] trait.
+//!
+//! Using this crate, the same binary and code can run in multiple
+//! clouds and local test environments, via a simple runtime
+//! configuration change.
+//!
+//! # Highlights
+//!
+//! 1. A high-performance async API focused on providing a consistent interface
+//!    mirroring that of object stores such as [S3]
+//!
+//! 2. Production quality, leading this crate to be used in large
+//!    scale production systems, such as [crates.io] and [InfluxDB IOx]
+//!
+//! 3. Support for advanced functionality, including atomic, conditional reads
+//!    and writes, vectored IO, bulk deletion, and more...
+//!
+//! 4. Stable and predictable governance via the [Apache Arrow] project
+//!
+//! 5. Small dependency footprint, depending on only a small number of common crates
+//!
+//! Originally developed by [InfluxData] and subsequently donated
+//! to [Apache Arrow].
+//!
+//! [Apache Arrow]: https://arrow.apache.org/
+//! [InfluxData]: https://www.influxdata.com/
+//! [crates.io]: https://github.com/rust-lang/crates.io
+//! [ACID]: https://en.wikipedia.org/wiki/ACID
+//! [S3]: https://aws.amazon.com/s3/
+//!
+//! # Available [`ObjectStore`] Implementations
+//!
+//! By default, this crate provides the following implementations:
+//!
+//! * Memory: [`InMemory`](memory::InMemory)
+//!
+//! Feature flags are used to enable support for other implementations:
+//!
+#![cfg_attr(
+    feature = "fs",
+    doc = "* Local filesystem: [`LocalFileSystem`](local::LocalFileSystem)"
+)]
+#![cfg_attr(
+    feature = "gcp",
+    doc = "* [`gcp`]: [Google Cloud Storage](https://cloud.google.com/storage/) support. See [`GoogleCloudStorageBuilder`](gcp::GoogleCloudStorageBuilder)"
+)]
+#![cfg_attr(
+    feature = "aws",
+    doc = "* [`aws`]: [Amazon S3](https://aws.amazon.com/s3/). See [`AmazonS3Builder`](aws::AmazonS3Builder)"
+)]
+#![cfg_attr(
+    feature = "azure",
+    doc = "* [`azure`]: [Azure Blob Storage](https://azure.microsoft.com/en-gb/services/storage/blobs/). See [`MicrosoftAzureBuilder`](azure::MicrosoftAzureBuilder)"
+)]
+#![cfg_attr(
+    feature = "http",
+    doc = "* [`http`]: [HTTP/WebDAV Storage](https://datatracker.ietf.org/doc/html/rfc2518). See [`HttpBuilder`](http::HttpBuilder)"
+)]
+//!
+//! # Why not a Filesystem Interface?
+//!
+//! The [`ObjectStore`] interface is designed to mirror the APIs
+//! of object stores and *not* filesystems, and thus has stateless APIs instead
+//! of cursor based interfaces such as [`Read`] or [`Seek`] available in filesystems.
+//!
+//! This design provides the following advantages:
+//!
+//! * All operations are atomic, and readers cannot observe partial and/or failed writes
+//! * Methods map directly to object store APIs, providing both efficiency and predictability
+//! * Abstracts away filesystem and operating system specific quirks, ensuring portability
+//! * Allows for functionality not native to filesystems, such as operation preconditions
+//!   and atomic multipart uploads
+//!
+//! This crate does provide [`BufReader`] and [`BufWriter`] adapters
+//! which provide a more filesystem-like API for working with the
+//! [`ObjectStore`] trait, however, they should be used with care
+//!
+//! [`BufReader`]: buffered::BufReader
+//! [`BufWriter`]: buffered::BufWriter
+//!
+//! # Adapters
+//!
+//! [`ObjectStore`] instances can be composed with various adapters
+//! which add additional functionality:
+//!
+//! * Rate Throttling: [`ThrottleConfig`](throttle::ThrottleConfig)
+//! * Concurrent Request Limit: [`LimitStore`](limit::LimitStore)
+//!
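As a quick illustrative sketch (the wrapper choice, the helper name, and the limit of 8 are arbitrary assumptions), adapters simply nest around any `ObjectStore`:

```rust
use object_store::{limit::LimitStore, memory::InMemory, ObjectStore};
use std::sync::Arc;

// Hypothetical constructor: cap concurrent operations against the
// wrapped store at 8, then erase the concrete type behind Arc<dyn ObjectStore>.
fn build_store() -> Arc<dyn ObjectStore> {
    Arc::new(LimitStore::new(InMemory::new(), 8))
}
```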
+//! # Configuration System
+//!
+//! This crate provides a configuration system inspired by the APIs exposed by [fsspec],
+//! [PyArrow FileSystem], and [Hadoop FileSystem], allowing creating a [`DynObjectStore`]
+//! from a URL and an optional list of key value pairs. This provides a flexible interface
+//! to support a wide variety of user-defined store configurations, with minimal additional
+//! application complexity.
+//!
+//! ```no_run,ignore-wasm32
+//! # #[cfg(feature = "aws")] {
+//! # use url::Url;
+//! # use object_store::{parse_url, parse_url_opts};
+//! # use object_store::aws::{AmazonS3, AmazonS3Builder};
+//! #
+//! #
+//! // Can manually create a specific store variant using the appropriate builder
+//! let store: AmazonS3 = AmazonS3Builder::from_env()
+//!     .with_bucket_name("my-bucket").build().unwrap();
+//!
+//! // Alternatively can create an ObjectStore from an S3 URL
+//! let url = Url::parse("s3://bucket/path").unwrap();
+//! let (store, path) = parse_url(&url).unwrap();
+//! assert_eq!(path.as_ref(), "path");
+//!
+//! // Potentially with additional options
+//! let (store, path) = parse_url_opts(&url, vec![("aws_access_key_id", "...")]).unwrap();
+//!
+//! // Or with URLs that encode the bucket name in the URL path
+//! let url = Url::parse("https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket/path").unwrap();
+//! let (store, path) = parse_url(&url).unwrap();
+//! assert_eq!(path.as_ref(), "path");
+//! # }
+//! ```
+//!
+//! [PyArrow FileSystem]: https://arrow.apache.org/docs/python/generated/pyarrow.fs.FileSystem.html#pyarrow.fs.FileSystem.from_uri
+//! [fsspec]: https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem
+//! [Hadoop FileSystem]: https://hadoop.apache.org/docs/r3.0.0/api/org/apache/hadoop/fs/FileSystem.html#get-java.net.URI-org.apache.hadoop.conf.Configuration-
+//!
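A feature-free sketch of the same URL-driven construction (assuming the `memory:///` scheme remains registered with `parse_url`, which needs no cloud features or credentials):

```rust
use object_store::parse_url;
use url::Url;

fn main() {
    // In-memory scheme: convenient for smoke-testing URL parsing
    let url = Url::parse("memory:///data/file").unwrap();
    let (_store, path) = parse_url(&url).unwrap();
    assert_eq!(path.as_ref(), "data/file");
}
```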
+//! # List objects
+//!
+//! Use the [`ObjectStore::list`] method to iterate over objects in
+//! remote storage or files in the local filesystem:
+//!
+//! ```ignore-wasm32
+//! # use object_store::local::LocalFileSystem;
+//! # use std::sync::Arc;
+//! # use object_store::{path::Path, ObjectStore};
+//! # use futures::stream::StreamExt;
+//! # // use LocalFileSystem for example
+//! # fn get_object_store() -> Arc<dyn ObjectStore> {
+//! #   Arc::new(LocalFileSystem::new())
+//! # }
+//! #
+//! # async fn example() {
+//! #
+//! // create an ObjectStore
+//! let object_store: Arc<dyn ObjectStore> = get_object_store();
+//!
+//! // Recursively list all files below the 'data' path.
+//! // 1. On AWS S3 this would be the 'data/' prefix
+//! // 2. On a local filesystem, this would be the 'data' directory
+//! let prefix = Path::from("data");
+//!
+//! // Get an `async` stream of Metadata objects:
+//! let mut list_stream = object_store.list(Some(&prefix));
+//!
+//! // Print a line about each object
+//! while let Some(meta) = list_stream.next().await.transpose().unwrap() {
+//!     println!("Name: {}, size: {}", meta.location, meta.size);
+//! }
+//! # }
+//! ```
+//!
+//! Which will print out something like the following:
+//!
+//! ```text
+//! Name: data/file01.parquet, size: 112832
+//! Name: data/file02.parquet, size: 143119
+//! Name: data/child/file03.parquet, size: 100
+//! ...
+//! ```
+//!
+//! # Fetch objects
+//!
+//! Use the [`ObjectStore::get`] method to fetch the data bytes
+//! from remote storage or files in the local filesystem as a stream.
+//!
+//! ```ignore-wasm32
+//! # use futures::TryStreamExt;
+//! # use object_store::local::LocalFileSystem;
+//! # use std::sync::Arc;
+//! # use bytes::Bytes;
+//! # use object_store::{path::Path, ObjectStore, GetResult};
+//! # fn get_object_store() -> Arc<dyn ObjectStore> {
+//! #   Arc::new(LocalFileSystem::new())
+//! # }
+//! #
+//! # async fn example() {
+//! #
+//! // Create an ObjectStore
+//! let object_store: Arc<dyn ObjectStore> = get_object_store();
+//!
+//! // Retrieve a specific file
+//! let path = Path::from("data/file01.parquet");
+//!
+//! // Fetch just the file metadata
+//! let meta = object_store.head(&path).await.unwrap();
+//! println!("{meta:?}");
+//!
+//! // Fetch the object including metadata
+//! let result: GetResult = object_store.get(&path).await.unwrap();
+//! assert_eq!(result.meta, meta);
+//!
+//! // Buffer the entire object in memory
+//! let object: Bytes = result.bytes().await.unwrap();
+//! assert_eq!(object.len() as u64, meta.size);
+//!
+//! // Alternatively stream the bytes from object storage
+//! let stream = object_store.get(&path).await.unwrap().into_stream();
+//!
+//! // Count the '0's using `try_fold` from `TryStreamExt` trait
+//! let num_zeros = stream
+//!     .try_fold(0, |acc, bytes| async move {
+//!         Ok(acc + bytes.iter().filter(|b| **b == 0).count())
+//!     }).await.unwrap();
+//!
+//! println!("Num zeros in {} is {}", path, num_zeros);
+//! # }
+//! ```
+//!
+//! # Put Object
+//!
+//! Use the [`ObjectStore::put`] method to atomically write data.
+//!
+//! ```ignore-wasm32
+//! # use object_store::local::LocalFileSystem;
+//! # use object_store::{ObjectStore, PutPayload};
+//! # use std::sync::Arc;
+//! # use object_store::path::Path;
+//! # fn get_object_store() -> Arc<dyn ObjectStore> {
+//! #   Arc::new(LocalFileSystem::new())
+//! # }
+//! # async fn put() {
+//! #
+//! let object_store: Arc<dyn ObjectStore> = get_object_store();
+//! let path = Path::from("data/file1");
+//! let payload = PutPayload::from_static(b"hello");
+//! object_store.put(&path, payload).await.unwrap();
+//! # }
+//! ```
+//!
+//! # Multipart Upload
+//!
+//! Use the [`ObjectStore::put_multipart`] method to atomically write a large amount of data
+//!
+//! ```ignore-wasm32
+//! # use object_store::local::LocalFileSystem;
+//! # use object_store::{ObjectStore, WriteMultipart};
+//! # use std::sync::Arc;
+//! # use bytes::Bytes;
+//! # use tokio::io::AsyncWriteExt;
+//! # use object_store::path::Path;
+//! # fn get_object_store() -> Arc<dyn ObjectStore> {
+//! #   Arc::new(LocalFileSystem::new())
+//! # }
+//! # async fn multi_upload() {
+//! #
+//! let object_store: Arc<dyn ObjectStore> = get_object_store();
+//! let path = Path::from("data/large_file");
+//! let upload = object_store.put_multipart(&path).await.unwrap();
+//! let mut write = WriteMultipart::new(upload);
+//! write.write(b"hello");
+//! write.finish().await.unwrap();
+//! # }
+//! ```
+//!
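A slightly fuller sketch of the streaming pattern (chunk size, loop bounds, and the in-memory backend are arbitrary assumptions for illustration):

```rust
use object_store::{memory::InMemory, path::Path, ObjectStore, WriteMultipart};

#[tokio::main]
async fn main() {
    let store = InMemory::new();
    let upload = store.put_multipart(&Path::from("data/stream")).await.unwrap();
    let mut write = WriteMultipart::new(upload);
    for _ in 0..1024 {
        // Data is buffered internally; parts are uploaded as chunks fill
        write.write(&[0u8; 1024]);
    }
    // finish() flushes remaining data and completes the multipart upload
    write.finish().await.unwrap();
}
```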
+//! # Vectored Read
+//!
+//! A common pattern, especially when reading structured datasets, is to need to fetch
+//! multiple, potentially non-contiguous, ranges of a particular object.
+//!
+//! [`ObjectStore::get_ranges`] provides an efficient way to perform such vectored IO, and will
+//! automatically coalesce adjacent ranges into an appropriate number of parallel requests.
+//!
+//! ```ignore-wasm32
+//! # use object_store::local::LocalFileSystem;
+//! # use object_store::ObjectStore;
+//! # use std::sync::Arc;
+//! # use bytes::Bytes;
+//! # use tokio::io::AsyncWriteExt;
+//! # use object_store::path::Path;
+//! # fn get_object_store() -> Arc<dyn ObjectStore> {
+//! #   Arc::new(LocalFileSystem::new())
+//! # }
+//! # async fn multi_upload() {
+//! #
+//! let object_store: Arc<dyn ObjectStore> = get_object_store();
+//! let path = Path::from("data/large_file");
+//! let ranges = object_store.get_ranges(&path, &[90..100, 400..600, 0..10]).await.unwrap();
+//! assert_eq!(ranges.len(), 3);
+//! assert_eq!(ranges[0].len(), 10);
+//! # }
+//! ```
+//!
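The coalescing behaviour itself is provided by the exported `coalesce_ranges` helper; a standalone sketch of its contract (the backing data and ranges here are made up, and the closure-based fetch is an assumption about how a caller would wire it up):

```rust
use bytes::Bytes;
use object_store::{coalesce_ranges, OBJECT_STORE_COALESCE_DEFAULT};

#[tokio::main]
async fn main() {
    let data = Bytes::from(vec![0u8; 1024]);
    let ranges = coalesce_ranges(
        &[0..10, 20..30, 500..600],
        |range| {
            // The fetch closure may be called with wider, coalesced ranges
            let data = data.clone();
            async move {
                Ok::<_, object_store::Error>(
                    data.slice(range.start as usize..range.end as usize),
                )
            }
        },
        OBJECT_STORE_COALESCE_DEFAULT,
    )
    .await
    .unwrap();

    // One Bytes per requested range, however the fetches were coalesced
    assert_eq!(ranges.len(), 3);
    assert_eq!(ranges[0].len(), 10);
}
```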
+//! # Vectored Write
+//!
+//! When writing data it is often the case that the size of the output is not known ahead of time.
+//!
+//! A common approach to handling this is to bump-allocate a `Vec`, whereby the underlying
+//! allocation is repeatedly reallocated, each time doubling the capacity. The performance of
+//! this is suboptimal as reallocating memory will often involve copying it to a new location.
+//!
+//! Fortunately, as [`PutPayload`] does not require memory regions to be contiguous, it is
+//! possible to instead allocate memory in chunks and avoid bump allocating. [`PutPayloadMut`]
+//! encapsulates this approach
+//!
+//! ```ignore-wasm32
+//! # use object_store::local::LocalFileSystem;
+//! # use object_store::{ObjectStore, PutPayloadMut};
+//! # use std::sync::Arc;
+//! # use bytes::Bytes;
+//! # use tokio::io::AsyncWriteExt;
+//! # use object_store::path::Path;
+//! # fn get_object_store() -> Arc<dyn ObjectStore> {
+//! #   Arc::new(LocalFileSystem::new())
+//! # }
+//! # async fn multi_upload() {
+//! #
+//! let object_store: Arc<dyn ObjectStore> = get_object_store();
+//! let path = Path::from("data/large_file");
+//! let mut buffer = PutPayloadMut::new().with_block_size(8192);
+//! for _ in 0..22 {
+//!     buffer.extend_from_slice(&[0; 1024]);
+//! }
+//! let payload = buffer.freeze();
+//!
+//! // Payload consists of 3 separate 8KB allocations
+//! assert_eq!(payload.as_ref().len(), 3);
+//! assert_eq!(payload.as_ref()[0].len(), 8192);
+//! assert_eq!(payload.as_ref()[1].len(), 8192);
+//! assert_eq!(payload.as_ref()[2].len(), 6144);
+//!
+//! object_store.put(&path, payload).await.unwrap();
+//! # }
+//! ```
+//!
+//! # Conditional Fetch
+//!
+//! More complex object retrieval can be supported by [`ObjectStore::get_opts`].
+//!
+//! For example, efficiently refreshing a cache without re-fetching the entire object
+//! data if the object hasn't been modified.
+//!
+//! ```
+//! # use std::collections::btree_map::Entry;
+//! # use std::collections::HashMap;
+//! # use object_store::{GetOptions, GetResult, ObjectStore, Result, Error};
+//! # use std::sync::Arc;
+//! # use std::time::{Duration, Instant};
+//! # use bytes::Bytes;
+//! # use tokio::io::AsyncWriteExt;
+//! # use object_store::path::Path;
+//! struct CacheEntry {
+//!     /// Data returned by last request
+//!     data: Bytes,
+//!     /// ETag identifying the object returned by the server
+//!     e_tag: String,
+//!     /// Instant of last refresh
+//!     refreshed_at: Instant,
+//! }
+//!
+//! /// Example cache that checks entries after 10 seconds for a new version
+//! struct Cache {
+//!     entries: HashMap<Path, CacheEntry>,
+//!     store: Arc<dyn ObjectStore>,
+//! }
+//!
+//! impl Cache {
+//!     pub async fn get(&mut self, path: &Path) -> Result<Bytes> {
+//!         Ok(match self.entries.get_mut(path) {
+//!             Some(e) => match e.refreshed_at.elapsed() < Duration::from_secs(10) {
+//!                 true => e.data.clone(), // Return cached data
+//!                 false => { // Check if remote version has changed
+//!                     let opts = GetOptions {
+//!                         if_none_match: Some(e.e_tag.clone()),
+//!                         ..GetOptions::default()
+//!                     };
+//!                     match self.store.get_opts(&path, opts).await {
+//!                         Ok(d) => e.data = d.bytes().await?,
+//!                         Err(Error::NotModified { .. }) => {} // Data has not changed
+//!                         Err(e) => return Err(e),
+//!                     };
+//!                     e.refreshed_at = Instant::now();
+//!                     e.data.clone()
+//!                 }
+//!             },
+//!             None => { // Not cached, fetch data
+//!                 let get = self.store.get(&path).await?;
+//!                 let e_tag = get.meta.e_tag.clone();
+//!                 let data = get.bytes().await?;
+//!                 if let Some(e_tag) = e_tag {
+//!                     let entry = CacheEntry {
+//!                         e_tag,
+//!                         data: data.clone(),
+//!                         refreshed_at: Instant::now(),
+//!                     };
+//!                     self.entries.insert(path.clone(), entry);
+//!                 }
+//!                 data
+//!             }
+//!         })
+//!     }
+//! }
+//! ```
+//!
+//! # Conditional Put
+//!
+//! The default behaviour when writing data is to upsert any existing object at the given path,
+//! overwriting any previous value. More complex behaviours can be achieved using [`PutMode`], and
+//! can be used to build [Optimistic Concurrency Control] based transactions. This facilitates
+//! building metadata catalogs, such as [Apache Iceberg] or [Delta Lake], directly on top of object
+//! storage, without relying on a separate DBMS.
+//!
+//! ```
+//! # use object_store::{Error, ObjectStore, PutMode, UpdateVersion};
+//! # use std::sync::Arc;
+//! # use bytes::Bytes;
+//! # use tokio::io::AsyncWriteExt;
+//! # use object_store::memory::InMemory;
+//! # use object_store::path::Path;
+//! # fn get_object_store() -> Arc<dyn ObjectStore> {
+//! #   Arc::new(InMemory::new())
+//! # }
+//! # fn do_update(b: Bytes) -> Bytes {b}
+//! # async fn conditional_put() {
+//! let store = get_object_store();
+//! let path = Path::from("test");
+//!
+//! // Perform a conditional update on path
+//! loop {
+//!     // Perform get request
+//!     let r = store.get(&path).await.unwrap();
+//!
+//!     // Save version information fetched
+//!     let version = UpdateVersion {
+//!         e_tag: r.meta.e_tag.clone(),
+//!         version: r.meta.version.clone(),
+//!     };
+//!
+//!     // Compute new version of object contents
+//!     let new = do_update(r.bytes().await.unwrap());
+//!
+//!     // Attempt to commit transaction
+//!     match store.put_opts(&path, new.into(), PutMode::Update(version).into()).await {
+//!         Ok(_) => break, // Successfully committed
+//!         Err(Error::Precondition { .. }) => continue, // Object has changed, try again
+//!         Err(e) => panic!("{e}")
+//!     }
+//! }
+//! # }
+//! ```
+//!
+//! [Optimistic Concurrency Control]: https://en.wikipedia.org/wiki/Optimistic_concurrency_control
+//! [Apache Iceberg]: https://iceberg.apache.org/
+//! [Delta Lake]: https://delta.io/
+//!
+//! # TLS Certificates
+//!
+//! Stores that use HTTPS/TLS (this is true for most cloud stores) can choose the source of their [CA]
+//! certificates. By default the system-bundled certificates are used (see
+//! [`rustls-native-certs`]). The `tls-webpki-roots` feature switch can be used to also bundle Mozilla's
+//! root certificates with the library/application (see [`webpki-roots`]).
+//!
+//! [CA]: https://en.wikipedia.org/wiki/Certificate_authority
+//! [`rustls-native-certs`]: https://crates.io/crates/rustls-native-certs/
+//! [`webpki-roots`]: https://crates.io/crates/webpki-roots
+//!
+//! # Customizing HTTP Clients
+//!
+//! Many [`ObjectStore`] implementations permit customization of the HTTP client via
+//! the [`HttpConnector`] trait and utilities in the [`client`] module.
+//! Examples include injecting custom HTTP headers or using an alternate
+//! tokio Runtime for I/O requests.
+//!
+//! [`HttpConnector`]: client::HttpConnector
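A small sketch of client customization via `ClientOptions` (the bucket name and tuning values are placeholder assumptions, and the `aws` feature is required):

```rust
#[cfg(feature = "aws")]
fn build_s3() -> object_store::aws::AmazonS3 {
    use object_store::aws::AmazonS3Builder;
    use object_store::ClientOptions;
    use std::time::Duration;

    // Hypothetical tuning values, for illustration only
    let options = ClientOptions::new()
        .with_allow_http(true)
        .with_timeout(Duration::from_secs(30));

    AmazonS3Builder::from_env()
        .with_bucket_name("my-bucket")
        .with_client_options(options)
        .build()
        .unwrap()
}
```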
+
+#[cfg(feature = "aws")]
+pub mod aws;
+#[cfg(feature = "azure")]
+pub mod azure;
+pub mod buffered;
+#[cfg(not(target_arch = "wasm32"))]
+pub mod chunked;
+pub mod delimited;
+#[cfg(feature = "gcp")]
+pub mod gcp;
+#[cfg(feature = "http")]
+pub mod http;
+pub mod limit;
+#[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+pub mod local;
+pub mod memory;
+pub mod path;
+pub mod prefix;
+pub mod registry;
+#[cfg(feature = "cloud")]
+pub mod signer;
+pub mod throttle;
+
+#[cfg(feature = "cloud")]
+pub mod client;
+
+#[cfg(feature = "cloud")]
+pub use client::{
+    backoff::BackoffConfig, retry::RetryConfig, ClientConfigKey, ClientOptions, CredentialProvider,
+    StaticCredentialProvider,
+};
+
+#[cfg(all(feature = "cloud", not(target_arch = "wasm32")))]
+pub use client::Certificate;
+
+#[cfg(feature = "cloud")]
+mod config;
+
+mod tags;
+
+pub use tags::TagSet;
+
+pub mod list;
+pub mod multipart;
+mod parse;
+mod payload;
+mod upload;
+mod util;
+
+mod attributes;
+
+#[cfg(any(feature = "integration", test))]
+pub mod integration;
+
+pub use attributes::*;
+
+pub use parse::{parse_url, parse_url_opts, ObjectStoreScheme};
+pub use payload::*;
+pub use upload::*;
+pub use util::{coalesce_ranges, collect_bytes, GetRange, OBJECT_STORE_COALESCE_DEFAULT};
+
+// Re-export HTTP types used in public API
+pub use ::http::{Extensions, HeaderMap, HeaderValue};
+
+use crate::path::Path;
+#[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+use crate::util::maybe_spawn_blocking;
+use async_trait::async_trait;
+use bytes::Bytes;
+use chrono::{DateTime, Utc};
+use futures::{stream::BoxStream, StreamExt, TryStreamExt};
+use std::fmt::{Debug, Formatter};
+#[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+use std::io::{Read, Seek, SeekFrom};
+use std::ops::Range;
+use std::sync::Arc;
+
+/// An alias for a dynamically dispatched object store implementation.
+pub type DynObjectStore = dyn ObjectStore;
+
+/// Id type for multipart uploads.
+pub type MultipartId = String;
+
+/// Universal API to multiple object store services.
+#[async_trait]
+pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static {
+    /// Save the provided bytes to the specified location
+    ///
+    /// The operation is guaranteed to be atomic: it will either successfully
+    /// write the entirety of `payload` to `location`, or fail. No clients
+    /// should be able to observe a partially written object
+    async fn put(&self, location: &Path, payload: PutPayload) -> Result<PutResult> {
+        self.put_opts(location, payload, PutOptions::default())
+            .await
+    }
+
+    /// Save the provided `payload` to `location` with the given options
+    async fn put_opts(
+        &self,
+        location: &Path,
+        payload: PutPayload,
+        opts: PutOptions,
+    ) -> Result<PutResult>;
+
+    /// Perform a multipart upload
+    ///
+    /// Clients should prefer [`ObjectStore::put`] for small payloads, as streaming uploads
+    /// typically require multiple separate requests. See [`MultipartUpload`] for more information
+    ///
+    /// For more advanced multipart uploads see [`MultipartStore`](multipart::MultipartStore)
+    async fn put_multipart(&self, location: &Path) -> Result<Box<dyn MultipartUpload>> {
+        self.put_multipart_opts(location, PutMultipartOptions::default())
+            .await
+    }
+
+    /// Perform a multipart upload with options
+    ///
+    /// Clients should prefer [`ObjectStore::put`] for small payloads, as streaming uploads
+    /// typically require multiple separate requests. See [`MultipartUpload`] for more information
+    ///
+    /// For more advanced multipart uploads see [`MultipartStore`](multipart::MultipartStore)
+    async fn put_multipart_opts(
+        &self,
+        location: &Path,
+        opts: PutMultipartOptions,
+    ) -> Result<Box<dyn MultipartUpload>>;
+
+    /// Return the bytes that are stored at the specified location.
+    async fn get(&self, location: &Path) -> Result<GetResult> {
+        self.get_opts(location, GetOptions::default()).await
+    }
+
+    /// Perform a get request with options
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult>;
+
+    /// Return the bytes that are stored at the specified location
+    /// in the given byte range.
+    ///
+    /// See [`GetRange::Bounded`] for more details on how `range` gets interpreted
+    async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
+        let options = GetOptions {
+            range: Some(range.into()),
+            ..Default::default()
+        };
+        self.get_opts(location, options).await?.bytes().await
+    }
+
+    /// Return the bytes that are stored at the specified location
+    /// in the given byte ranges
+    async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
+        coalesce_ranges(
+            ranges,
+            |range| self.get_range(location, range),
+            OBJECT_STORE_COALESCE_DEFAULT,
+        )
+        .await
+    }
+
+    /// Return the metadata for the specified location
+    async fn head(&self, location: &Path) -> Result<ObjectMeta> {
+        let options = GetOptions {
+            head: true,
+            ..Default::default()
+        };
+        Ok(self.get_opts(location, options).await?.meta)
+    }
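To make the default-method plumbing above concrete, a brief sketch against the in-memory store (the path and sizes are arbitrary):

```rust
use object_store::{memory::InMemory, path::Path, ObjectStore};

#[tokio::main]
async fn main() {
    let store = InMemory::new();
    let path = Path::from("blob");
    store.put(&path, vec![7u8; 64].into()).await.unwrap();

    // head() is implemented in terms of get_opts() with head: true
    let meta = store.head(&path).await.unwrap();
    assert_eq!(meta.size, 64);

    // get_range() is implemented in terms of get_opts() with a bounded range
    let bytes = store.get_range(&path, 0..10).await.unwrap();
    assert_eq!(bytes.len(), 10);
}
```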
+
+    /// Delete the object at the specified location.
+    async fn delete(&self, location: &Path) -> Result<()>;
+
+    /// Delete all the objects at the specified locations
+    ///
+    /// When supported, this method will use bulk operations that delete more
+    /// than one object per request. The default implementation will call
+    /// the single object delete method for each location, but with up to 10
+    /// concurrent requests.
+    ///
+    /// The returned stream yields the results of the delete operations in the
+    /// same order as the input locations. However, some errors will be from
+    /// an overall call to a bulk delete operation, and not from a specific
+    /// location.
+    ///
+    /// If the object did not exist, the result may be an error or a success,
+    /// depending on the behavior of the underlying store. For example, local
+    /// filesystems, GCP, and Azure return an error, while S3 and in-memory will
+    /// return Ok. If it is an error, it will be [`Error::NotFound`].
+    ///
+    /// ```ignore-wasm32
+    /// # use futures::{StreamExt, TryStreamExt};
+    /// # use object_store::local::LocalFileSystem;
+    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+    /// # let root = tempfile::TempDir::new().unwrap();
+    /// # let store = LocalFileSystem::new_with_prefix(root.path()).unwrap();
+    /// # use object_store::{ObjectStore, ObjectMeta};
+    /// # use object_store::path::Path;
+    /// #
+    /// // Create two objects
+    /// store.put(&Path::from("foo"), "foo".into()).await?;
+    /// store.put(&Path::from("bar"), "bar".into()).await?;
+    ///
+    /// // List the objects
+    /// let locations = store.list(None).map_ok(|m| m.location).boxed();
+    ///
+    /// // Delete them
+    /// store.delete_stream(locations).try_collect::<Vec<Path>>().await?;
+    /// # Ok(())
+    /// # }
+    /// # let rt = tokio::runtime::Builder::new_current_thread().build().unwrap();
+    /// # rt.block_on(example()).unwrap();
+    /// ```
+    fn delete_stream<'a>(
+        &'a self,
+        locations: BoxStream<'a, Result<Path>>,
+    ) -> BoxStream<'a, Result<Path>> {
+        locations
+            .map(|location| async {
+                let location = location?;
+                self.delete(&location).await?;
+                Ok(location)
+            })
+            .buffered(10)
+            .boxed()
+    }
+
+    /// List all the objects with the given prefix.
+    ///
+    /// Prefixes are evaluated on a path segment basis, i.e. `foo/bar` is a prefix of `foo/bar/x` but not of
+    /// `foo/bar_baz/x`. List is recursive, i.e. `foo/bar/more/x` will be included.
+    ///
+    /// Note: the order of returned [`ObjectMeta`] is not guaranteed
+    ///
+    /// For more advanced listing see [`PaginatedListStore`](list::PaginatedListStore)
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>>;
+
+    /// List all the objects with the given prefix and a location greater than `offset`
+    ///
+    /// Some stores, such as S3 and GCS, may be able to push `offset` down to reduce
+    /// the number of network requests required
+    ///
+    /// Note: the order of returned [`ObjectMeta`] is not guaranteed
+    ///
+    /// For more advanced listing see [`PaginatedListStore`](list::PaginatedListStore)
+    fn list_with_offset(
+        &self,
+        prefix: Option<&Path>,
+        offset: &Path,
+    ) -> BoxStream<'static, Result<ObjectMeta>> {
+        let offset = offset.clone();
+        self.list(prefix)
+            .try_filter(move |f| futures::future::ready(f.location > offset))
+            .boxed()
+    }
+
+    /// List objects with the given prefix and an implementation specific
+    /// delimiter. Returns common prefixes (directories) in addition to object
+    /// metadata.
+    ///
+    /// Prefixes are evaluated on a path segment basis, i.e. `foo/bar` is a prefix of `foo/bar/x` but not of
+    /// `foo/bar_baz/x`. List is not recursive, i.e. `foo/bar/more/x` will not be included.
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult>;
+
+    /// Copy an object from one path to another in the same object store.
+    ///
+    /// If there exists an object at the destination, it will be overwritten.
+    async fn copy(&self, from: &Path, to: &Path) -> Result<()>;
+
+    /// Move an object from one path to another in the same object store.
+    ///
+    /// By default, this is implemented as a copy and then delete source. It may not
+    /// check when deleting source that it was the same object that was originally copied.
+    ///
+    /// If there exists an object at the destination, it will be overwritten.
+    async fn rename(&self, from: &Path, to: &Path) -> Result<()> {
+        self.copy(from, to).await?;
+        self.delete(from).await
+    }
+
+    /// Copy an object from one path to another, only if destination is empty.
+    ///
+    /// Will return an error if the destination already has an object.
+    ///
+    /// Performs an atomic operation if the underlying object storage supports it.
+    /// If atomic operations are not supported by the underlying object storage (like S3)
+    /// it will return an error.
+    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()>;
+
+    /// Move an object from one path to another in the same object store.
+    ///
+    /// Will return an error if the destination already has an object.
+    async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
+        self.copy_if_not_exists(from, to).await?;
+        self.delete(from).await
+    }
+}
+
+macro_rules! as_ref_impl {
+    ($type:ty) => {
+        #[async_trait]
+        impl ObjectStore for $type {
+            async fn put(&self, location: &Path, payload: PutPayload) -> Result<PutResult> {
+                self.as_ref().put(location, payload).await
+            }
+
+            async fn put_opts(
+                &self,
+                location: &Path,
+                payload: PutPayload,
+                opts: PutOptions,
+            ) -> Result<PutResult> {
+                self.as_ref().put_opts(location, payload, opts).await
+            }
+
+            async fn put_multipart(&self, location: &Path) -> Result<Box<dyn MultipartUpload>> {
+                self.as_ref().put_multipart(location).await
+            }
+
+            async fn put_multipart_opts(
+                &self,
+                location: &Path,
+                opts: PutMultipartOptions,
+            ) -> Result<Box<dyn MultipartUpload>> {
+                self.as_ref().put_multipart_opts(location, opts).await
+            }
+
+            async fn get(&self, location: &Path) -> Result<GetResult> {
+                self.as_ref().get(location).await
+            }
+
+            async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
+                self.as_ref().get_opts(location, options).await
+            }
+
+            async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
+                self.as_ref().get_range(location, range).await
+            }
+
+            async fn get_ranges(
+                &self,
+                location: &Path,
+                ranges: &[Range<u64>],
+            ) -> Result<Vec<Bytes>> {
+                self.as_ref().get_ranges(location, ranges).await
+            }
+
+            async fn head(&self, location: &Path) -> Result<ObjectMeta> {
+                self.as_ref().head(location).await
+            }
+
+            async fn delete(&self, location: &Path) -> Result<()> {
+                self.as_ref().delete(location).await
+            }
+
+            fn delete_stream<'a>(
+                &'a self,
+                locations: BoxStream<'a, Result<Path>>,
+            ) -> BoxStream<'a, Result<Path>> {
+                self.as_ref().delete_stream(locations)
+            }
+
+            fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
+                self.as_ref().list(prefix)
+            }
+
+            fn list_with_offset(
+                &self,
+                prefix: Option<&Path>,
+                offset: &Path,
+            ) -> BoxStream<'static, Result<ObjectMeta>> {
+                self.as_ref().list_with_offset(prefix, offset)
+            }
+
+            async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
+                self.as_ref().list_with_delimiter(prefix).await
+            }
+
+            async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
+                self.as_ref().copy(from, to).await
+            }
+
+            async fn rename(&self, from: &Path, to: &Path) -> Result<()> {
+                self.as_ref().rename(from, to).await
+            }
+
+            async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
+                self.as_ref().copy_if_not_exists(from, to).await
+            }
+
+            async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
+                self.as_ref().rename_if_not_exists(from, to).await
+            }
+        }
+    };
+}
+
+as_ref_impl!(Arc<dyn ObjectStore>);
+as_ref_impl!(Box<dyn ObjectStore>);
+
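A short sketch of the one-level listing contract (paths and payloads here are arbitrary):

```rust
use object_store::{memory::InMemory, path::Path, ObjectStore};

#[tokio::main]
async fn main() {
    let store = InMemory::new();
    store.put(&Path::from("data/a"), "1".into()).await.unwrap();
    store.put(&Path::from("data/dir/b"), "2".into()).await.unwrap();

    // Non-recursive: "data/dir/b" is reported only via its common prefix
    let res = store
        .list_with_delimiter(Some(&Path::from("data")))
        .await
        .unwrap();
    assert_eq!(res.objects.len(), 1); // data/a
    assert_eq!(res.common_prefixes, vec![Path::from("data/dir")]);
}
```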
+/// Result of a list call that includes objects, prefixes (directories) and a
+/// token for the next set of results. Individual result sets may be limited to
+/// 1,000 objects based on the underlying object storage's limitations.
+#[derive(Debug)]
+pub struct ListResult {
+    /// Prefixes that are common (like directories)
+    pub common_prefixes: Vec<Path>,
+    /// Object metadata for the listing
+    pub objects: Vec<ObjectMeta>,
+}
+
+/// The metadata that describes an object.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ObjectMeta {
+    /// The full path to the object
+    pub location: Path,
+    /// The last modified time
+    pub last_modified: DateTime<Utc>,
+    /// The size in bytes of the object.
+    ///
+    /// Note this is not `usize` as `object_store` supports 32-bit architectures such as WASM
+    pub size: u64,
+    /// The unique identifier for the object
+    ///
+    pub e_tag: Option<String>,
+    /// A version indicator for this object
+    pub version: Option<String>,
+}
+
+/// Options for a get request, such as range
+#[derive(Debug, Default, Clone)]
+pub struct GetOptions {
+    /// Request will succeed if the `ObjectMeta::e_tag` matches
+    /// otherwise returning [`Error::Precondition`]
+    ///
+    /// See
+    ///
+    /// Examples:
+    ///
+    /// ```text
+    /// If-Match: "xyzzy"
+    /// If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
+    /// If-Match: *
+    /// ```
+    pub if_match: Option<String>,
+    /// Request will succeed if the `ObjectMeta::e_tag` does not match
+    /// otherwise returning [`Error::NotModified`]
+    ///
+    /// See
+    ///
+    /// Examples:
+    ///
+    /// ```text
+    /// If-None-Match: "xyzzy"
+    /// If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
+    /// If-None-Match: *
+    /// ```
+    pub if_none_match: Option<String>,
+    /// Request will succeed if the object has been modified since
+    ///
+    pub if_modified_since: Option<DateTime<Utc>>,
+    /// Request will succeed if the object has not been modified since
+    /// otherwise returning [`Error::Precondition`]
+    ///
+    /// Some stores, such as S3, will only return `NotModified` for exact
+    /// timestamp matches, instead of for any timestamp greater than or equal.
+    ///
+    pub if_unmodified_since: Option<DateTime<Utc>>,
+    /// Request transfer of only the specified range of bytes
+    /// otherwise returning [`Error::NotModified`]
+    ///
+    pub range: Option<GetRange>,
+    /// Request a particular object version
+    pub version: Option<String>,
+    /// Request transfer of no content
+    ///
+    pub head: bool,
+    /// Implementation-specific extensions. Intended for use by [`ObjectStore`] implementations
+    /// that need to pass context-specific information (like tracing spans) via trait methods.
+    ///
+    /// These extensions are ignored entirely by backends offered through this crate.
+    pub extensions: Extensions,
+}
+
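A sketch of one common way to use these options: guarding a read-back with the ETag returned from a put (the in-memory store returns ETags; stores that do not would simply skip the guard):

```rust
use object_store::{memory::InMemory, path::Path, GetOptions, ObjectStore};

#[tokio::main]
async fn main() {
    let store = InMemory::new();
    let path = Path::from("guarded");
    let put = store.put(&path, "v1".into()).await.unwrap();

    let opts = GetOptions {
        if_match: put.e_tag.clone(),
        ..Default::default()
    };
    // Succeeds because the object is unchanged since the put
    store.get_opts(&path, opts).await.unwrap();
}
```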
+impl GetOptions {
+    /// Returns an error if the modification conditions on this request are not satisfied
+    ///
+    pub fn check_preconditions(&self, meta: &ObjectMeta) -> Result<()> {
+        // The use of the invalid etag "*" means no ETag is equivalent to never matching
+        let etag = meta.e_tag.as_deref().unwrap_or("*");
+        let last_modified = meta.last_modified;
+
+        if let Some(m) = &self.if_match {
+            if m != "*" && m.split(',').map(str::trim).all(|x| x != etag) {
+                return Err(Error::Precondition {
+                    path: meta.location.to_string(),
+                    source: format!("{etag} does not match {m}").into(),
+                });
+            }
+        } else if let Some(date) = self.if_unmodified_since {
+            if last_modified > date {
+                return Err(Error::Precondition {
+                    path: meta.location.to_string(),
+                    source: format!("{date} < {last_modified}").into(),
+                });
+            }
+        }
+
+        if let Some(m) = &self.if_none_match {
+            if m == "*" || m.split(',').map(str::trim).any(|x| x == etag) {
+                return Err(Error::NotModified {
+                    path: meta.location.to_string(),
+                    source: format!("{etag} matches {m}").into(),
+                });
+            }
+        } else if let Some(date) = self.if_modified_since {
+            if last_modified <= date {
+                return Err(Error::NotModified {
+                    path: meta.location.to_string(),
+                    source: format!("{date} >= {last_modified}").into(),
+                });
+            }
+        }
+        Ok(())
+    }
+}
+
+/// Result for a get request
+#[derive(Debug)]
+pub struct GetResult {
+    /// The [`GetResultPayload`]
+    pub payload: GetResultPayload,
+    /// The [`ObjectMeta`] for this object
+    pub meta: ObjectMeta,
+    /// The range of bytes returned by this request
+    ///
+    /// Note this is not `usize` as `object_store` supports 32-bit architectures such as WASM
+    pub range: Range<u64>,
+    /// Additional object attributes
+    pub attributes: Attributes,
+}
+
+/// The kind of a [`GetResult`]
+///
+/// This special cases the case of a local file, as some systems may
+/// be able to optimise the case of a file already present on local disk
+pub enum GetResultPayload {
+    /// The file, path
+    #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+    File(std::fs::File, std::path::PathBuf),
+    /// An opaque stream of bytes
+    Stream(BoxStream<'static, Result<Bytes>>),
+}
+
+impl Debug for GetResultPayload {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+            Self::File(_, _) => write!(f, "GetResultPayload(File)"),
+            Self::Stream(_) => write!(f, "GetResultPayload(Stream)"),
+        }
+    }
+}
+
+impl GetResult {
+    /// Collects the data into a [`Bytes`]
+    pub async fn bytes(self) -> Result<Bytes> {
+        let len = self.range.end - self.range.start;
+        match self.payload {
+            #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+            GetResultPayload::File(mut file, path) => {
+                maybe_spawn_blocking(move || {
+                    file.seek(SeekFrom::Start(self.range.start as _))
+                        .map_err(|source| local::Error::Seek {
+                            source,
+                            path: path.clone(),
+                        })?;
+
+                    let mut buffer = if let Ok(len) = len.try_into() {
+                        Vec::with_capacity(len)
+                    } else {
+                        Vec::new()
+                    };
+                    file.take(len as _)
+                        .read_to_end(&mut buffer)
+                        .map_err(|source| local::Error::UnableToReadBytes { source, path })?;
+
+                    Ok(buffer.into())
+                })
+                .await
+            }
+            GetResultPayload::Stream(s) => collect_bytes(s, Some(len)).await,
+        }
+    }
+
+    /// Converts this into a byte stream
+    ///
+    /// If `self.payload` is [`GetResultPayload::File`] this will perform chunked reads of the
+    /// file, otherwise it will return the [`GetResultPayload::Stream`].
+    ///
+    /// # Tokio Compatibility
+    ///
+    /// Tokio discourages performing blocking IO on a tokio worker thread, however,
+    /// no major operating systems have stable async file APIs. Therefore if called from
+    /// a tokio context, this will use [`tokio::runtime::Handle::spawn_blocking`] to dispatch
+    /// IO to a blocking thread pool, much like `tokio::fs` does under-the-hood.
+    ///
+    /// If not called from a tokio context, this will perform IO on the current thread with
+    /// no additional complexity or overheads
+    pub fn into_stream(self) -> BoxStream<'static, Result<Bytes>> {
+        match self.payload {
+            #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+            GetResultPayload::File(file, path) => {
+                const CHUNK_SIZE: usize = 8 * 1024;
+                local::chunked_stream(file, path, self.range, CHUNK_SIZE)
+            }
+            GetResultPayload::Stream(s) => s,
+        }
+    }
+}
+
+/// Configure preconditions for the put operation
+#[derive(Debug, Clone, PartialEq, Eq, Default)]
+pub enum PutMode {
+    /// Perform an atomic write operation, overwriting any object present at the provided path
+    #[default]
+    Overwrite,
+    /// Perform an atomic write operation, returning [`Error::AlreadyExists`] if an
+    /// object already exists at the provided path
+    Create,
+    /// Perform an atomic write operation if the current version of the object matches the
+    /// provided [`UpdateVersion`], returning [`Error::Precondition`] otherwise
+    Update(UpdateVersion),
+}
+
+/// Uniquely identifies a version of an object to update
+///
+/// Stores will use differing combinations of `e_tag` and `version` to provide conditional
+/// updates, and it is therefore recommended applications preserve both
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct UpdateVersion {
+    /// The unique identifier for the newly created object
+    ///
+    pub e_tag: Option<String>,
+    /// A version indicator for the newly created object
+    pub version: Option<String>,
+}
+
+impl From<PutResult> for UpdateVersion {
+    fn from(value: PutResult) -> Self {
+        Self {
+            e_tag: value.e_tag,
+            version: value.version,
+        }
+    }
+}
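A small sketch of create-only semantics with `PutMode::Create` (the path is arbitrary; the in-memory store enforces the precondition):

```rust
use object_store::{memory::InMemory, path::Path, Error, ObjectStore, PutMode};

#[tokio::main]
async fn main() {
    let store = InMemory::new();
    let path = Path::from("once");

    // First create succeeds
    store
        .put_opts(&path, "a".into(), PutMode::Create.into())
        .await
        .unwrap();

    // Second create fails because the object already exists
    let err = store
        .put_opts(&path, "b".into(), PutMode::Create.into())
        .await
        .unwrap_err();
    assert!(matches!(err, Error::AlreadyExists { .. }));
}
```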
+
+/// Options for a put request
+#[derive(Debug, Clone, Default)]
+pub struct PutOptions {
+    /// Configure the [`PutMode`] for this operation
+    pub mode: PutMode,
+    /// Provide a [`TagSet`] for this object
+    ///
+    /// Implementations that don't support object tagging should ignore this
+    pub tags: TagSet,
+    /// Provide a set of [`Attributes`]
+    ///
+    /// Implementations that don't support an attribute should return an error
+    pub attributes: Attributes,
+    /// Implementation-specific extensions. Intended for use by [`ObjectStore`] implementations
+    /// that need to pass context-specific information (like tracing spans) via trait methods.
+    ///
+    /// These extensions are ignored entirely by backends offered through this crate.
+    ///
+    /// They are also excluded from [`PartialEq`] and [`Eq`].
+    pub extensions: Extensions,
+}
+
+impl PartialEq for PutOptions {
+    fn eq(&self, other: &Self) -> bool {
+        let Self {
+            mode,
+            tags,
+            attributes,
+            extensions: _,
+        } = self;
+        let Self {
+            mode: other_mode,
+            tags: other_tags,
+            attributes: other_attributes,
+            extensions: _,
+        } = other;
+        (mode == other_mode) && (tags == other_tags) && (attributes == other_attributes)
+    }
+}
+
+impl Eq for PutOptions {}
+
+impl From<PutMode> for PutOptions {
+    fn from(mode: PutMode) -> Self {
+        Self {
+            mode,
+            ..Default::default()
+        }
+    }
+}
+
+impl From<TagSet> for PutOptions {
+    fn from(tags: TagSet) -> Self {
+        Self {
+            tags,
+            ..Default::default()
+        }
+    }
+}
+
+impl From<Attributes> for PutOptions {
+    fn from(attributes: Attributes) -> Self {
+        Self {
+            attributes,
+            ..Default::default()
+        }
+    }
+}
+
+// See .
+#[doc(hidden)]
+#[deprecated(note = "Use PutMultipartOptions", since = "0.12.3")]
+pub type PutMultipartOpts = PutMultipartOptions;
+
+/// Options for [`ObjectStore::put_multipart_opts`]
+#[derive(Debug, Clone, Default)]
+pub struct PutMultipartOptions {
+    /// Provide a [`TagSet`] for this object
+    ///
+    /// Implementations that don't support object tagging should ignore this
+    pub tags: TagSet,
+    /// Provide a set of [`Attributes`]
+    ///
+    /// Implementations that don't support an attribute should return an error
+    pub attributes: Attributes,
+    /// Implementation-specific extensions. Intended for use by [`ObjectStore`] implementations
+    /// that need to pass context-specific information (like tracing spans) via trait methods.
+    ///
+    /// These extensions are ignored entirely by backends offered through this crate.
+    ///
+    /// They are also excluded from [`PartialEq`] and [`Eq`].
+    pub extensions: Extensions,
+}
+
+impl PartialEq for PutMultipartOptions {
+    fn eq(&self, other: &Self) -> bool {
+        let Self {
+            tags,
+            attributes,
+            extensions: _,
+        } = self;
+        let Self {
+            tags: other_tags,
+            attributes: other_attributes,
+            extensions: _,
+        } = other;
+        (tags == other_tags) && (attributes == other_attributes)
+    }
+}
+
+impl Eq for PutMultipartOptions {}
+
+impl From<TagSet> for PutMultipartOptions {
+    fn from(tags: TagSet) -> Self {
+        Self {
+            tags,
+            ..Default::default()
+        }
+    }
+}
+
+impl From<Attributes> for PutMultipartOptions {
+    fn from(attributes: Attributes) -> Self {
+        Self {
+            attributes,
+            ..Default::default()
+        }
+    }
+}
+
+/// Result for a put request
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct PutResult {
+    /// The unique identifier for the newly created object
+    ///
+    pub e_tag: Option<String>,
+    /// A version indicator for the newly created object
+    pub version: Option<String>,
+}
+
+/// A specialized `Result` for object store-related errors
+pub type Result<T, E = Error> = std::result::Result<T, E>;
+
+/// A specialized `Error` for object store-related errors
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum Error {
+    /// A fallback error type when no variant matches
+    #[error("Generic {} error: {}", store, source)]
+    Generic {
+        /// The store this error originated from
+        store: &'static str,
+        /// The wrapped error
+        source: Box<dyn std::error::Error + Send + Sync + 'static>,
+    },
+
+    /// Error when the object is not found at given location
+    #[error("Object at location {} not found: {}", path, source)]
+    NotFound {
+        /// The path to the file
+        path: String,
+        /// The wrapped error
+        source: Box<dyn std::error::Error + Send + Sync + 'static>,
+    },
+
+    /// Error for invalid path
+    #[error("Encountered object with invalid path: {}", source)]
+    InvalidPath {
+        /// The wrapped error
+        #[from]
+        source: path::Error,
+    },
+
+    /// Error when `tokio::spawn` failed
+    #[error("Error joining spawned task: {}", source)]
+    JoinError {
+        /// The wrapped error
+        #[from]
+        source: tokio::task::JoinError,
+    },
+
+    /// Error when the attempted operation is not supported
+    #[error("Operation not supported: {}", source)]
+    NotSupported {
+        /// The wrapped error
+        source: Box<dyn std::error::Error + Send + Sync + 'static>,
+    },
+
+    /// Error when the object already exists
+    #[error("Object at location {} already exists: {}", path, source)]
+    AlreadyExists {
+        /// The path to the file
+        path: String,
+        /// The wrapped error
+        source: Box<dyn std::error::Error + Send + Sync + 'static>,
+    },
+
+    /// Error when the required conditions failed for the operation
+    #[error("Request precondition failure for path {}: {}", path, source)]
+    Precondition {
+        /// The path to the file
+        path: String,
+        /// The wrapped error
+        source: Box<dyn std::error::Error + Send + Sync + 'static>,
+    },
+
+    /// Error when the object at the location isn't modified
+    #[error("Object at location {} not modified: {}", path, source)]
+    NotModified {
+        /// The path to the file
+        path: String,
+        /// The wrapped error
+        source: Box<dyn std::error::Error + Send + Sync + 'static>,
+    },
+
+    /// Error when an operation is not implemented
+    #[error("Operation not yet implemented.")]
+    NotImplemented,
+
+    /// Error when the used credentials don't have enough permission
+    /// to perform the requested operation
+    #[error(
+        "The operation lacked the necessary privileges to complete for path {}: {}",
+        path,
+        source
+    )]
+    PermissionDenied {
+        /// The path to the file
+        path: String,
+        /// The wrapped error
+        source: Box<dyn std::error::Error + Send + Sync + 'static>,
+    },
+
+    /// Error when the used credentials lack valid authentication
+    #[error(
+        "The operation lacked valid authentication credentials for path {}: {}",
+        path,
+        source
+    )]
+    Unauthenticated {
+        /// The path to the file
+        path: String,
+        /// The wrapped error
+        source: Box<dyn std::error::Error + Send + Sync + 'static>,
+    },
+
+    /// Error when a configuration key is invalid for the store used
+    #[error("Configuration key: '{}' is not valid for store '{}'.", key, store)]
+    UnknownConfigurationKey {
+        /// The object store used
+        store: &'static str,
+        /// The configuration key used
+        key: String,
+    },
+}
+
+impl From<Error> for std::io::Error {
+    fn from(e: Error) -> Self {
+        let kind = match &e {
+            Error::NotFound { .. } => std::io::ErrorKind::NotFound,
+            _ => std::io::ErrorKind::Other,
+        };
+        Self::new(kind, e)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::buffered::BufWriter;
+    use chrono::TimeZone;
+    use tokio::io::AsyncWriteExt;
+
+    macro_rules! maybe_skip_integration {
+        () => {
+            if std::env::var("TEST_INTEGRATION").is_err() {
+                eprintln!("Skipping integration test - set TEST_INTEGRATION");
+                return;
+            }
+        };
+    }
+    pub(crate) use maybe_skip_integration;
+
+    /// Test that the returned stream does not borrow the lifetime of Path
+    fn list_store<'a>(
+        store: &'a dyn ObjectStore,
+        path_str: &str,
+    ) -> BoxStream<'a, Result<ObjectMeta>> {
+        let path = Path::from(path_str);
+        store.list(Some(&path))
+    }
+
+    #[cfg(any(feature = "azure", feature = "aws"))]
+    pub(crate) async fn signing<T>(integration: &T)
+    where
+        T: ObjectStore + signer::Signer,
+    {
+        use reqwest::Method;
+        use std::time::Duration;
+
+        let data = Bytes::from("hello world");
+        let path = Path::from("file.txt");
+        integration.put(&path, data.clone().into()).await.unwrap();
+
+        let signed = integration
+            .signed_url(Method::GET, &path, Duration::from_secs(60))
+            .await
+            .unwrap();
+
+        let resp = reqwest::get(signed).await.unwrap();
+        let loaded = resp.bytes().await.unwrap();
+
+        assert_eq!(data, loaded);
+    }
+
+    #[cfg(any(feature = "aws", feature = "azure"))]
+    pub(crate) async fn tagging<F, Fut>(storage: Arc<dyn ObjectStore>, validate: bool, get_tags: F)
+    where
+        F: Fn(Path) -> Fut + Send + Sync,
+        Fut: std::future::Future<Output = Result<crate::client::HttpResponse>> + Send,
+    {
+        use bytes::Buf;
+        use serde::Deserialize;
+
+        #[derive(Deserialize)]
+        struct Tagging {
+            #[serde(rename = "TagSet")]
+            list: TagList,
+        }
+
+        #[derive(Debug, Deserialize)]
+        struct TagList {
+            #[serde(rename = "Tag")]
+            tags: Vec<Tag>,
+        }
+
+        #[derive(Debug, Deserialize, Eq, PartialEq)]
+        #[serde(rename_all = "PascalCase")]
+        struct Tag {
+            key: String,
+            value: String,
+        }
+
+        let tags = vec![
+            Tag {
+                key: "foo.com=bar/s".to_string(),
+                value: "bananas/foo.com-_".to_string(),
+            },
+            Tag {
+                key: "namespace/key.foo".to_string(),
+                value: "value with a space".to_string(),
+            },
+        ];
+        let mut tag_set = TagSet::default();
+        for t in &tags {
+            tag_set.push(&t.key, &t.value)
+        }
+
+        let path = Path::from("tag_test");
+        storage
+            .put_opts(&path, "test".into(), tag_set.clone().into())
+            .await
+            .unwrap();
+
+        let multi_path = Path::from("tag_test_multi");
+        let mut write = storage
+            .put_multipart_opts(&multi_path, tag_set.clone().into())
+            .await
+            .unwrap();
+
+        write.put_part("foo".into()).await.unwrap();
+        write.complete().await.unwrap();
+
+        let buf_path = Path::from("tag_test_buf");
+        let mut buf = BufWriter::new(storage, buf_path.clone()).with_tags(tag_set);
+        buf.write_all(b"foo").await.unwrap();
+        buf.shutdown().await.unwrap();
+
+        // Write should always succeed, but certain configurations may simply ignore tags
+        if !validate {
+            return;
+        }
+
+        for path in [path, multi_path, buf_path] {
+            let resp = get_tags(path.clone()).await.unwrap();
+            let body = resp.into_body().bytes().await.unwrap();
+
+            let mut resp: Tagging = quick_xml::de::from_reader(body.reader()).unwrap();
+            resp.list.tags.sort_by(|a, b| a.key.cmp(&b.key));
+            assert_eq!(resp.list.tags, tags);
+        }
+    }
+
+    #[tokio::test]
+    async fn test_list_lifetimes() {
+        let store = memory::InMemory::new();
+        let mut stream = list_store(&store, "path");
+        assert!(stream.next().await.is_none());
+    }
+
+    #[test]
+    fn test_preconditions() {
+        let mut meta = ObjectMeta {
+            location: Path::from("test"),
+            last_modified: Utc.timestamp_nanos(100),
+            size: 100,
+            e_tag: Some("123".to_string()),
+            version: None,
+        };
+
+        let mut options = GetOptions::default();
+        options.check_preconditions(&meta).unwrap();
+
+        options.if_modified_since = Some(Utc.timestamp_nanos(50));
+        options.check_preconditions(&meta).unwrap();
+
+        options.if_modified_since = Some(Utc.timestamp_nanos(100));
+        options.check_preconditions(&meta).unwrap_err();
+
+        options.if_modified_since = Some(Utc.timestamp_nanos(101));
+        options.check_preconditions(&meta).unwrap_err();
+
+        options = GetOptions::default();
+
+        options.if_unmodified_since = Some(Utc.timestamp_nanos(50));
+        options.check_preconditions(&meta).unwrap_err();
+
+        options.if_unmodified_since = Some(Utc.timestamp_nanos(100));
+        options.check_preconditions(&meta).unwrap();
+
+        options.if_unmodified_since = Some(Utc.timestamp_nanos(101));
+        options.check_preconditions(&meta).unwrap();
+
+        options = GetOptions::default();
+
+        options.if_match = Some("123".to_string());
+        options.check_preconditions(&meta).unwrap();
+
+        options.if_match = Some("123,354".to_string());
+        options.check_preconditions(&meta).unwrap();
+
+        options.if_match = Some("354, 123,".to_string());
+        options.check_preconditions(&meta).unwrap();
+
+        options.if_match = Some("354".to_string());
+        options.check_preconditions(&meta).unwrap_err();
+
+        options.if_match = Some("*".to_string());
+        options.check_preconditions(&meta).unwrap();
+
+        // If-Match takes precedence
+        options.if_unmodified_since = Some(Utc.timestamp_nanos(200));
+        options.check_preconditions(&meta).unwrap();
+
+        options = GetOptions::default();
+
+        options.if_none_match = Some("123".to_string());
+        options.check_preconditions(&meta).unwrap_err();
+
+        options.if_none_match = Some("*".to_string());
+        options.check_preconditions(&meta).unwrap_err();
+
+        options.if_none_match = Some("1232".to_string());
+        options.check_preconditions(&meta).unwrap();
+
+        options.if_none_match = Some("23, 123".to_string());
+        options.check_preconditions(&meta).unwrap_err();
+
+        // If-None-Match takes precedence
+        options.if_modified_since = Some(Utc.timestamp_nanos(10));
+        options.check_preconditions(&meta).unwrap_err();
+
+        // Check missing ETag
+        meta.e_tag = None;
+        options = GetOptions::default();
+
+        options.if_none_match = Some("*".to_string()); // Fails if any file exists
+        options.check_preconditions(&meta).unwrap_err();
+
+        options = GetOptions::default();
+        options.if_match = Some("*".to_string()); // Passes if file exists
+        options.check_preconditions(&meta).unwrap();
+    }
+
+    #[test]
+    #[cfg(feature = "http")]
+    fn test_reexported_types() {
+        // Test HeaderMap
+        let mut headers = HeaderMap::new();
+        headers.insert("content-type", HeaderValue::from_static("text/plain"));
+        assert_eq!(headers.len(), 1);
+
+        // Test HeaderValue
+        let value = HeaderValue::from_static("test-value");
+        assert_eq!(value.as_bytes(), b"test-value");
+
+        // Test Extensions
+        let mut extensions = Extensions::new();
+        extensions.insert("test-key");
+        assert!(extensions.get::<&str>().is_some());
+    }
+}
diff --git a/rust/object_store/src/limit.rs b/rust/object_store/src/limit.rs
new file mode 100644
index 0000000000..85714967a2
--- /dev/null
+++ b/rust/object_store/src/limit.rs
@@ -0,0 +1,320 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! An object store that limits the maximum concurrency of the wrapped implementation + +use crate::{ + BoxStream, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, Path, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, StreamExt, + UploadPart, +}; +use async_trait::async_trait; +use bytes::Bytes; +use futures::{FutureExt, Stream}; +use std::ops::Range; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use tokio::sync::{OwnedSemaphorePermit, Semaphore}; + +/// Store wrapper that wraps an inner store and limits the maximum number of concurrent +/// object store operations. Where each call to an [`ObjectStore`] member function is +/// considered a single operation, even if it may result in more than one network call +/// +/// ``` +/// # use object_store::memory::InMemory; +/// # use object_store::limit::LimitStore; +/// +/// // Create an in-memory `ObjectStore` limited to 20 concurrent requests +/// let store = LimitStore::new(InMemory::new(), 20); +/// ``` +/// +#[derive(Debug)] +pub struct LimitStore { + inner: Arc, + max_requests: usize, + semaphore: Arc, +} + +impl LimitStore { + /// Create new limit store that will limit the maximum + /// number of outstanding concurrent requests to + /// `max_requests` + pub fn new(inner: T, max_requests: usize) -> Self { + Self { + inner: Arc::new(inner), + max_requests, + semaphore: Arc::new(Semaphore::new(max_requests)), + } + } +} + +impl std::fmt::Display for LimitStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "LimitStore({}, {})", self.max_requests, self.inner) + } +} + +#[async_trait] +impl ObjectStore for LimitStore { + async fn put(&self, location: &Path, payload: PutPayload) -> Result { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.put(location, payload).await + } + + async fn put_opts( + &self, + location: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.put_opts(location, payload, opts).await + } + async fn put_multipart(&self, location: &Path) -> Result> { + let upload = self.inner.put_multipart(location).await?; + Ok(Box::new(LimitUpload { + semaphore: Arc::clone(&self.semaphore), + upload, + })) + } + + async fn put_multipart_opts( + &self, + location: &Path, + opts: PutMultipartOptions, + ) -> Result> { + let upload = self.inner.put_multipart_opts(location, opts).await?; + Ok(Box::new(LimitUpload { + semaphore: Arc::clone(&self.semaphore), + upload, + })) + } + + async fn get(&self, location: &Path) -> Result { + let permit = Arc::clone(&self.semaphore).acquire_owned().await.unwrap(); + let r = self.inner.get(location).await?; + Ok(permit_get_result(r, permit)) + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { + let permit = Arc::clone(&self.semaphore).acquire_owned().await.unwrap(); + let r = self.inner.get_opts(location, options).await?; + Ok(permit_get_result(r, permit)) + } + + async fn get_range(&self, location: &Path, range: Range) -> Result 
+ async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.get_range(location, range).await + } + + async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.get_ranges(location, ranges).await + } + + async fn head(&self, location: &Path) -> Result<ObjectMeta> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.head(location).await + } + + async fn delete(&self, location: &Path) -> Result<()> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.delete(location).await + } + + fn delete_stream<'a>( + &'a self, + locations: BoxStream<'a, Result<Path>>, + ) -> BoxStream<'a, Result<Path>> { + self.inner.delete_stream(locations) + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> { + let prefix = prefix.cloned(); + let inner = Arc::clone(&self.inner); + let fut = Arc::clone(&self.semaphore) + .acquire_owned() + .map(move |permit| { + let s = inner.list(prefix.as_ref()); + PermitWrapper::new(s, permit.unwrap()) + }); + fut.into_stream().flatten().boxed() + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'static, Result<ObjectMeta>> { + let prefix = prefix.cloned(); + let offset = offset.clone(); + let inner = Arc::clone(&self.inner); + let fut = Arc::clone(&self.semaphore) + .acquire_owned() + .map(move |permit| { + let s = inner.list_with_offset(prefix.as_ref(), &offset); + PermitWrapper::new(s, permit.unwrap()) + }); + fut.into_stream().flatten().boxed() + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.copy(from, to).await + } + + async fn rename(&self, from: &Path, to: &Path) -> Result<()> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.rename(from, to).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.copy_if_not_exists(from, to).await + } + + async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.inner.rename_if_not_exists(from, to).await + } +} + +fn permit_get_result(r: GetResult, permit: OwnedSemaphorePermit) -> GetResult { + let payload = match r.payload { + #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] + v @ GetResultPayload::File(_, _) => v, + GetResultPayload::Stream(s) => { + GetResultPayload::Stream(PermitWrapper::new(s, permit).boxed()) + } + }; + GetResult { payload, ..r } +} + +/// Combines an [`OwnedSemaphorePermit`] with some other type +struct PermitWrapper<T> { + inner: T, + #[allow(dead_code)] + permit: OwnedSemaphorePermit, +} + +impl<T> PermitWrapper<T> { + fn new(inner: T, permit: OwnedSemaphorePermit) -> Self { + Self { inner, permit } + } +} + +impl<T: Stream + Unpin> Stream for PermitWrapper<T> { + type Item = T::Item; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { + Pin::new(&mut self.inner).poll_next(cx) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.inner.size_hint() + } +}
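+
+// Dropping a `PermitWrapper` releases its `OwnedSemaphorePermit`, so abandoning
+// a limited `list` stream immediately frees a slot for other operations.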
+ +/// A [`MultipartUpload`] wrapper that limits the maximum number of concurrent requests +#[derive(Debug)] +pub struct LimitUpload { + upload: Box<dyn MultipartUpload>, + semaphore: Arc<Semaphore>, +} + +impl LimitUpload { + /// Create a new [`LimitUpload`] limiting `upload` to `max_concurrency` concurrent requests + pub fn new(upload: Box<dyn MultipartUpload>, max_concurrency: usize) -> Self { + Self { + upload, + semaphore: Arc::new(Semaphore::new(max_concurrency)), + } + } +} + +#[async_trait] +impl MultipartUpload for LimitUpload { + fn put_part(&mut self, data: PutPayload) -> UploadPart { + let upload = self.upload.put_part(data); + let s = Arc::clone(&self.semaphore); + Box::pin(async move { + let _permit = s.acquire().await.unwrap(); + upload.await + }) + } + + async fn complete(&mut self) -> Result<PutResult> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.upload.complete().await + } + + async fn abort(&mut self) -> Result<()> { + let _permit = self.semaphore.acquire().await.unwrap(); + self.upload.abort().await + } +} + +#[cfg(test)] +mod tests { + use crate::integration::*; + use crate::limit::LimitStore; + use crate::memory::InMemory; + use crate::ObjectStore; + use futures::stream::StreamExt; + use std::pin::Pin; + use std::time::Duration; + use tokio::time::timeout; + + #[tokio::test] + async fn limit_test() { + let max_requests = 10; + let memory = InMemory::new(); + let integration = LimitStore::new(memory, max_requests); + + put_get_delete_list(&integration).await; + get_opts(&integration).await; + list_uses_directories_correctly(&integration).await; + list_with_delimiter(&integration).await; + rename_and_copy(&integration).await; + stream_get(&integration).await; + + let mut streams = Vec::with_capacity(max_requests); + for _ in 0..max_requests { + let mut stream = integration.list(None).peekable(); + Pin::new(&mut stream).peek().await; // Ensure semaphore is acquired + streams.push(stream); + } + + let t = Duration::from_millis(20); + + // Expect to not be able to make another request + let fut = integration.list(None).collect::<Vec<_>>(); + assert!(timeout(t, fut).await.is_err()); + + // Drop one of the streams + streams.pop(); + + // Can now make another request + integration.list(None).collect::<Vec<_>>().await; + } +}
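+
+// Illustrative sketch (hypothetical `store` and `path`): `LimitUpload` can also
+// wrap a single multipart upload directly, capping its in-flight parts:
+//
+//     let upload = store.put_multipart(&path).await?;
+//     let mut upload = LimitUpload::new(upload, 4); // at most 4 concurrent parts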
diff --git a/rust/object_store/src/list.rs b/rust/object_store/src/list.rs new file mode 100644 index 0000000000..e73fe5252f --- /dev/null +++ b/rust/object_store/src/list.rs @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Paginated Listing + +use super::Result; +use crate::ListResult; +use async_trait::async_trait; +use std::borrow::Cow; + +/// Options for a paginated list request +#[derive(Debug, Default, Clone)] +pub struct PaginatedListOptions { + /// Path to start listing from + /// + /// Note: Not all stores support this + pub offset: Option<String>, + + /// A delimiter used to group keys with a common prefix + /// + /// Note: Some stores only support `/` + pub delimiter: Option<Cow<'static, str>>, + + /// The maximum number of paths to return + pub max_keys: Option<usize>, + + /// A page token from a previous request + /// + /// Note: Behaviour is implementation defined if the previous request + /// used a different prefix or options + pub page_token: Option<String>, + + /// Implementation-specific extensions. Intended for use by implementations + /// that need to pass context-specific information (like tracing spans) via trait methods. + /// + /// These extensions are ignored entirely by backends offered through this crate. + pub extensions: http::Extensions, +} + +/// A [`ListResult`] with an optional pagination token +#[derive(Debug)] +pub struct PaginatedListResult { + /// The list result + pub result: ListResult, + /// If the result set was truncated, the pagination token to fetch the next results + pub page_token: Option<String>, +} + +/// A low-level interface for interacting with paginated listing APIs +/// +/// Most use-cases should prefer [`ObjectStore::list`] as it is supported by more +/// backends, including [`LocalFileSystem`]; however, [`PaginatedListStore`] can be +/// used where stateless pagination or non-path-segment-based listing is required +/// +/// [`ObjectStore::list`]: crate::ObjectStore::list +/// [`LocalFileSystem`]: crate::local::LocalFileSystem +#[async_trait] +pub trait PaginatedListStore: Send + Sync + 'static { + /// Perform a paginated list request + /// + /// Note: the order of returned objects is not guaranteed and + /// unlike [`ObjectStore::list`] a trailing delimiter is not + /// automatically added to `prefix` + /// + /// [`ObjectStore::list`]: crate::ObjectStore::list + async fn list_paginated( + &self, + prefix: Option<&str>, + opts: PaginatedListOptions, + ) -> Result<PaginatedListResult>; +}
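+
+// Illustrative sketch (hypothetical `store` implementing `PaginatedListStore`):
+// drain every page by threading the returned `page_token` into the next request:
+//
+//     let mut page_token = None;
+//     loop {
+//         let opts = PaginatedListOptions { page_token, ..Default::default() };
+//         let page = store.list_paginated(Some("prefix/"), opts).await?;
+//         // ... consume page.result.objects and page.result.common_prefixes ...
+//         match page.page_token {
+//             Some(t) => page_token = Some(t),
+//             None => break,
+//         }
+//     }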
diff --git a/rust/object_store/src/local.rs b/rust/object_store/src/local.rs new file mode 100644 index 0000000000..3404bc8958 --- /dev/null +++ b/rust/object_store/src/local.rs @@ -0,0 +1,1742 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! An object store implementation for a local filesystem +use std::fs::{metadata, symlink_metadata, File, Metadata, OpenOptions}; +use std::io::{ErrorKind, Read, Seek, SeekFrom, Write}; +use std::ops::Range; +use std::sync::Arc; +use std::time::SystemTime; +use std::{collections::BTreeSet, io}; +use std::{collections::VecDeque, path::PathBuf}; + +use async_trait::async_trait; +use bytes::Bytes; +use chrono::{DateTime, Utc}; +use futures::{stream::BoxStream, StreamExt}; +use futures::{FutureExt, TryStreamExt}; +use parking_lot::Mutex; +use url::Url; +use walkdir::{DirEntry, WalkDir}; + +use crate::{ + maybe_spawn_blocking, + path::{absolute_path_to_url, Path}, + util::InvalidGetRange, + Attributes, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, PutMode, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, + UploadPart, +}; + +/// A specialized `Error` for filesystem object store-related errors +#[derive(Debug, thiserror::Error)] +pub(crate) enum Error { + #[error("Unable to walk dir: {}", source)] + UnableToWalkDir { source: walkdir::Error }, + + #[error("Unable to access metadata for {}: {}", path, source)] + Metadata { + source: Box<dyn std::error::Error + Send + Sync>, + path: String, + }, + + #[error("Unable to copy data to file: {}", source)] + UnableToCopyDataToFile { source: io::Error }, + + #[error("Unable to rename file: {}", source)] + UnableToRenameFile { source: io::Error }, + + #[error("Unable to create dir {}: {}", path.display(), source)] + UnableToCreateDir { source: io::Error, path: PathBuf }, + + #[error("Unable to create file {}: {}", path.display(), source)] + UnableToCreateFile { source: io::Error, path: PathBuf }, + + #[error("Unable to delete file {}: {}", path.display(), source)] + UnableToDeleteFile { source: io::Error, path: PathBuf }, + + #[error("Unable to open file {}: {}", path.display(), source)] + UnableToOpenFile { source: io::Error, path: PathBuf }, + + #[error("Unable to read data from file {}: {}", path.display(), source)] + UnableToReadBytes { source: io::Error, path: PathBuf }, + + #[error("Out of range of file {}, expected: {}, actual: {}", path.display(), expected, actual)] + OutOfRange { + path: PathBuf, + expected: u64, + actual: u64, + }, + + #[error("Requested range was invalid")] + InvalidRange { source: InvalidGetRange }, + + #[error("Unable to copy file from {} to {}: {}", from.display(), to.display(), source)] + UnableToCopyFile { + from: PathBuf, + to: PathBuf, + source: io::Error, + }, + + #[error("NotFound")] + NotFound { path: PathBuf, source: io::Error }, + + #[error("Error seeking file {}: {}", path.display(), source)] + Seek { source: io::Error, path: PathBuf }, + + #[error("Unable to convert URL \"{}\" to filesystem path", url)] + InvalidUrl { url: Url }, + + #[error("AlreadyExists")] + AlreadyExists { path: String, source: io::Error }, + + #[error("Unable to canonicalize filesystem root: {}", path.display())] + UnableToCanonicalize { path: PathBuf, source: io::Error }, + + #[error("Filenames containing trailing '/#\\d+/' are not supported: {}", path)] + InvalidPath { path: String }, + + #[error("Upload aborted")] + Aborted, +} + +impl From<Error> for super::Error { + fn from(source: Error) -> Self { + match source { + Error::NotFound { path, source } => Self::NotFound { + path: path.to_string_lossy().to_string(), + source: source.into(), + }, + Error::AlreadyExists { path, source } => Self::AlreadyExists { + path, + source: source.into(), + }, + _ => Self::Generic { + store: "LocalFileSystem", + source: Box::new(source), + }, + } + } +}
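+
+// `NotFound` and `AlreadyExists` are surfaced as their typed `object_store::Error`
+// variants so callers can match on them; every other variant is wrapped as a
+// `Generic` error tagged with the "LocalFileSystem" store name.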
+ +/// Local filesystem storage providing an [`ObjectStore`] interface to files on +/// local disk. Can optionally be created with a directory prefix +/// +/// # Path Semantics +/// +/// This implementation follows the [file URI] scheme outlined in [RFC 3986]. In +/// particular, paths are delimited by `/` +/// +/// [file URI]: https://en.wikipedia.org/wiki/File_URI_scheme +/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986 +/// +/// # Filesystem Semantics +/// +/// [`LocalFileSystem`] will expose the path semantics of the underlying filesystem, which may +/// have additional restrictions beyond those enforced by [`Path`]. +/// +/// For example: +/// +/// * Windows forbids certain filenames, e.g. `COM0`, +/// * Windows forbids folders with trailing `.` +/// * Windows forbids certain ASCII characters, e.g. `<` or `|` +/// * OS X forbids filenames containing `:` +/// * A leading `-` is discouraged on Unix systems, where it may be interpreted as a CLI flag +/// * Filesystems may have restrictions on the maximum path or path segment length +/// * Filesystem support for non-ASCII characters is inconsistent +/// +/// Additionally, some filesystems, such as NTFS, are case-insensitive, whilst others like +/// FAT don't preserve case at all. Further, some filesystems support non-Unicode character +/// sequences, such as unpaired UTF-16 surrogates, and [`LocalFileSystem`] will error on +/// encountering such sequences. +/// +/// Finally, filenames matching the regex `/.*#\d+/`, e.g. `foo.parquet#123`, are not supported +/// by [`LocalFileSystem`] as they are used to provide atomic writes. Such files will be ignored +/// for listing operations, and attempting to address such a file will error. +/// +/// # Tokio Compatibility +/// +/// Tokio discourages performing blocking IO on a tokio worker thread, however, +/// no major operating systems have stable async file APIs. Therefore if called from +/// a tokio context, this will use [`tokio::runtime::Handle::spawn_blocking`] to dispatch +/// IO to a blocking thread pool, much like `tokio::fs` does under-the-hood. +/// +/// If not called from a tokio context, this will perform IO on the current thread with +/// no additional complexity or overheads +/// +/// # Symlinks +/// +/// [`LocalFileSystem`] will follow symlinks as normal, however, it is worth noting: +/// +/// * Broken symlinks will be silently ignored by listing operations +/// * No effort is made to prevent breaking symlinks when deleting files +/// * Symlinks that resolve to paths outside the root **will** be followed +/// * Mutating a file through one or more symlinks will mutate the underlying file +/// * Deleting a path that resolves to a symlink will only delete the symlink +/// +/// # Cross-Filesystem Copy +/// +/// [`LocalFileSystem::copy`] is implemented using [`std::fs::hard_link`], and therefore +/// does not support copying across filesystem boundaries.
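+///
+/// # Example
+///
+/// A minimal, illustrative usage sketch (any existing directory works as the
+/// prefix):
+///
+/// ```no_run
+/// # use object_store::local::LocalFileSystem;
+/// let store = LocalFileSystem::new_with_prefix("/path/to/data").unwrap();
+/// ```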
+/// +#[derive(Debug)] +pub struct LocalFileSystem { + config: Arc<Config>, + // whether to remove now-empty parent directories when deleting files + automatic_cleanup: bool, +} + +#[derive(Debug)] +struct Config { + root: Url, +} + +impl std::fmt::Display for LocalFileSystem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "LocalFileSystem({})", self.config.root) + } +} + +impl Default for LocalFileSystem { + fn default() -> Self { + Self::new() + } +} + +impl LocalFileSystem { + /// Create new filesystem storage with no prefix + pub fn new() -> Self { + Self { + config: Arc::new(Config { + root: Url::parse("file:///").unwrap(), + }), + automatic_cleanup: false, + } + } + + /// Create new filesystem storage with `prefix` applied to all paths + /// + /// Returns an error if the path does not exist + /// + pub fn new_with_prefix(prefix: impl AsRef<std::path::Path>) -> Result<Self> { + let path = std::fs::canonicalize(&prefix).map_err(|source| { + let path = prefix.as_ref().into(); + Error::UnableToCanonicalize { source, path } + })?; + + Ok(Self { + config: Arc::new(Config { + root: absolute_path_to_url(path)?, + }), + automatic_cleanup: false, + }) + } + + /// Return an absolute filesystem path of the given file location + pub fn path_to_filesystem(&self, location: &Path) -> Result<PathBuf> { + if !is_valid_file_path(location) { + let path = location.as_ref().into(); + let error = Error::InvalidPath { path }; + return Err(error.into()); + } + + let path = self.config.prefix_to_filesystem(location)?; + + #[cfg(target_os = "windows")] + let path = { + let path = path.to_string_lossy(); + + // Assume the first char is the drive letter and the next is a colon. + let mut out = String::new(); + let drive = &path[..2]; // The drive letter and colon (e.g., "C:") + let filepath = &path[2..].replace(':', "%3A"); // Replace subsequent colons + out.push_str(drive); + out.push_str(filepath); + PathBuf::from(out) + }; + + Ok(path) + } + + /// Enable automatic cleanup of empty directories when deleting files + pub fn with_automatic_cleanup(mut self, automatic_cleanup: bool) -> Self { + self.automatic_cleanup = automatic_cleanup; + self + } +} + +impl Config { + /// Return an absolute filesystem path of the given location + fn prefix_to_filesystem(&self, location: &Path) -> Result<PathBuf> { + let mut url = self.root.clone(); + url.path_segments_mut() + .expect("url path") + // technically not necessary as Path ignores empty segments + // but avoids creating paths with "//" which look odd in error messages. + .pop_if_empty() + .extend(location.parts()); + + url.to_file_path() + .map_err(|_| Error::InvalidUrl { url }.into()) + } + + /// Resolves the provided absolute filesystem path to a [`Path`] prefix + fn filesystem_to_path(&self, location: &std::path::Path) -> Result<Path> { + Ok(Path::from_absolute_path_with_base( + location, + Some(&self.root), + )?) + } +}
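+
+// Note: on Windows, `path_to_filesystem` percent-encodes any `:` after the
+// drive specifier (e.g. `a/file:1` becomes `...\a\file%3A1`), as `:` is
+// reserved by NTFS for alternate data streams.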
+ +fn is_valid_file_path(path: &Path) -> bool { + match path.filename() { + Some(p) => match p.split_once('#') { + Some((_, suffix)) if !suffix.is_empty() => { + // Valid if contains non-digits + !suffix.as_bytes().iter().all(|x| x.is_ascii_digit()) + } + _ => true, + }, + None => false, + } +} + +#[async_trait] +impl ObjectStore for LocalFileSystem { + async fn put_opts( + &self, + location: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result<PutResult> { + if matches!(opts.mode, PutMode::Update(_)) { + return Err(crate::Error::NotImplemented); + } + + if !opts.attributes.is_empty() { + return Err(crate::Error::NotImplemented); + } + + let path = self.path_to_filesystem(location)?; + maybe_spawn_blocking(move || { + let (mut file, staging_path) = new_staged_upload(&path)?; + let mut e_tag = None; + + let err = match payload.iter().try_for_each(|x| file.write_all(x)) { + Ok(_) => { + let metadata = file.metadata().map_err(|e| Error::Metadata { + source: e.into(), + path: path.to_string_lossy().to_string(), + })?; + e_tag = Some(get_etag(&metadata)); + match opts.mode { + PutMode::Overwrite => { + // For some fuse types of file systems, the file must be closed first + // to trigger the upload operation, and then renamed, such as Blobfuse + std::mem::drop(file); + match std::fs::rename(&staging_path, &path) { + Ok(_) => None, + Err(source) => Some(Error::UnableToRenameFile { source }), + } + } + PutMode::Create => match std::fs::hard_link(&staging_path, &path) { + Ok(_) => { + let _ = std::fs::remove_file(&staging_path); // Attempt to cleanup + None + } + Err(source) => match source.kind() { + ErrorKind::AlreadyExists => Some(Error::AlreadyExists { + path: path.to_str().unwrap().to_string(), + source, + }), + _ => Some(Error::UnableToRenameFile { source }), + }, + }, + PutMode::Update(_) => unreachable!(), + } + } + Err(source) => Some(Error::UnableToCopyDataToFile { source }), + }; + + if let Some(err) = err { + let _ = std::fs::remove_file(&staging_path); // Attempt to cleanup + return Err(err.into()); + } + + Ok(PutResult { + e_tag, + version: None, + }) + }) + .await + } + + async fn put_multipart_opts( + &self, + location: &Path, + opts: PutMultipartOptions, + ) -> Result<Box<dyn MultipartUpload>> { + if !opts.attributes.is_empty() { + return Err(crate::Error::NotImplemented); + } + + let dest = self.path_to_filesystem(location)?; + let (file, src) = new_staged_upload(&dest)?; + Ok(Box::new(LocalUpload::new(src, dest, file))) + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> { + let location = location.clone(); + let path = self.path_to_filesystem(&location)?; + maybe_spawn_blocking(move || { + let (file, metadata) = open_file(&path)?; + let meta = convert_metadata(metadata, location); + options.check_preconditions(&meta)?; + + let range = match options.range { + Some(r) => r + .as_range(meta.size) + .map_err(|source| Error::InvalidRange { source })?, + None => 0..meta.size, + }; + + Ok(GetResult { + payload: GetResultPayload::File(file, path), + attributes: Attributes::default(), + range, + meta, + }) + }) + .await + } + + async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> { + let path = self.path_to_filesystem(location)?; + maybe_spawn_blocking(move || { + let (mut file, _) = open_file(&path)?; + read_range(&mut file, &path, range) + }) + .await + }
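+
+    // Note: the read and write paths above funnel blocking filesystem IO through
+    // `maybe_spawn_blocking`, which dispatches to a blocking thread pool only
+    // when called from within a tokio runtime (see "Tokio Compatibility" above).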
+ + async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> { + let path = self.path_to_filesystem(location)?; + let ranges = ranges.to_vec(); + maybe_spawn_blocking(move || { + // Vectored IO might be faster + let (mut file, _) = open_file(&path)?; + ranges + .into_iter() + .map(|r| read_range(&mut file, &path, r)) + .collect() + }) + .await + } + + async fn delete(&self, location: &Path) -> Result<()> { + let config = Arc::clone(&self.config); + let path = self.path_to_filesystem(location)?; + let automatic_cleanup = self.automatic_cleanup; + maybe_spawn_blocking(move || { + if let Err(e) = std::fs::remove_file(&path) { + Err(match e.kind() { + ErrorKind::NotFound => Error::NotFound { path, source: e }.into(), + _ => Error::UnableToDeleteFile { path, source: e }.into(), + }) + } else if automatic_cleanup { + let root = &config.root; + let root = root + .to_file_path() + .map_err(|_| Error::InvalidUrl { url: root.clone() })?; + + // Traverse upwards, deleting empty directories, until we reach the root or an error + let mut parent = path.parent(); + + while let Some(loc) = parent { + if loc != root && std::fs::remove_dir(loc).is_ok() { + parent = loc.parent(); + } else { + break; + } + } + + Ok(()) + } else { + Ok(()) + } + }) + .await + }
+ + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> { + self.list_with_maybe_offset(prefix, None) + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'static, Result<ObjectMeta>> { + self.list_with_maybe_offset(prefix, Some(offset)) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> { + let config = Arc::clone(&self.config); + + let prefix = prefix.cloned().unwrap_or_default(); + let resolved_prefix = config.prefix_to_filesystem(&prefix)?; + + maybe_spawn_blocking(move || { + let walkdir = WalkDir::new(&resolved_prefix) + .min_depth(1) + .max_depth(1) + .follow_links(true); + + let mut common_prefixes = BTreeSet::new(); + let mut objects = Vec::new(); + + for entry_res in walkdir.into_iter().map(convert_walkdir_result) { + if let Some(entry) = entry_res? { + let is_directory = entry.file_type().is_dir(); + let entry_location = config.filesystem_to_path(entry.path())?; + if !is_directory && !is_valid_file_path(&entry_location) { + continue; + } + + let mut parts = match entry_location.prefix_match(&prefix) { + Some(parts) => parts, + None => continue, + }; + + let common_prefix = match parts.next() { + Some(p) => p, + None => continue, + }; + + drop(parts); + + if is_directory { + common_prefixes.insert(prefix.child(common_prefix)); + } else if let Some(metadata) = convert_entry(entry, entry_location)? { + objects.push(metadata); + } + } + } + + Ok(ListResult { + common_prefixes: common_prefixes.into_iter().collect(), + objects, + }) + }) + .await + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + let from = self.path_to_filesystem(from)?; + let to = self.path_to_filesystem(to)?; + let mut id = 0; + // In order to make this atomic we: + // + // - hard link to a hidden temporary file + // - atomically rename this temporary file into place + // + // This is necessary because hard_link returns an error if the destination already exists + maybe_spawn_blocking(move || loop { + let staged = staged_upload_path(&to, &id.to_string()); + match std::fs::hard_link(&from, &staged) { + Ok(_) => { + return std::fs::rename(&staged, &to).map_err(|source| { + let _ = std::fs::remove_file(&staged); // Attempt to clean up + Error::UnableToCopyFile { from, to, source }.into() + }); + } + Err(source) => match source.kind() { + ErrorKind::AlreadyExists => id += 1, + ErrorKind::NotFound => match from.exists() { + true => create_parent_dirs(&to, source)?, + false => return Err(Error::NotFound { path: from, source }.into()), + }, + _ => return Err(Error::UnableToCopyFile { from, to, source }.into()), + }, + } + }) + .await + } + + async fn rename(&self, from: &Path, to: &Path) -> Result<()> { + let from = self.path_to_filesystem(from)?; + let to = self.path_to_filesystem(to)?; + maybe_spawn_blocking(move || loop { + match std::fs::rename(&from, &to) { + Ok(_) => return Ok(()), + Err(source) => match source.kind() { + ErrorKind::NotFound => match from.exists() { + true => create_parent_dirs(&to, source)?, + false => return Err(Error::NotFound { path: from, source }.into()), + }, + _ => return Err(Error::UnableToCopyFile { from, to, source }.into()), + }, + } + }) + .await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + let from = self.path_to_filesystem(from)?; + let to = self.path_to_filesystem(to)?; + + maybe_spawn_blocking(move || loop { + match std::fs::hard_link(&from, &to) { + Ok(_) => return Ok(()), + Err(source) => match source.kind() { + ErrorKind::AlreadyExists => { + return Err(Error::AlreadyExists { + path: to.to_str().unwrap().to_string(), + source, + } + .into()) + } + ErrorKind::NotFound => match from.exists() { + true => create_parent_dirs(&to, source)?, + false => return Err(Error::NotFound { path: from, source }.into()), + }, + _ => return Err(Error::UnableToCopyFile { from, to, source }.into()), + }, + } + }) + .await + } +} + +impl LocalFileSystem { + fn list_with_maybe_offset( + &self, + prefix: Option<&Path>, + maybe_offset: Option<&Path>, + ) -> BoxStream<'static, Result<ObjectMeta>> { + let config = Arc::clone(&self.config); + + let root_path = match prefix { + Some(prefix) => match config.prefix_to_filesystem(prefix) { + Ok(path) => path, + Err(e) => return futures::future::ready(Err(e)).into_stream().boxed(), + }, + None => config.root.to_file_path().unwrap(), + }; + + let walkdir = WalkDir::new(root_path) + // Don't include the root directory itself + .min_depth(1) + .follow_links(true); + + let maybe_offset = maybe_offset.cloned(); + + let s = walkdir.into_iter().flat_map(move |result_dir_entry| { + // Apply offset filter before proceeding, to reduce statx file system calls + // This matters for NFS mounts + if let (Some(offset), Ok(entry)) = (maybe_offset.as_ref(), result_dir_entry.as_ref()) { + let location = config.filesystem_to_path(entry.path()); + match location { + Ok(path) if path <= *offset => return None, + Err(e) => return
Some(Err(e)), + _ => {} + } + } + + let entry = match convert_walkdir_result(result_dir_entry).transpose()? { + Ok(entry) => entry, + Err(e) => return Some(Err(e)), + }; + + if !entry.path().is_file() { + return None; + } + + match config.filesystem_to_path(entry.path()) { + Ok(path) => match is_valid_file_path(&path) { + true => convert_entry(entry, path).transpose(), + false => None, + }, + Err(e) => Some(Err(e)), + } + }); + + // If no tokio context, return iterator directly as no + // need to perform chunked spawn_blocking reads + if tokio::runtime::Handle::try_current().is_err() { + return futures::stream::iter(s).boxed(); + } + + // Otherwise list in batches of CHUNK_SIZE + const CHUNK_SIZE: usize = 1024; + + let buffer = VecDeque::with_capacity(CHUNK_SIZE); + futures::stream::try_unfold((s, buffer), |(mut s, mut buffer)| async move { + if buffer.is_empty() { + (s, buffer) = tokio::task::spawn_blocking(move || { + for _ in 0..CHUNK_SIZE { + match s.next() { + Some(r) => buffer.push_back(r), + None => break, + } + } + (s, buffer) + }) + .await?; + } + + match buffer.pop_front() { + Some(Err(e)) => Err(e), + Some(Ok(meta)) => Ok(Some((meta, (s, buffer)))), + None => Ok(None), + } + }) + .boxed() + } +} + +/// Creates the parent directories of `path` or returns an error based on `source` if no parent +fn create_parent_dirs(path: &std::path::Path, source: io::Error) -> Result<()> { + let parent = path.parent().ok_or_else(|| { + let path = path.to_path_buf(); + Error::UnableToCreateFile { path, source } + })?; + + std::fs::create_dir_all(parent).map_err(|source| { + let path = parent.into(); + Error::UnableToCreateDir { source, path } + })?; + Ok(()) +} + +/// Generates a unique file path `{base}#{suffix}`, returning the opened `File` and `path` +/// +/// Creates any directories if necessary +fn new_staged_upload(base: &std::path::Path) -> Result<(File, PathBuf)> { + let mut multipart_id = 1; + loop { + let suffix = multipart_id.to_string(); + let path = staged_upload_path(base, &suffix); + let mut options = OpenOptions::new(); + match options.read(true).write(true).create_new(true).open(&path) { + Ok(f) => return Ok((f, path)), + Err(source) => match source.kind() { + ErrorKind::AlreadyExists => multipart_id += 1, + ErrorKind::NotFound => create_parent_dirs(&path, source)?, + _ => return Err(Error::UnableToOpenFile { source, path }.into()), + }, + } + } +} + +/// Returns the unique staged upload path for the given destination and suffix +fn staged_upload_path(dest: &std::path::Path, suffix: &str) -> PathBuf { + let mut staging_path = dest.as_os_str().to_owned(); + staging_path.push("#"); + staging_path.push(suffix); + staging_path.into() +} + +#[derive(Debug)] +struct LocalUpload { + /// The upload state + state: Arc<UploadState>, + /// The location of the temporary file + src: Option<PathBuf>, + /// The next offset to write into the file + offset: u64, +} + +#[derive(Debug)] +struct UploadState { + dest: PathBuf, + file: Mutex<File>, +} + +impl LocalUpload { + pub(crate) fn new(src: PathBuf, dest: PathBuf, file: File) -> Self { + Self { + state: Arc::new(UploadState { + dest, + file: Mutex::new(file), + }), + src: Some(src), + offset: 0, + } + } +} + +#[async_trait] +impl MultipartUpload for LocalUpload { + fn put_part(&mut self, data: PutPayload) -> UploadPart { + let offset = self.offset; + self.offset += data.content_length() as u64; + + let s = Arc::clone(&self.state); + maybe_spawn_blocking(move || { + let mut file = s.file.lock(); + file.seek(SeekFrom::Start(offset)).map_err(|source| { + let path = s.dest.clone();
Error::Seek { source, path } + })?; + + data.iter() + .try_for_each(|x| file.write_all(x)) + .map_err(|source| Error::UnableToCopyDataToFile { source })?; + + Ok(()) + }) + .boxed() + } + + async fn complete(&mut self) -> Result<PutResult> { + let src = self.src.take().ok_or(Error::Aborted)?; + let s = Arc::clone(&self.state); + maybe_spawn_blocking(move || { + // Ensure no inflight writes + let file = s.file.lock(); + std::fs::rename(&src, &s.dest) + .map_err(|source| Error::UnableToRenameFile { source })?; + let metadata = file.metadata().map_err(|e| Error::Metadata { + source: e.into(), + path: src.to_string_lossy().to_string(), + })?; + + Ok(PutResult { + e_tag: Some(get_etag(&metadata)), + version: None, + }) + }) + .await + } + + async fn abort(&mut self) -> Result<()> { + let src = self.src.take().ok_or(Error::Aborted)?; + maybe_spawn_blocking(move || { + std::fs::remove_file(&src) + .map_err(|source| Error::UnableToDeleteFile { source, path: src })?; + Ok(()) + }) + .await + } +} + +impl Drop for LocalUpload { + fn drop(&mut self) { + if let Some(src) = self.src.take() { + // Try to clean up the intermediate file, ignoring any error + match tokio::runtime::Handle::try_current() { + Ok(r) => drop(r.spawn_blocking(move || std::fs::remove_file(src))), + Err(_) => drop(std::fs::remove_file(src)), + }; + } + } +} + +pub(crate) fn chunked_stream( + mut file: File, + path: PathBuf, + range: Range<u64>, + chunk_size: usize, +) -> BoxStream<'static, Result<Bytes>> { + futures::stream::once(async move { + let (file, path) = maybe_spawn_blocking(move || { + file.seek(SeekFrom::Start(range.start as _)) + .map_err(|source| Error::Seek { + source, + path: path.clone(), + })?; + Ok((file, path)) + }) + .await?; + + let stream = futures::stream::try_unfold( + (file, path, range.end - range.start), + move |(mut file, path, remaining)| { + maybe_spawn_blocking(move || { + if remaining == 0 { + return Ok(None); + } + + let to_read = remaining.min(chunk_size as u64); + let cap = usize::try_from(to_read).map_err(|_e| Error::InvalidRange { + source: InvalidGetRange::TooLarge { + requested: to_read, + max: usize::MAX as u64, + }, + })?; + let mut buffer = Vec::with_capacity(cap); + let read = (&mut file) + .take(to_read) + .read_to_end(&mut buffer) + .map_err(|e| Error::UnableToReadBytes { + source: e, + path: path.clone(), + })?; + + Ok(Some((buffer.into(), (file, path, remaining - read as u64)))) + }) + }, + ); + Ok::<_, super::Error>(stream) + }) + .try_flatten() + .boxed() +}
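+
+// `read_range` clamps the requested end to the file length, mirroring HTTP range
+// semantics: a start at or beyond EOF is an error, while an end beyond EOF is
+// truncated to the bytes actually available.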
+pub(crate) fn read_range(file: &mut File, path: &PathBuf, range: Range<u64>) -> Result<Bytes> { + let file_metadata = file.metadata().map_err(|e| Error::Metadata { + source: e.into(), + path: path.to_string_lossy().to_string(), + })?; + + // If none of the range is satisfiable we should error, e.g. if the start offset is beyond the + // extents of the file + let file_len = file_metadata.len(); + if range.start >= file_len { + return Err(Error::InvalidRange { + source: InvalidGetRange::StartTooLarge { + requested: range.start, + length: file_len, + }, + } + .into()); + } + + // Don't read past end of file + let to_read = range.end.min(file_len) - range.start; + + file.seek(SeekFrom::Start(range.start)).map_err(|source| { + let path = path.into(); + Error::Seek { source, path } + })?; + + let mut buf = Vec::with_capacity(to_read as usize); + let read = file.take(to_read).read_to_end(&mut buf).map_err(|source| { + let path = path.into(); + Error::UnableToReadBytes { source, path } + })? as u64; + + if read != to_read { + let error = Error::OutOfRange { + path: path.into(), + expected: to_read, + actual: read, + }; + + return Err(error.into()); + } + + Ok(buf.into()) +} + +fn open_file(path: &PathBuf) -> Result<(File, Metadata)> { + let ret = match File::open(path).and_then(|f| Ok((f.metadata()?, f))) { + Err(e) => Err(match e.kind() { + ErrorKind::NotFound => Error::NotFound { + path: path.clone(), + source: e, + }, + _ => Error::UnableToOpenFile { + path: path.clone(), + source: e, + }, + }), + Ok((metadata, file)) => match !metadata.is_dir() { + true => Ok((file, metadata)), + false => Err(Error::NotFound { + path: path.clone(), + source: io::Error::new(ErrorKind::NotFound, "is directory"), + }), + }, + }?; + Ok(ret) +} + +fn convert_entry(entry: DirEntry, location: Path) -> Result<Option<ObjectMeta>> { + match entry.metadata() { + Ok(metadata) => Ok(Some(convert_metadata(metadata, location))), + Err(e) => { + if let Some(io_err) = e.io_error() { + if io_err.kind() == ErrorKind::NotFound { + return Ok(None); + } + } + Err(Error::Metadata { + source: e.into(), + path: location.to_string(), + })? + } + } +} + +fn last_modified(metadata: &Metadata) -> DateTime<Utc> { + metadata + .modified() + .expect("Modified file time should be supported on this platform") + .into() +} + +fn get_etag(metadata: &Metadata) -> String { + let inode = get_inode(metadata); + let size = metadata.len(); + let mtime = metadata + .modified() + .ok() + .and_then(|mtime| mtime.duration_since(SystemTime::UNIX_EPOCH).ok()) + .unwrap_or_default() + .as_micros(); + + // Use an ETag scheme based on that used by many popular HTTP servers + format!("{inode:x}-{mtime:x}-{size:x}") +} + +fn convert_metadata(metadata: Metadata, location: Path) -> ObjectMeta { + let last_modified = last_modified(&metadata); + + ObjectMeta { + location, + last_modified, + size: metadata.len(), + e_tag: Some(get_etag(&metadata)), + version: None, + } +} + +#[cfg(unix)] +/// We include the inode when available to yield an ETag more resistant to collisions +/// and as used by popular web servers such as [Apache](https://httpd.apache.org/docs/2.2/mod/core.html#fileetag) +fn get_inode(metadata: &Metadata) -> u64 { + std::os::unix::fs::MetadataExt::ino(metadata) +} + +#[cfg(not(unix))] +/// On platforms where an inode isn't available, fall back to relying on just size and mtime +fn get_inode(_metadata: &Metadata) -> u64 { + 0 +} + +/// Converts walkdir results, mapping not-found errors and broken symlinks to `None` +fn convert_walkdir_result( + res: std::result::Result<DirEntry, walkdir::Error>, +) -> Result<Option<DirEntry>> { + match res { + Ok(entry) => { + // To detect a broken symlink, call symlink_metadata() (it does not traverse symlinks); + // if that succeeds and the entry is a symlink, try to resolve it by calling metadata().
+ match symlink_metadata(entry.path()) { + Ok(attr) => { + if attr.is_symlink() { + let target_metadata = metadata(entry.path()); + match target_metadata { + Ok(_) => { + // symlink is valid + Ok(Some(entry)) + } + Err(_) => { + // this is a broken symlink, return None + Ok(None) + } + } + } else { + Ok(Some(entry)) + } + } + Err(_) => Ok(None), + } + } + + Err(walkdir_err) => match walkdir_err.io_error() { + Some(io_err) => match io_err.kind() { + ErrorKind::NotFound => Ok(None), + _ => Err(Error::UnableToWalkDir { + source: walkdir_err, + } + .into()), + }, + None => Err(Error::UnableToWalkDir { + source: walkdir_err, + } + .into()), + }, + } +} + +#[cfg(test)] +mod tests { + use std::fs; + + use futures::TryStreamExt; + use tempfile::TempDir; + + #[cfg(target_family = "unix")] + use tempfile::NamedTempFile; + + use crate::integration::*; + + use super::*; + + #[tokio::test] + #[cfg(target_family = "unix")] + async fn file_test() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + put_get_delete_list(&integration).await; + get_opts(&integration).await; + list_uses_directories_correctly(&integration).await; + list_with_delimiter(&integration).await; + rename_and_copy(&integration).await; + copy_if_not_exists(&integration).await; + copy_rename_nonexistent_object(&integration).await; + stream_get(&integration).await; + put_opts(&integration, false).await; + } + + #[test] + #[cfg(target_family = "unix")] + fn test_non_tokio() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + futures::executor::block_on(async move { + put_get_delete_list(&integration).await; + list_uses_directories_correctly(&integration).await; + list_with_delimiter(&integration).await; + + // Can't use stream_get test as WriteMultipart uses a tokio JoinSet + let p = Path::from("manual_upload"); + let mut upload = integration.put_multipart(&p).await.unwrap(); + upload.put_part("123".into()).await.unwrap(); + upload.put_part("45678".into()).await.unwrap(); + let r = upload.complete().await.unwrap(); + + let get = integration.get(&p).await.unwrap(); + assert_eq!(get.meta.e_tag.as_ref().unwrap(), r.e_tag.as_ref().unwrap()); + let actual = get.bytes().await.unwrap(); + assert_eq!(actual.as_ref(), b"12345678"); + }); + } + + #[tokio::test] + async fn creates_dir_if_not_present() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let location = Path::from("nested/file/test_file"); + + let data = Bytes::from("arbitrary data"); + + integration + .put(&location, data.clone().into()) + .await + .unwrap(); + + let read_data = integration + .get(&location) + .await + .unwrap() + .bytes() + .await + .unwrap(); + assert_eq!(&*read_data, data); + } + + #[tokio::test] + async fn unknown_length() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let location = Path::from("some_file"); + + let data = Bytes::from("arbitrary data"); + + integration + .put(&location, data.clone().into()) + .await + .unwrap(); + + let read_data = integration + .get(&location) + .await + .unwrap() + .bytes() + .await + .unwrap(); + assert_eq!(&*read_data, data); + } + + #[tokio::test] + async fn range_request_start_beyond_end_of_file() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let location = Path::from("some_file"); + 
+ + let data = Bytes::from("arbitrary data"); + + integration + .put(&location, data.clone().into()) + .await + .unwrap(); + + integration + .get_range(&location, 100..200) + .await + .expect_err("Should error with start range beyond end of file"); + } + + #[tokio::test] + async fn range_request_beyond_end_of_file() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let location = Path::from("some_file"); + + let data = Bytes::from("arbitrary data"); + + integration + .put(&location, data.clone().into()) + .await + .unwrap(); + + let read_data = integration.get_range(&location, 0..100).await.unwrap(); + assert_eq!(&*read_data, data); + } + + #[tokio::test] + #[cfg(target_family = "unix")] + // Fails on github actions runner (which runs the tests as root) + #[ignore] + async fn bubble_up_io_errors() { + use std::{fs::set_permissions, os::unix::prelude::PermissionsExt}; + + let root = TempDir::new().unwrap(); + + // make non-readable + let metadata = root.path().metadata().unwrap(); + let mut permissions = metadata.permissions(); + permissions.set_mode(0o000); + set_permissions(root.path(), permissions).unwrap(); + + let store = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let mut stream = store.list(None); + let mut any_err = false; + while let Some(res) = stream.next().await { + if res.is_err() { + any_err = true; + } + } + assert!(any_err); + + // `list_with_delimiter` + assert!(store.list_with_delimiter(None).await.is_err()); + } + + const NON_EXISTENT_NAME: &str = "nonexistentname"; + + #[tokio::test] + async fn get_nonexistent_location() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let location = Path::from(NON_EXISTENT_NAME); + + let err = get_nonexistent_object(&integration, Some(location)) + .await + .unwrap_err(); + if let crate::Error::NotFound { path, source } = err { + let source_variant = source.downcast_ref::<std::io::Error>(); + assert!( + matches!(source_variant, Some(std::io::Error { ..
}),), + "got: {source_variant:?}" + ); + assert!(path.ends_with(NON_EXISTENT_NAME), "{}", path); + } else { + panic!("unexpected error type: {err:?}"); + } + } + + #[tokio::test] + async fn root() { + let integration = LocalFileSystem::new(); + + let canonical = std::path::Path::new("Cargo.toml").canonicalize().unwrap(); + let url = Url::from_directory_path(&canonical).unwrap(); + let path = Path::parse(url.path()).unwrap(); + + let roundtrip = integration.path_to_filesystem(&path).unwrap(); + + // Needed as on Windows canonicalize returns extended length path syntax + // C:\Users\circleci -> \\?\C:\Users\circleci + let roundtrip = roundtrip.canonicalize().unwrap(); + + assert_eq!(roundtrip, canonical); + + integration.head(&path).await.unwrap(); + } + + #[tokio::test] + #[cfg(target_family = "windows")] + async fn test_list_root() { + let fs = LocalFileSystem::new(); + let r = fs.list_with_delimiter(None).await.unwrap_err().to_string(); + + assert!( + r.contains("Unable to convert URL \"file:///\" to filesystem path"), + "{}", + r + ); + } + + #[tokio::test] + #[cfg(target_os = "linux")] + async fn test_list_root() { + let fs = LocalFileSystem::new(); + fs.list_with_delimiter(None).await.unwrap(); + } + + #[cfg(target_family = "unix")] + async fn check_list(integration: &LocalFileSystem, prefix: Option<&Path>, expected: &[&str]) { + let result: Vec<_> = integration.list(prefix).try_collect().await.unwrap(); + + let mut strings: Vec<_> = result.iter().map(|x| x.location.as_ref()).collect(); + strings.sort_unstable(); + assert_eq!(&strings, expected) + } + + #[tokio::test] + #[cfg(target_family = "unix")] + async fn test_symlink() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let subdir = root.path().join("a"); + std::fs::create_dir(&subdir).unwrap(); + let file = subdir.join("file.parquet"); + std::fs::write(file, "test").unwrap(); + + check_list(&integration, None, &["a/file.parquet"]).await; + integration + .head(&Path::from("a/file.parquet")) + .await + .unwrap(); + + // Follow out of tree symlink + let other = NamedTempFile::new().unwrap(); + std::os::unix::fs::symlink(other.path(), root.path().join("test.parquet")).unwrap(); + + // Should return test.parquet even though out of tree + check_list(&integration, None, &["a/file.parquet", "test.parquet"]).await; + + // Can fetch test.parquet + integration.head(&Path::from("test.parquet")).await.unwrap(); + + // Follow in tree symlink + std::os::unix::fs::symlink(&subdir, root.path().join("b")).unwrap(); + check_list( + &integration, + None, + &["a/file.parquet", "b/file.parquet", "test.parquet"], + ) + .await; + check_list(&integration, Some(&Path::from("b")), &["b/file.parquet"]).await; + + // Can fetch through symlink + integration + .head(&Path::from("b/file.parquet")) + .await + .unwrap(); + + // Ignore broken symlink + std::os::unix::fs::symlink(root.path().join("foo.parquet"), root.path().join("c")).unwrap(); + + check_list( + &integration, + None, + &["a/file.parquet", "b/file.parquet", "test.parquet"], + ) + .await; + + let mut r = integration.list_with_delimiter(None).await.unwrap(); + r.common_prefixes.sort_unstable(); + assert_eq!(r.common_prefixes.len(), 2); + assert_eq!(r.common_prefixes[0].as_ref(), "a"); + assert_eq!(r.common_prefixes[1].as_ref(), "b"); + assert_eq!(r.objects.len(), 1); + assert_eq!(r.objects[0].location.as_ref(), "test.parquet"); + + let r = integration + .list_with_delimiter(Some(&Path::from("a"))) + .await + .unwrap(); + 
assert_eq!(r.common_prefixes.len(), 0); + assert_eq!(r.objects.len(), 1); + assert_eq!(r.objects[0].location.as_ref(), "a/file.parquet"); + + // Deleting a symlink doesn't delete the source file + integration + .delete(&Path::from("test.parquet")) + .await + .unwrap(); + assert!(other.path().exists()); + + check_list(&integration, None, &["a/file.parquet", "b/file.parquet"]).await; + + // Deleting through a symlink deletes both files + integration + .delete(&Path::from("b/file.parquet")) + .await + .unwrap(); + + check_list(&integration, None, &[]).await; + + // Adding a file through a symlink creates in both paths + integration + .put(&Path::from("b/file.parquet"), vec![0, 1, 2].into()) + .await + .unwrap(); + + check_list(&integration, None, &["a/file.parquet", "b/file.parquet"]).await; + } + + #[tokio::test] + async fn invalid_path() { + let root = TempDir::new().unwrap(); + let root = root.path().join("🙀"); + std::fs::create_dir(root.clone()).unwrap(); + + // Invalid paths supported above root of store + let integration = LocalFileSystem::new_with_prefix(root.clone()).unwrap(); + + let directory = Path::from("directory"); + let object = directory.child("child.txt"); + let data = Bytes::from("arbitrary"); + integration.put(&object, data.clone().into()).await.unwrap(); + integration.head(&object).await.unwrap(); + let result = integration.get(&object).await.unwrap(); + assert_eq!(result.bytes().await.unwrap(), data); + + flatten_list_stream(&integration, None).await.unwrap(); + flatten_list_stream(&integration, Some(&directory)) + .await + .unwrap(); + + let result = integration + .list_with_delimiter(Some(&directory)) + .await + .unwrap(); + assert_eq!(result.objects.len(), 1); + assert!(result.common_prefixes.is_empty()); + assert_eq!(result.objects[0].location, object); + + let emoji = root.join("💀"); + std::fs::write(emoji, "foo").unwrap(); + + // Can list illegal file + let mut paths = flatten_list_stream(&integration, None).await.unwrap(); + paths.sort_unstable(); + + assert_eq!( + paths, + vec![ + Path::parse("directory/child.txt").unwrap(), + Path::parse("💀").unwrap() + ] + ); + } + + #[tokio::test] + async fn list_hides_incomplete_uploads() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + let location = Path::from("some_file"); + + let data = PutPayload::from("arbitrary data"); + let mut u1 = integration.put_multipart(&location).await.unwrap(); + u1.put_part(data.clone()).await.unwrap(); + + let mut u2 = integration.put_multipart(&location).await.unwrap(); + u2.put_part(data).await.unwrap(); + + let list = flatten_list_stream(&integration, None).await.unwrap(); + assert_eq!(list.len(), 0); + + assert_eq!( + integration + .list_with_delimiter(None) + .await + .unwrap() + .objects + .len(), + 0 + ); + } + + #[tokio::test] + async fn test_path_with_offset() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let root_path = root.path(); + for i in 0..5 { + let filename = format!("test{i}.parquet"); + let file = root_path.join(filename); + std::fs::write(file, "test").unwrap(); + } + let filter_str = "test"; + let filter = String::from(filter_str); + let offset_str = filter + "1"; + let offset = Path::from(offset_str.clone()); + + // Use list_with_offset to retrieve files + let res = integration.list_with_offset(None, &offset); + let offset_paths: Vec<_> = res.map_ok(|x| x.location).try_collect().await.unwrap(); + let mut offset_files: Vec<_> = 
offset_paths + .iter() + .map(|x| String::from(x.filename().unwrap())) + .collect(); + + // Check result with direct filesystem read + let files = fs::read_dir(root_path).unwrap(); + let filtered_files = files + .filter_map(Result::ok) + .filter_map(|d| { + d.file_name().to_str().and_then(|f| { + if f.contains(filter_str) { + Some(String::from(f)) + } else { + None + } + }) + }) + .collect::<Vec<String>>(); + + let mut expected_offset_files: Vec<_> = filtered_files + .iter() + .filter(|s| **s > offset_str) + .cloned() + .collect(); + + fn do_vecs_match<T: PartialEq>(a: &[T], b: &[T]) -> bool { + let matching = a.iter().zip(b.iter()).filter(|&(a, b)| a == b).count(); + matching == a.len() && matching == b.len() + } + + offset_files.sort(); + expected_offset_files.sort(); + + assert_eq!(offset_files.len(), expected_offset_files.len()); + assert!(do_vecs_match(&expected_offset_files, &offset_files)); + } + + #[tokio::test] + async fn filesystem_filename_with_percent() { + let temp_dir = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(temp_dir.path()).unwrap(); + let filename = "L%3ABC.parquet"; + + std::fs::write(temp_dir.path().join(filename), "foo").unwrap(); + + let res: Vec<_> = integration.list(None).try_collect().await.unwrap(); + assert_eq!(res.len(), 1); + assert_eq!(res[0].location.as_ref(), filename); + + let res = integration.list_with_delimiter(None).await.unwrap(); + assert_eq!(res.objects.len(), 1); + assert_eq!(res.objects[0].location.as_ref(), filename); + } + + #[tokio::test] + async fn relative_paths() { + LocalFileSystem::new_with_prefix(".").unwrap(); + LocalFileSystem::new_with_prefix("..").unwrap(); + LocalFileSystem::new_with_prefix("../..").unwrap(); + + let integration = LocalFileSystem::new(); + let path = Path::from_filesystem_path(".").unwrap(); + integration.list_with_delimiter(Some(&path)).await.unwrap(); + } + + #[test] + fn test_valid_path() { + let cases = [ + ("foo#123/test.txt", true), + ("foo#123/test#23.txt", true), + ("foo#123/test#34", false), + ("foo😁/test#34", false), + ("foo/test#😁34", true), + ]; + + for (case, expected) in cases { + let path = Path::parse(case).unwrap(); + assert_eq!(is_valid_file_path(&path), expected); + } + } + + #[tokio::test] + async fn test_intermediate_files() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let a = Path::parse("foo#123/test.txt").unwrap(); + integration.put(&a, "test".into()).await.unwrap(); + + let list = flatten_list_stream(&integration, None).await.unwrap(); + assert_eq!(list, vec![a.clone()]); + + std::fs::write(root.path().join("bar#123"), "test").unwrap(); + + // Should ignore file + let list = flatten_list_stream(&integration, None).await.unwrap(); + assert_eq!(list, vec![a.clone()]); + + let b = Path::parse("bar#123").unwrap(); + let err = integration.get(&b).await.unwrap_err().to_string(); + assert_eq!(err, "Generic LocalFileSystem error: Filenames containing trailing '/#\\d+/' are not supported: bar#123"); + + let c = Path::parse("foo#123.txt").unwrap(); + integration.put(&c, "test".into()).await.unwrap(); + + let mut list = flatten_list_stream(&integration, None).await.unwrap(); + list.sort_unstable(); + assert_eq!(list, vec![c, a]); + } + + #[tokio::test] + #[cfg(target_os = "windows")] + async fn filesystem_filename_with_colon() { + let root = TempDir::new().unwrap(); + let integration = 
LocalFileSystem::new_with_prefix(root.path()).unwrap(); + let path = Path::parse("file%3Aname.parquet").unwrap(); + let location = Path::parse("file:name.parquet").unwrap(); + + integration.put(&location, "test".into()).await.unwrap(); + let list = flatten_list_stream(&integration, None).await.unwrap(); + assert_eq!(list, vec![path.clone()]); + + let result = integration + .get(&location) + .await + .unwrap() + .bytes() + .await + .unwrap(); + assert_eq!(result, Bytes::from("test")); + } + + #[tokio::test] + async fn delete_dirs_automatically() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()) + .unwrap() + .with_automatic_cleanup(true); + let location = Path::from("nested/file/test_file"); + let data = Bytes::from("arbitrary data"); + + integration + .put(&location, data.clone().into()) + .await + .unwrap(); + + let read_data = integration + .get(&location) + .await + .unwrap() + .bytes() + .await + .unwrap(); + + assert_eq!(&*read_data, data); + assert!(fs::read_dir(root.path()).unwrap().count() > 0); + integration.delete(&location).await.unwrap(); + assert!(fs::read_dir(root.path()).unwrap().count() == 0); + } +} + +#[cfg(not(target_arch = "wasm32"))] +#[cfg(test)] +mod not_wasm_tests { + use std::time::Duration; + use tempfile::TempDir; + + use crate::local::LocalFileSystem; + use crate::{ObjectStore, Path, PutPayload}; + + #[tokio::test] + async fn test_cleanup_intermediate_files() { + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + + let location = Path::from("some_file"); + let data = PutPayload::from_static(b"hello"); + let mut upload = integration.put_multipart(&location).await.unwrap(); + upload.put_part(data).await.unwrap(); + + let file_count = std::fs::read_dir(root.path()).unwrap().count(); + assert_eq!(file_count, 1); + drop(upload); + + for _ in 0..100 { + tokio::time::sleep(Duration::from_millis(1)).await; + let file_count = std::fs::read_dir(root.path()).unwrap().count(); + if file_count == 0 { + return; + } + } + panic!("Failed to cleanup file in 100ms") + } +} + +#[cfg(target_family = "unix")] +#[cfg(test)] +mod unix_test { + use std::fs::OpenOptions; + + use nix::sys::stat; + use nix::unistd; + use tempfile::TempDir; + + use crate::local::LocalFileSystem; + use crate::{ObjectStore, Path}; + + #[tokio::test] + async fn test_fifo() { + let filename = "some_file"; + let root = TempDir::new().unwrap(); + let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + let path = root.path().join(filename); + unistd::mkfifo(&path, stat::Mode::S_IRWXU).unwrap(); + + // Need to open read and write side in parallel + let spawned = + tokio::task::spawn_blocking(|| OpenOptions::new().write(true).open(path).unwrap()); + + let location = Path::from(filename); + integration.head(&location).await.unwrap(); + integration.get(&location).await.unwrap(); + + spawned.await.unwrap(); + } +} diff --git a/rust/object_store/src/memory.rs b/rust/object_store/src/memory.rs new file mode 100644 index 0000000000..e15c2465fb --- /dev/null +++ b/rust/object_store/src/memory.rs @@ -0,0 +1,630 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
diff --git a/rust/object_store/src/memory.rs b/rust/object_store/src/memory.rs
new file mode 100644
index 0000000000..e15c2465fb
--- /dev/null
+++ b/rust/object_store/src/memory.rs
@@ -0,0 +1,630 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! An in-memory object store implementation
+use std::collections::{BTreeMap, BTreeSet, HashMap};
+use std::ops::Range;
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use chrono::{DateTime, Utc};
+use futures::{stream::BoxStream, StreamExt};
+use parking_lot::RwLock;
+
+use crate::multipart::{MultipartStore, PartId};
+use crate::util::InvalidGetRange;
+use crate::{
+    path::Path, Attributes, GetRange, GetResult, GetResultPayload, ListResult, MultipartId,
+    MultipartUpload, ObjectMeta, ObjectStore, PutMode, PutMultipartOptions, PutOptions, PutResult,
+    Result, UpdateVersion, UploadPart,
+};
+use crate::{GetOptions, PutPayload};
+
+/// A specialized `Error` for in-memory object store-related errors
+#[derive(Debug, thiserror::Error)]
+enum Error {
+    #[error("No data in memory found. Location: {path}")]
+    NoDataInMemory { path: String },
+
+    #[error("Invalid range: {source}")]
+    Range { source: InvalidGetRange },
+
+    #[error("Object already exists at that location: {path}")]
+    AlreadyExists { path: String },
+
+    #[error("ETag required for conditional update")]
+    MissingETag,
+
+    #[error("MultipartUpload not found: {id}")]
+    UploadNotFound { id: String },
+
+    #[error("Missing part at index: {part}")]
+    MissingPart { part: usize },
+}
+
+impl From<Error> for super::Error {
+    fn from(source: Error) -> Self {
+        match source {
+            Error::NoDataInMemory { ref path } => Self::NotFound {
+                path: path.into(),
+                source: source.into(),
+            },
+            Error::AlreadyExists { ref path } => Self::AlreadyExists {
+                path: path.into(),
+                source: source.into(),
+            },
+            _ => Self::Generic {
+                store: "InMemory",
+                source: Box::new(source),
+            },
+        }
+    }
+}
+
+/// In-memory storage suitable for testing or for opting out of using a cloud
+/// storage provider.
+#[derive(Debug, Default)]
+pub struct InMemory {
+    storage: SharedStorage,
+}
+
+#[derive(Debug, Clone)]
+struct Entry {
+    data: Bytes,
+    last_modified: DateTime<Utc>,
+    attributes: Attributes,
+    e_tag: usize,
+}
+
+impl Entry {
+    fn new(
+        data: Bytes,
+        last_modified: DateTime<Utc>,
+        e_tag: usize,
+        attributes: Attributes,
+    ) -> Self {
+        Self {
+            data,
+            last_modified,
+            e_tag,
+            attributes,
+        }
+    }
+}
+
+#[derive(Debug, Default, Clone)]
+struct Storage {
+    next_etag: usize,
+    map: BTreeMap<Path, Entry>,
+    uploads: HashMap<usize, PartStorage>,
+}
+
+#[derive(Debug, Default, Clone)]
+struct PartStorage {
+    parts: Vec<Option<Bytes>>,
+}
+
+type SharedStorage = Arc<RwLock<Storage>>;
+
+impl Storage {
+    fn insert(&mut self, location: &Path, bytes: Bytes, attributes: Attributes) -> usize {
+        let etag = self.next_etag;
+        self.next_etag += 1;
+        let entry = Entry::new(bytes, Utc::now(), etag, attributes);
+        self.overwrite(location, entry);
+        etag
+    }
+
+    fn overwrite(&mut self, location: &Path, entry: Entry) {
+        self.map.insert(location.clone(), entry);
+    }
+
+    fn create(&mut self, location: &Path, entry: Entry) -> Result<()> {
+        use std::collections::btree_map;
+        match self.map.entry(location.clone()) {
+            btree_map::Entry::Occupied(_) => Err(Error::AlreadyExists {
+                path: location.to_string(),
+            }
+            .into()),
+            btree_map::Entry::Vacant(v) => {
+                v.insert(entry);
+                Ok(())
+            }
+        }
+    }
+
+    fn update(&mut self, location: &Path, v: UpdateVersion, entry: Entry) -> Result<()> {
+        match self.map.get_mut(location) {
+            // Return Precondition instead of NotFound for consistency with stores
+            None => Err(crate::Error::Precondition {
+                path: location.to_string(),
+                source: format!("Object at location {location} not found").into(),
+            }),
+            Some(e) => {
+                let existing = e.e_tag.to_string();
+                let expected = v.e_tag.ok_or(Error::MissingETag)?;
+                if existing == expected {
+                    *e = entry;
+                    Ok(())
+                } else {
+                    Err(crate::Error::Precondition {
+                        path: location.to_string(),
+                        source: format!("{existing} does not match {expected}").into(),
+                    })
+                }
+            }
+        }
+    }
+
+    fn upload_mut(&mut self, id: &MultipartId) -> Result<&mut PartStorage> {
+        let parts = id
+            .parse()
+            .ok()
+            .and_then(|x| self.uploads.get_mut(&x))
+            .ok_or_else(|| Error::UploadNotFound { id: id.into() })?;
+        Ok(parts)
+    }
+
+    fn remove_upload(&mut self, id: &MultipartId) -> Result<PartStorage> {
+        let parts = id
+            .parse()
+            .ok()
+            .and_then(|x| self.uploads.remove(&x))
+            .ok_or_else(|| Error::UploadNotFound { id: id.into() })?;
+        Ok(parts)
+    }
+}
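The `create`/`update` methods above are what give `put_opts` its conditional semantics further down. A rough sketch (not part of the diff) of how that surfaces through the public API, assuming the usual `object_store` types `PutMode` and `UpdateVersion` and a `From<PutMode>` conversion into `PutOptions`:

    use object_store::{memory::InMemory, path::Path, ObjectStore, PutMode, UpdateVersion};

    #[tokio::main]
    async fn main() -> object_store::Result<()> {
        let store = InMemory::new();
        let path = Path::from("config.json");

        // Create fails if the object already exists...
        let created = store.put_opts(&path, "v1".into(), PutMode::Create.into()).await?;
        assert!(store.put_opts(&path, "v2".into(), PutMode::Create.into()).await.is_err());

        // ...while Update succeeds only while the e_tag still matches
        let version = UpdateVersion { e_tag: created.e_tag, version: None };
        store.put_opts(&path, "v2".into(), PutMode::Update(version).into()).await?;
        Ok(())
    }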
+impl std::fmt::Display for InMemory {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "InMemory")
+    }
+}
+
+#[async_trait]
+impl ObjectStore for InMemory {
+    async fn put_opts(
+        &self,
+        location: &Path,
+        payload: PutPayload,
+        opts: PutOptions,
+    ) -> Result<PutResult> {
+        let mut storage = self.storage.write();
+        let etag = storage.next_etag;
+        let entry = Entry::new(payload.into(), Utc::now(), etag, opts.attributes);
+
+        match opts.mode {
+            PutMode::Overwrite => storage.overwrite(location, entry),
+            PutMode::Create => storage.create(location, entry)?,
+            PutMode::Update(v) => storage.update(location, v, entry)?,
+        }
+        storage.next_etag += 1;
+
+        Ok(PutResult {
+            e_tag: Some(etag.to_string()),
+            version: None,
+        })
+    }
+
+    async fn put_multipart_opts(
+        &self,
+        location: &Path,
+        opts: PutMultipartOptions,
+    ) -> Result<Box<dyn MultipartUpload>> {
+        Ok(Box::new(InMemoryUpload {
+            location: location.clone(),
+            attributes: opts.attributes,
+            parts: vec![],
+            storage: Arc::clone(&self.storage),
+        }))
+    }
+
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
+        let entry = self.entry(location)?;
+        let e_tag = entry.e_tag.to_string();
+
+        let meta = ObjectMeta {
+            location: location.clone(),
+            last_modified: entry.last_modified,
+            size: entry.data.len() as u64,
+            e_tag: Some(e_tag),
+            version: None,
+        };
+        options.check_preconditions(&meta)?;
+
+        let (range, data) = match options.range {
+            Some(range) => {
+                let r = range
+                    .as_range(entry.data.len() as u64)
+                    .map_err(|source| Error::Range { source })?;
+                (
+                    r.clone(),
+                    entry.data.slice(r.start as usize..r.end as usize),
+                )
+            }
+            None => (0..entry.data.len() as u64, entry.data),
+        };
+        let stream = futures::stream::once(futures::future::ready(Ok(data)));
+
+        Ok(GetResult {
+            payload: GetResultPayload::Stream(stream.boxed()),
+            attributes: entry.attributes,
+            meta,
+            range,
+        })
+    }
+
+    async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
+        let entry = self.entry(location)?;
+        ranges
+            .iter()
+            .map(|range| {
+                let r = GetRange::Bounded(range.clone())
+                    .as_range(entry.data.len() as u64)
+                    .map_err(|source| Error::Range { source })?;
+                let r_end = usize::try_from(r.end).map_err(|_e| Error::Range {
+                    source: InvalidGetRange::TooLarge {
+                        requested: r.end,
+                        max: usize::MAX as u64,
+                    },
+                })?;
+                let r_start = usize::try_from(r.start).map_err(|_e| Error::Range {
+                    source: InvalidGetRange::TooLarge {
+                        requested: r.start,
+                        max: usize::MAX as u64,
+                    },
+                })?;
+                Ok(entry.data.slice(r_start..r_end))
+            })
+            .collect()
+    }
+
+    async fn head(&self, location: &Path) -> Result<ObjectMeta> {
+        let entry = self.entry(location)?;
+
+        Ok(ObjectMeta {
+            location: location.clone(),
+            last_modified: entry.last_modified,
+            size: entry.data.len() as u64,
+            e_tag: Some(entry.e_tag.to_string()),
+            version: None,
+        })
+    }
+
+    async fn delete(&self, location: &Path) -> Result<()> {
+        self.storage.write().map.remove(location);
+        Ok(())
+    }
+
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
+        let root = Path::default();
+        let prefix = prefix.unwrap_or(&root);
+
+        let storage = self.storage.read();
+        let values: Vec<_> = storage
+            .map
+            .range((prefix)..)
+            .take_while(|(key, _)| key.as_ref().starts_with(prefix.as_ref()))
+            .filter(|(key, _)| {
+                // Don't return for exact prefix match
+                key.prefix_match(prefix)
+                    .map(|mut x| x.next().is_some())
+                    .unwrap_or(false)
+            })
+            .map(|(key, value)| {
+                Ok(ObjectMeta {
+                    location: key.clone(),
+                    last_modified: value.last_modified,
+                    size: value.data.len() as u64,
+                    e_tag: Some(value.e_tag.to_string()),
+                    version: None,
+                })
+            })
+            .collect();
+
+        futures::stream::iter(values).boxed()
+    }
+
+    /// The memory implementation returns all results, as opposed to the cloud
+    /// versions which limit their results to 1k or more because of API
+    /// limitations.
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
+        let root = Path::default();
+        let prefix = prefix.unwrap_or(&root);
+
+        let mut common_prefixes = BTreeSet::new();
+
+        // Only objects in this base level should be returned in the
+        // response. Otherwise, we just collect the common prefixes.
+        let mut objects = vec![];
+        for (k, v) in self.storage.read().map.range((prefix)..) {
+            if !k.as_ref().starts_with(prefix.as_ref()) {
+                break;
+            }
+
+            let mut parts = match k.prefix_match(prefix) {
+                Some(parts) => parts,
+                None => continue,
+            };
+
+            // Pop first element
+            let common_prefix = match parts.next() {
+                Some(p) => p,
+                // Should only return children of the prefix
+                None => continue,
+            };
+
+            if parts.next().is_some() {
+                common_prefixes.insert(prefix.child(common_prefix));
+            } else {
+                let object = ObjectMeta {
+                    location: k.clone(),
+                    last_modified: v.last_modified,
+                    size: v.data.len() as u64,
+                    e_tag: Some(v.e_tag.to_string()),
+                    version: None,
+                };
+                objects.push(object);
+            }
+        }
+
+        Ok(ListResult {
+            objects,
+            common_prefixes: common_prefixes.into_iter().collect(),
+        })
+    }
+
+    async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
+        let entry = self.entry(from)?;
+        self.storage
+            .write()
+            .insert(to, entry.data, entry.attributes);
+        Ok(())
+    }
+
+    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
+        let entry = self.entry(from)?;
+        let mut storage = self.storage.write();
+        if storage.map.contains_key(to) {
+            return Err(Error::AlreadyExists {
+                path: to.to_string(),
+            }
+            .into());
+        }
+        storage.insert(to, entry.data, entry.attributes);
+        Ok(())
+    }
+}
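A small usage sketch (not part of the diff) contrasting the flat `list` with the delimited listing implemented above; the object names are illustrative:

    use futures::TryStreamExt;
    use object_store::{memory::InMemory, path::Path, ObjectStore};

    #[tokio::main]
    async fn main() -> object_store::Result<()> {
        let store = InMemory::new();
        for name in ["a/1", "a/b/2", "c"] {
            store.put(&Path::from(name), "x".into()).await?;
        }

        // list walks everything under the prefix
        let all: Vec<_> = store.list(None).try_collect().await?;
        assert_eq!(all.len(), 3);

        // list_with_delimiter under "a" returns "a/1" plus the common prefix "a/b"
        let res = store.list_with_delimiter(Some(&Path::from("a"))).await?;
        assert_eq!(res.objects.len(), 1);
        assert_eq!(res.common_prefixes, vec![Path::from("a/b")]);
        Ok(())
    }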
+#[async_trait]
+impl MultipartStore for InMemory {
+    async fn create_multipart(&self, _path: &Path) -> Result<MultipartId> {
+        let mut storage = self.storage.write();
+        let etag = storage.next_etag;
+        storage.next_etag += 1;
+        storage.uploads.insert(etag, Default::default());
+        Ok(etag.to_string())
+    }
+
+    async fn put_part(
+        &self,
+        _path: &Path,
+        id: &MultipartId,
+        part_idx: usize,
+        payload: PutPayload,
+    ) -> Result<PartId> {
+        let mut storage = self.storage.write();
+        let upload = storage.upload_mut(id)?;
+        if part_idx <= upload.parts.len() {
+            upload.parts.resize(part_idx + 1, None);
+        }
+        upload.parts[part_idx] = Some(payload.into());
+        Ok(PartId {
+            content_id: Default::default(),
+        })
+    }
+
+    async fn complete_multipart(
+        &self,
+        path: &Path,
+        id: &MultipartId,
+        _parts: Vec<PartId>,
+    ) -> Result<PutResult> {
+        let mut storage = self.storage.write();
+        let upload = storage.remove_upload(id)?;
+
+        let mut cap = 0;
+        for (part, x) in upload.parts.iter().enumerate() {
+            cap += x.as_ref().ok_or(Error::MissingPart { part })?.len();
+        }
+        let mut buf = Vec::with_capacity(cap);
+        for x in &upload.parts {
+            buf.extend_from_slice(x.as_ref().unwrap())
+        }
+        let etag = storage.insert(path, buf.into(), Default::default());
+        Ok(PutResult {
+            e_tag: Some(etag.to_string()),
+            version: None,
+        })
+    }
+
+    async fn abort_multipart(&self, _path: &Path, id: &MultipartId) -> Result<()> {
+        self.storage.write().remove_upload(id)?;
+        Ok(())
+    }
+}
+
+impl InMemory {
+    /// Create new in-memory storage.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Creates a fork of the store, with the current content copied into the
+    /// new store.
+    pub fn fork(&self) -> Self {
+        let storage = self.storage.read();
+        let storage = Arc::new(RwLock::new(storage.clone()));
+        Self { storage }
+    }
+
+    fn entry(&self, location: &Path) -> Result<Entry> {
+        let storage = self.storage.read();
+        let value = storage
+            .map
+            .get(location)
+            .cloned()
+            .ok_or_else(|| Error::NoDataInMemory {
+                path: location.to_string(),
+            })?;
+
+        Ok(value)
+    }
+}
+
+#[derive(Debug)]
+struct InMemoryUpload {
+    location: Path,
+    attributes: Attributes,
+    parts: Vec<PutPayload>,
+    storage: Arc<RwLock<Storage>>,
+}
+
+#[async_trait]
+impl MultipartUpload for InMemoryUpload {
+    fn put_part(&mut self, payload: PutPayload) -> UploadPart {
+        self.parts.push(payload);
+        Box::pin(futures::future::ready(Ok(())))
+    }
+
+    async fn complete(&mut self) -> Result<PutResult> {
+        let cap = self.parts.iter().map(|x| x.content_length()).sum();
+        let mut buf = Vec::with_capacity(cap);
+        let parts = self.parts.iter().flatten();
+        parts.for_each(|x| buf.extend_from_slice(x));
+        let etag = self.storage.write().insert(
+            &self.location,
+            buf.into(),
+            std::mem::take(&mut self.attributes),
+        );
+
+        Ok(PutResult {
+            e_tag: Some(etag.to_string()),
+            version: None,
+        })
+    }
+
+    async fn abort(&mut self) -> Result<()> {
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::integration::*;
+
+    use super::*;
+
+    #[tokio::test]
+    async fn in_memory_test() {
+        let integration = InMemory::new();
+
+        put_get_delete_list(&integration).await;
+        get_opts(&integration).await;
+        list_uses_directories_correctly(&integration).await;
+        list_with_delimiter(&integration).await;
+        rename_and_copy(&integration).await;
+        copy_if_not_exists(&integration).await;
+        stream_get(&integration).await;
+        put_opts(&integration, true).await;
+        multipart(&integration, &integration).await;
+        put_get_attributes(&integration).await;
+    }
+
+    #[tokio::test]
+    async fn box_test() {
+        let integration: Box<dyn ObjectStore> = Box::new(InMemory::new());
+
+        put_get_delete_list(&integration).await;
+        get_opts(&integration).await;
+        list_uses_directories_correctly(&integration).await;
+        list_with_delimiter(&integration).await;
+        rename_and_copy(&integration).await;
+        copy_if_not_exists(&integration).await;
+        stream_get(&integration).await;
+    }
+
+    #[tokio::test]
+    async fn arc_test() {
+        let integration: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
+
+        put_get_delete_list(&integration).await;
+        get_opts(&integration).await;
+        list_uses_directories_correctly(&integration).await;
+        list_with_delimiter(&integration).await;
+        rename_and_copy(&integration).await;
+        copy_if_not_exists(&integration).await;
+        stream_get(&integration).await;
+    }
+
+    #[tokio::test]
+    async fn unknown_length() {
+        let integration = InMemory::new();
+
+        let location = Path::from("some_file");
+
+        let data = Bytes::from("arbitrary data");
+
+        integration
+            .put(&location, data.clone().into())
+            .await
+            .unwrap();
+
+        let read_data = integration
+            .get(&location)
+            .await
+            .unwrap()
+            .bytes()
+            .await
+            .unwrap();
+        assert_eq!(&*read_data, data);
+    }
+
+    const NON_EXISTENT_NAME: &str = "nonexistentname";
+
+    #[tokio::test]
+    async fn nonexistent_location() {
+        let integration = InMemory::new();
+
+        let location = Path::from(NON_EXISTENT_NAME);
+
+        let err = get_nonexistent_object(&integration, Some(location))
+            .await
+            .unwrap_err();
+        if let crate::Error::NotFound { path, source } = err {
+            let source_variant = source.downcast_ref::<Error>();
+            assert!(
+                matches!(source_variant, Some(Error::NoDataInMemory { .. }),),
+                "got: {source_variant:?}"
+            );
+            assert_eq!(path, NON_EXISTENT_NAME);
+        } else {
+            panic!("unexpected error type: {err:?}");
+        }
+    }
+}
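A brief sketch (not part of the diff) of the `fork` semantics documented above: the fork snapshots the current contents, after which the two stores evolve independently. Paths here are illustrative:

    use object_store::{memory::InMemory, path::Path, ObjectStore};

    #[tokio::main]
    async fn main() -> object_store::Result<()> {
        let original = InMemory::new();
        original.put(&Path::from("seed"), "data".into()).await?;

        let forked = original.fork();
        forked.put(&Path::from("extra"), "data".into()).await?;

        assert!(original.get(&Path::from("extra")).await.is_err()); // not visible in the original
        assert!(forked.get(&Path::from("seed")).await.is_ok()); // snapshot carried over
        Ok(())
    }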
diff --git a/rust/object_store/src/multipart.rs b/rust/object_store/src/multipart.rs
new file mode 100644
index 0000000000..d94e7f1505
--- /dev/null
+++ b/rust/object_store/src/multipart.rs
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Cloud Multipart Upload
+//!
+//! This module provides an asynchronous interface for multipart file uploads to
+//! cloud storage services. It's designed to offer efficient, non-blocking operations,
+//! especially useful when dealing with large files or high-throughput systems.
+
+use async_trait::async_trait;
+
+use crate::path::Path;
+use crate::{MultipartId, PutPayload, PutResult, Result};
+
+/// Represents a part of a file that has been successfully uploaded in a multipart upload process.
+#[derive(Debug, Clone)]
+pub struct PartId {
+    /// Id of this part
+    pub content_id: String,
+}
+
+/// A low-level interface for interacting with multipart upload APIs
+///
+/// Most use-cases should prefer [`ObjectStore::put_multipart`] as this is supported by more
+/// backends, including [`LocalFileSystem`], and automatically handles uploading fixed
+/// size parts of sufficient size in parallel
+///
+/// [`ObjectStore::put_multipart`]: crate::ObjectStore::put_multipart
+/// [`LocalFileSystem`]: crate::local::LocalFileSystem
+#[async_trait]
+pub trait MultipartStore: Send + Sync + 'static {
+    /// Creates a new multipart upload, returning the [`MultipartId`]
+    async fn create_multipart(&self, path: &Path) -> Result<MultipartId>;
+
+    /// Uploads a new part with index `part_idx`
+    ///
+    /// `part_idx` should be an integer in the range `0..N` where `N` is the number of
+    /// parts in the upload. Parts may be uploaded concurrently and in any order.
+    ///
+    /// Most stores require that all parts excluding the last are at least 5 MiB, and some
+    /// further require that all parts excluding the last be the same size, e.g. [R2].
+    /// [`WriteMultipart`] performs writes in fixed size blocks of 5 MiB, and clients wanting
+    /// to maximise compatibility should look to do likewise.
+    ///
+    /// [R2]: https://developers.cloudflare.com/r2/objects/multipart-objects/#limitations
+    /// [`WriteMultipart`]: crate::upload::WriteMultipart
+    async fn put_part(
+        &self,
+        path: &Path,
+        id: &MultipartId,
+        part_idx: usize,
+        data: PutPayload,
+    ) -> Result<PartId>;
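To make the trait above concrete, here is a hand-driven sketch (not part of the diff) using the `InMemory` implementation shown earlier in this diff; part contents and the path are illustrative:

    use object_store::multipart::MultipartStore;
    use object_store::{memory::InMemory, path::Path, ObjectStore};

    #[tokio::main]
    async fn main() -> object_store::Result<()> {
        let store = InMemory::new();
        let path = Path::from("big/object");

        // Parts may be uploaded concurrently; indices fix their final order
        let id = store.create_multipart(&path).await?;
        let p0 = store.put_part(&path, &id, 0, "hello ".into()).await?;
        let p1 = store.put_part(&path, &id, 1, "world".into()).await?;
        store.complete_multipart(&path, &id, vec![p0, p1]).await?;

        let bytes = store.get(&path).await?.bytes().await?;
        assert_eq!(bytes, "hello world");
        Ok(())
    }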
+
+    /// Completes a multipart upload
+    ///
+    /// The `i`'th value of `parts` must be a [`PartId`] returned by a call to [`Self::put_part`]
+    /// with a `part_idx` of `i`, and the same `path` and `id` as provided to this method. Calling
+    /// this method with out of sequence or repeated [`PartId`], or [`PartId`] returned for other
+    /// values of `path` or `id`, will result in implementation-defined behaviour
+    async fn complete_multipart(
+        &self,
+        path: &Path,
+        id: &MultipartId,
+        parts: Vec<PartId>,
+    ) -> Result<PutResult>;
+
+    /// Aborts a multipart upload
+    async fn abort_multipart(&self, path: &Path, id: &MultipartId) -> Result<()>;
+}
diff --git a/rust/object_store/src/parse.rs b/rust/object_store/src/parse.rs
new file mode 100644
index 0000000000..b1f653c51f
--- /dev/null
+++ b/rust/object_store/src/parse.rs
@@ -0,0 +1,424 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+use crate::local::LocalFileSystem;
+use crate::memory::InMemory;
+use crate::path::Path;
+use crate::ObjectStore;
+use url::Url;
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("Unable to recognise URL \"{}\"", url)]
+    Unrecognised { url: Url },
+
+    #[error(transparent)]
+    Path {
+        #[from]
+        source: crate::path::Error,
+    },
+}
+
+impl From<Error> for super::Error {
+    fn from(e: Error) -> Self {
+        Self::Generic {
+            store: "URL",
+            source: Box::new(e),
+        }
+    }
+}
+
+/// Recognizes various URL formats, identifying the relevant [`ObjectStore`]
+///
+/// See [`ObjectStoreScheme::parse`] for more details
+///
+/// # Supported formats:
+/// - `file:///path/to/my/file` -> [`LocalFileSystem`]
+/// - `memory:///` -> [`InMemory`]
+/// - `s3://bucket/path` -> [`AmazonS3`](crate::aws::AmazonS3) (also supports `s3a`)
+/// - `gs://bucket/path` -> [`GoogleCloudStorage`](crate::gcp::GoogleCloudStorage)
+/// - `az://account/container/path` -> [`MicrosoftAzure`](crate::azure::MicrosoftAzure) (also supports `adl`, `azure`, `abfs`, `abfss`)
+/// - `http://mydomain/path` -> [`HttpStore`](crate::http::HttpStore)
+/// - `https://mydomain/path` -> [`HttpStore`](crate::http::HttpStore)
+///
+/// There are also special cases for AWS and Azure for `https://{host?}/path` paths:
+/// - `dfs.core.windows.net`, `blob.core.windows.net`, `dfs.fabric.microsoft.com`, `blob.fabric.microsoft.com` -> [`MicrosoftAzure`](crate::azure::MicrosoftAzure)
+/// - `amazonaws.com` -> [`AmazonS3`](crate::aws::AmazonS3)
+/// - `r2.cloudflarestorage.com` -> [`AmazonS3`](crate::aws::AmazonS3)
+///
+#[non_exhaustive] // permit new variants
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub enum ObjectStoreScheme {
+    /// Url corresponding to [`LocalFileSystem`]
+    Local,
+    /// Url corresponding to [`InMemory`]
+    Memory,
+    /// Url corresponding to [`AmazonS3`](crate::aws::AmazonS3)
+    AmazonS3,
+    /// Url corresponding to [`GoogleCloudStorage`](crate::gcp::GoogleCloudStorage)
+    GoogleCloudStorage,
+    /// Url corresponding to [`MicrosoftAzure`](crate::azure::MicrosoftAzure)
+    MicrosoftAzure,
+    /// Url corresponding to [`HttpStore`](crate::http::HttpStore)
+    Http,
+}
+
+impl ObjectStoreScheme {
+    /// Create an [`ObjectStoreScheme`] from the provided [`Url`]
+    ///
+    /// Returns the [`ObjectStoreScheme`] and the remaining [`Path`]
+    ///
+    /// # Example
+    /// ```
+    /// # use url::Url;
+    /// # use object_store::ObjectStoreScheme;
+    /// let url: Url = "file:///path/to/my/file".parse().unwrap();
+    /// let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();
+    /// assert_eq!(scheme, ObjectStoreScheme::Local);
+    /// assert_eq!(path.as_ref(), "path/to/my/file");
+    ///
+    /// let url: Url = "https://blob.core.windows.net/container/path/to/my/file".parse().unwrap();
+    /// let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();
+    /// assert_eq!(scheme, ObjectStoreScheme::MicrosoftAzure);
+    /// assert_eq!(path.as_ref(), "path/to/my/file");
+    ///
+    /// let url: Url = "https://example.com/path/to/my/file".parse().unwrap();
+    /// let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();
+    /// assert_eq!(scheme, ObjectStoreScheme::Http);
+    /// assert_eq!(path.as_ref(), "path/to/my/file");
+    /// ```
+    pub fn parse(url: &Url) -> Result<(Self, Path), Error> {
+        let strip_bucket = || Some(url.path().strip_prefix('/')?.split_once('/')?.1);
+
+        let (scheme, path) = match (url.scheme(), url.host_str()) {
+            ("file", None) => (Self::Local, url.path()),
+            ("memory", None) => (Self::Memory, url.path()),
+            ("s3" | "s3a", Some(_)) => (Self::AmazonS3, url.path()),
+            ("gs", Some(_)) => (Self::GoogleCloudStorage, url.path()),
+            ("az", Some(_)) => (Self::MicrosoftAzure, strip_bucket().unwrap_or_default()),
+            ("adl" | "azure" | "abfs" | "abfss", Some(_)) => (Self::MicrosoftAzure, url.path()),
+            ("http", Some(_)) => (Self::Http, url.path()),
+            ("https", Some(host)) => {
+                if host.ends_with("dfs.core.windows.net")
+                    || host.ends_with("blob.core.windows.net")
+                    || host.ends_with("dfs.fabric.microsoft.com")
+                    || host.ends_with("blob.fabric.microsoft.com")
+                {
+                    (Self::MicrosoftAzure, strip_bucket().unwrap_or_default())
+                } else if host.ends_with("amazonaws.com") {
+                    match host.starts_with("s3") {
+                        true => (Self::AmazonS3, strip_bucket().unwrap_or_default()),
+                        false => (Self::AmazonS3, url.path()),
+                    }
+                } else if host.ends_with("r2.cloudflarestorage.com") {
+                    (Self::AmazonS3, strip_bucket().unwrap_or_default())
+                } else {
+                    (Self::Http, url.path())
+                }
+            }
+            _ => return Err(Error::Unrecognised { url: url.clone() }),
+        };
+
+        Ok((scheme, Path::from_url_path(path)?))
+    }
+}
+
+#[cfg(feature = "cloud")]
+macro_rules! builder_opts {
+    ($builder:ty, $url:expr, $options:expr) => {{
+        let builder = $options.into_iter().fold(
+            <$builder>::new().with_url($url.to_string()),
+            |builder, (key, value)| match key.as_ref().parse() {
+                Ok(k) => builder.with_config(k, value),
+                Err(_) => builder,
+            },
+        );
+        Box::new(builder.build()?) as _
+    }};
+}
+
+/// Create an [`ObjectStore`] based on the provided `url`
+///
+/// Returns
+/// - An [`ObjectStore`] of the corresponding type
+/// - The [`Path`] into the [`ObjectStore`] of the addressed resource
+pub fn parse_url(url: &Url) -> Result<(Box<dyn ObjectStore>, Path), super::Error> {
+    parse_url_opts(url, std::iter::empty::<(&str, &str)>())
+}
+
+/// Create an [`ObjectStore`] based on the provided `url` and options
+///
+/// This method can be used to create an instance of one of the provided
+/// `ObjectStore` implementations based on the URL scheme (see
+/// [`ObjectStoreScheme`] for more details).
+///
+/// For example
+/// * `file:///path/to/my/file` will return a [`LocalFileSystem`] instance
+/// * `s3://bucket/path` will return an [`AmazonS3`] instance if the `aws` feature is enabled.
+///
+/// Arguments:
+/// * `url`: The URL to parse
+/// * `options`: A list of key-value pairs to pass to the [`ObjectStore`] builder.
+///   Note different object stores accept different configuration options, so
+///   the options that are read depend on the `url` value. One common pattern
+///   is to pass configuration information via environment variables using
+///   [`std::env::vars`].
+///
+/// Returns
+/// - An [`ObjectStore`] of the corresponding type
+/// - The [`Path`] into the [`ObjectStore`] of the addressed resource
+///
+/// [`AmazonS3`]: https://docs.rs/object_store/0.12.0/object_store/aws/struct.AmazonS3.html
+pub fn parse_url_opts<I, K, V>(
+    url: &Url,
+    options: I,
+) -> Result<(Box<dyn ObjectStore>, Path), super::Error>
+where
+    I: IntoIterator<Item = (K, V)>,
+    K: AsRef<str>,
+    V: Into<String>,
+{
+    let _options = options;
+    let (scheme, path) = ObjectStoreScheme::parse(url)?;
+    let path = Path::parse(path)?;
+
+    let store = match scheme {
+        #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+        ObjectStoreScheme::Local => Box::new(LocalFileSystem::new()) as _,
+        ObjectStoreScheme::Memory => Box::new(InMemory::new()) as _,
+        #[cfg(feature = "aws")]
+        ObjectStoreScheme::AmazonS3 => {
+            builder_opts!(crate::aws::AmazonS3Builder, url, _options)
+        }
+        #[cfg(feature = "gcp")]
+        ObjectStoreScheme::GoogleCloudStorage => {
+            builder_opts!(crate::gcp::GoogleCloudStorageBuilder, url, _options)
+        }
+        #[cfg(feature = "azure")]
+        ObjectStoreScheme::MicrosoftAzure => {
+            builder_opts!(crate::azure::MicrosoftAzureBuilder, url, _options)
+        }
+        #[cfg(feature = "http")]
+        ObjectStoreScheme::Http => {
+            let url = &url[..url::Position::BeforePath];
+            builder_opts!(crate::http::HttpBuilder, url, _options)
+        }
+        #[cfg(not(all(
+            feature = "fs",
+            feature = "aws",
+            feature = "azure",
+            feature = "gcp",
+            feature = "http",
+            not(target_arch = "wasm32")
+        )))]
+        s => {
+            return Err(super::Error::Generic {
+                store: "parse_url",
+                source: format!("feature for {s:?} not enabled").into(),
+            })
+        }
+    };
+
+    Ok((store, path))
+}
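A minimal sketch (not part of the diff) of `parse_url` in action; the URL is illustrative and uses the always-available `memory` scheme so no cloud feature flags are needed:

    use object_store::{parse_url, path::Path};
    use url::Url;

    fn main() {
        let url = Url::parse("memory:///data/file.parquet").unwrap();
        let (store, path) = parse_url(&url).unwrap();
        assert_eq!(path, Path::from("data/file.parquet"));
        assert_eq!(store.to_string(), "InMemory"); // Display impl from memory.rs above
    }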
"https://account.dfs.core.windows.net/container/path", + (ObjectStoreScheme::MicrosoftAzure, "path"), + ), + ( + "https://account.blob.core.windows.net", + (ObjectStoreScheme::MicrosoftAzure, ""), + ), + ( + "https://account.blob.core.windows.net/container/path", + (ObjectStoreScheme::MicrosoftAzure, "path"), + ), + ( + "az://account/container", + (ObjectStoreScheme::MicrosoftAzure, ""), + ), + ( + "az://account/container/path", + (ObjectStoreScheme::MicrosoftAzure, "path"), + ), + ( + "gs://bucket/path", + (ObjectStoreScheme::GoogleCloudStorage, "path"), + ), + ( + "gs://test.example.com/path", + (ObjectStoreScheme::GoogleCloudStorage, "path"), + ), + ("http://mydomain/path", (ObjectStoreScheme::Http, "path")), + ("https://mydomain/path", (ObjectStoreScheme::Http, "path")), + ( + "s3://bucket/foo%20bar", + (ObjectStoreScheme::AmazonS3, "foo bar"), + ), + ( + "s3://bucket/foo bar", + (ObjectStoreScheme::AmazonS3, "foo bar"), + ), + ("s3://bucket/😀", (ObjectStoreScheme::AmazonS3, "😀")), + ( + "s3://bucket/%F0%9F%98%80", + (ObjectStoreScheme::AmazonS3, "😀"), + ), + ( + "https://foo/bar%20baz", + (ObjectStoreScheme::Http, "bar baz"), + ), + ( + "file:///bar%252Efoo", + (ObjectStoreScheme::Local, "bar%2Efoo"), + ), + ( + "abfss://file_system@account.dfs.fabric.microsoft.com/", + (ObjectStoreScheme::MicrosoftAzure, ""), + ), + ( + "abfss://file_system@account.dfs.fabric.microsoft.com/", + (ObjectStoreScheme::MicrosoftAzure, ""), + ), + ( + "https://account.dfs.fabric.microsoft.com/", + (ObjectStoreScheme::MicrosoftAzure, ""), + ), + ( + "https://account.dfs.fabric.microsoft.com/container", + (ObjectStoreScheme::MicrosoftAzure, ""), + ), + ( + "https://account.dfs.fabric.microsoft.com/container/path", + (ObjectStoreScheme::MicrosoftAzure, "path"), + ), + ( + "https://account.blob.fabric.microsoft.com/", + (ObjectStoreScheme::MicrosoftAzure, ""), + ), + ( + "https://account.blob.fabric.microsoft.com/container", + (ObjectStoreScheme::MicrosoftAzure, ""), + ), + ( + "https://account.blob.fabric.microsoft.com/container/path", + (ObjectStoreScheme::MicrosoftAzure, "path"), + ), + ]; + + for (s, (expected_scheme, expected_path)) in cases { + let url = Url::parse(s).unwrap(); + let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap(); + + assert_eq!(scheme, expected_scheme, "{s}"); + assert_eq!(path, Path::parse(expected_path).unwrap(), "{s}"); + } + + let neg_cases = [ + "unix:/run/foo.socket", + "file://remote/path", + "memory://remote/", + ]; + for s in neg_cases { + let url = Url::parse(s).unwrap(); + assert!(ObjectStoreScheme::parse(&url).is_err()); + } + } + + #[test] + fn test_url_spaces() { + let url = Url::parse("file:///my file with spaces").unwrap(); + assert_eq!(url.path(), "/my%20file%20with%20spaces"); + let (_, path) = parse_url(&url).unwrap(); + assert_eq!(path.as_ref(), "my file with spaces"); + } + + #[tokio::test] + #[cfg(all(feature = "http", not(target_arch = "wasm32")))] + async fn test_url_http() { + use crate::client::mock_server::MockServer; + use http::{header::USER_AGENT, Response}; + + let server = MockServer::new().await; + + server.push_fn(|r| { + assert_eq!(r.uri().path(), "/foo/bar"); + assert_eq!(r.headers().get(USER_AGENT).unwrap(), "test_url"); + Response::new(String::new()) + }); + + let test = format!("{}/foo/bar", server.url()); + let opts = [("user_agent", "test_url"), ("allow_http", "true")]; + let url = test.parse().unwrap(); + let (store, path) = parse_url_opts(&url, opts).unwrap(); + assert_eq!(path.as_ref(), "foo/bar"); + store.get(&path).await.unwrap(); 
+ + server.shutdown().await; + } +} diff --git a/rust/object_store/src/path/mod.rs b/rust/object_store/src/path/mod.rs new file mode 100644 index 0000000000..f8affe8dfb --- /dev/null +++ b/rust/object_store/src/path/mod.rs @@ -0,0 +1,614 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Path abstraction for Object Storage + +use itertools::Itertools; +use percent_encoding::percent_decode; +use std::fmt::Formatter; +#[cfg(not(target_arch = "wasm32"))] +use url::Url; + +/// The delimiter to separate object namespaces, creating a directory structure. +pub const DELIMITER: &str = "/"; + +/// The path delimiter as a single byte +pub const DELIMITER_BYTE: u8 = DELIMITER.as_bytes()[0]; + +mod parts; + +pub use parts::{InvalidPart, PathPart}; + +/// Error returned by [`Path::parse`] +#[derive(Debug, thiserror::Error)] +#[non_exhaustive] +pub enum Error { + /// Error when there's an empty segment between two slashes `/` in the path + #[error("Path \"{}\" contained empty path segment", path)] + EmptySegment { + /// The source path + path: String, + }, + + /// Error when an invalid segment is encountered in the given path + #[error("Error parsing Path \"{}\": {}", path, source)] + BadSegment { + /// The source path + path: String, + /// The part containing the error + source: InvalidPart, + }, + + /// Error when path cannot be canonicalized + #[error("Failed to canonicalize path \"{}\": {}", path.display(), source)] + Canonicalize { + /// The source path + path: std::path::PathBuf, + /// The underlying error + source: std::io::Error, + }, + + /// Error when the path is not a valid URL + #[error("Unable to convert path \"{}\" to URL", path.display())] + InvalidPath { + /// The source path + path: std::path::PathBuf, + }, + + /// Error when a path contains non-unicode characters + #[error("Path \"{}\" contained non-unicode characters: {}", path, source)] + NonUnicode { + /// The source path + path: String, + /// The underlying `UTF8Error` + source: std::str::Utf8Error, + }, + + /// Error when the a path doesn't start with given prefix + #[error("Path {} does not start with prefix {}", path, prefix)] + PrefixMismatch { + /// The source path + path: String, + /// The mismatched prefix + prefix: String, + }, +} + +/// A parsed path representation that can be safely written to object storage +/// +/// A [`Path`] maintains the following invariants: +/// +/// * Paths are delimited by `/` +/// * Paths do not contain leading or trailing `/` +/// * Paths do not contain relative path segments, i.e. 
+
+/// A parsed path representation that can be safely written to object storage
+///
+/// A [`Path`] maintains the following invariants:
+///
+/// * Paths are delimited by `/`
+/// * Paths do not contain leading or trailing `/`
+/// * Paths do not contain relative path segments, i.e. `.` or `..`
+/// * Paths do not contain empty path segments
+/// * Paths do not contain any ASCII control characters
+///
+/// There are no enforced restrictions on path length, however, it should be noted that most
+/// object stores do not permit paths longer than 1024 bytes, and many filesystems do not
+/// support path segments longer than 255 bytes.
+///
+/// # Encode
+///
+/// In theory object stores support any UTF-8 character sequence, however, certain character
+/// sequences cause compatibility problems with some applications and protocols. Additionally
+/// some filesystems may impose character restrictions, see [`LocalFileSystem`]. As such the
+/// naming guidelines for [S3], [GCS] and [Azure Blob Storage] all recommend sticking to a
+/// limited character subset.
+///
+/// [S3]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
+/// [GCS]: https://cloud.google.com/storage/docs/naming-objects
+/// [Azure Blob Storage]: https://docs.microsoft.com/en-us/rest/api/storageservices/Naming-and-Referencing-Containers--Blobs--and-Metadata#blob-names
+///
+/// A string containing potentially problematic path segments can therefore be encoded to a [`Path`]
+/// using [`Path::from`] or [`Path::from_iter`]. This will percent encode any problematic
+/// segments according to [RFC 1738].
+///
+/// ```
+/// # use object_store::path::Path;
+/// assert_eq!(Path::from("foo/bar").as_ref(), "foo/bar");
+/// assert_eq!(Path::from("foo//bar").as_ref(), "foo/bar");
+/// assert_eq!(Path::from("foo/../bar").as_ref(), "foo/%2E%2E/bar");
+/// assert_eq!(Path::from("/").as_ref(), "");
+/// assert_eq!(Path::from_iter(["foo", "foo/bar"]).as_ref(), "foo/foo%2Fbar");
+/// ```
+///
+/// Note: if provided with an already percent encoded string, this will encode it again
+///
+/// ```
+/// # use object_store::path::Path;
+/// assert_eq!(Path::from("foo/foo%2Fbar").as_ref(), "foo/foo%252Fbar");
+/// ```
+///
+/// # Parse
+///
+/// Alternatively a [`Path`] can be parsed from an existing string, returning an
+/// error if it is invalid. Unlike the encoding methods above, this will permit
+/// arbitrary unicode, including percent encoded sequences.
+///
+/// ```
+/// # use object_store::path::Path;
+/// assert_eq!(Path::parse("/foo/foo%2Fbar").unwrap().as_ref(), "foo/foo%2Fbar");
+/// Path::parse("..").unwrap_err(); // Relative path segments are disallowed
+/// Path::parse("/foo//").unwrap_err(); // Empty path segments are disallowed
+/// Path::parse("\x00").unwrap_err(); // ASCII control characters are disallowed
+/// ```
+///
+/// [RFC 1738]: https://www.ietf.org/rfc/rfc1738.txt
+/// [`LocalFileSystem`]: crate::local::LocalFileSystem
+#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Ord, PartialOrd)]
+pub struct Path {
+    /// The raw path with no leading or trailing delimiters
+    raw: String,
+}
+
+impl Path {
+    /// Parse a string as a [`Path`], returning an [`Error`] if invalid,
+    /// as defined on the docstring for [`Path`]
+    ///
+    /// Note: this will strip any leading `/` or trailing `/`
+    pub fn parse(path: impl AsRef<str>) -> Result<Self, Error> {
+        let path = path.as_ref();
+
+        let stripped = path.strip_prefix(DELIMITER).unwrap_or(path);
+        if stripped.is_empty() {
+            return Ok(Default::default());
+        }
+
+        let stripped = stripped.strip_suffix(DELIMITER).unwrap_or(stripped);
+
+        for segment in stripped.split(DELIMITER) {
+            if segment.is_empty() {
+                return Err(Error::EmptySegment { path: path.into() });
+            }
+
+            PathPart::parse(segment).map_err(|source| {
+                let path = path.into();
+                Error::BadSegment { source, path }
+            })?;
+        }
+
+        Ok(Self {
+            raw: stripped.to_string(),
+        })
+    }
+
+    #[cfg(not(target_arch = "wasm32"))]
+    /// Convert a filesystem path to a [`Path`] relative to the filesystem root
+    ///
+    /// This will return an error if the path contains illegal character sequences
+    /// as defined on the docstring for [`Path`] or does not exist
+    ///
+    /// Note: this will canonicalize the provided path, resolving any symlinks
+    pub fn from_filesystem_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
+        let absolute = std::fs::canonicalize(&path).map_err(|source| {
+            let path = path.as_ref().into();
+            Error::Canonicalize { source, path }
+        })?;
+
+        Self::from_absolute_path(absolute)
+    }
+
+    #[cfg(not(target_arch = "wasm32"))]
+    /// Convert an absolute filesystem path to a [`Path`] relative to the filesystem root
+    ///
+    /// This will return an error if the path contains illegal character sequences,
+    /// as defined on the docstring for [`Path`], or `base` is not an absolute path
+    pub fn from_absolute_path(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
+        Self::from_absolute_path_with_base(path, None)
+    }
+
+    #[cfg(not(target_arch = "wasm32"))]
+    /// Convert a filesystem path to a [`Path`] relative to the provided base
+    ///
+    /// This will return an error if the path contains illegal character sequences,
+    /// as defined on the docstring for [`Path`], or `base` does not refer to a parent
+    /// path of `path`, or `base` is not an absolute path
+    pub(crate) fn from_absolute_path_with_base(
+        path: impl AsRef<std::path::Path>,
+        base: Option<&Url>,
+    ) -> Result<Self, Error> {
+        let url = absolute_path_to_url(path)?;
+        let path = match base {
+            Some(prefix) => {
+                url.path()
+                    .strip_prefix(prefix.path())
+                    .ok_or_else(|| Error::PrefixMismatch {
+                        path: url.path().to_string(),
+                        prefix: prefix.to_string(),
+                    })?
+            }
+            None => url.path(),
+        };
+
+        // Reverse any percent encoding performed by conversion to URL
+        Self::from_url_path(path)
+    }
+
+    /// Parse a url encoded string as a [`Path`], returning an [`Error`] if invalid
+    ///
+    /// This will return an error if the path contains illegal character sequences
+    /// as defined on the docstring for [`Path`]
+    pub fn from_url_path(path: impl AsRef<str>) -> Result<Self, Error> {
+        let path = path.as_ref();
+        let decoded = percent_decode(path.as_bytes())
+            .decode_utf8()
+            .map_err(|source| {
+                let path = path.into();
+                Error::NonUnicode { source, path }
+            })?;
+
+        Self::parse(decoded)
+    }
+
+    /// Returns the [`PathPart`] of this [`Path`]
+    pub fn parts(&self) -> impl Iterator<Item = PathPart<'_>> {
+        self.raw
+            .split_terminator(DELIMITER)
+            .map(|s| PathPart { raw: s.into() })
+    }
+
+    /// Returns the last path segment containing the filename stored in this [`Path`]
+    pub fn filename(&self) -> Option<&str> {
+        match self.raw.is_empty() {
+            true => None,
+            false => self.raw.rsplit(DELIMITER).next(),
+        }
+    }
+
+    /// Returns the extension of the file stored in this [`Path`], if any
+    pub fn extension(&self) -> Option<&str> {
+        self.filename()
+            .and_then(|f| f.rsplit_once('.'))
+            .and_then(|(_, extension)| {
+                if extension.is_empty() {
+                    None
+                } else {
+                    Some(extension)
+                }
+            })
+    }
+
+    /// Returns an iterator of the [`PathPart`] of this [`Path`] after `prefix`
+    ///
+    /// Returns `None` if the prefix does not match
+    pub fn prefix_match(&self, prefix: &Self) -> Option<impl Iterator<Item = PathPart<'_>> + '_> {
+        let mut stripped = self.raw.strip_prefix(&prefix.raw)?;
+        if !stripped.is_empty() && !prefix.raw.is_empty() {
+            stripped = stripped.strip_prefix(DELIMITER)?;
+        }
+        let iter = stripped
+            .split_terminator(DELIMITER)
+            .map(|x| PathPart { raw: x.into() });
+        Some(iter)
+    }
+
+    /// Returns true if this [`Path`] starts with `prefix`
+    pub fn prefix_matches(&self, prefix: &Self) -> bool {
+        self.prefix_match(prefix).is_some()
+    }
+
+    /// Creates a new child of this [`Path`]
+    pub fn child<'a>(&self, child: impl Into<PathPart<'a>>) -> Self {
+        let raw = match self.raw.is_empty() {
+            true => format!("{}", child.into().raw),
+            false => format!("{}{}{}", self.raw, DELIMITER, child.into().raw),
+        };
+
+        Self { raw }
+    }
+}
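A short sketch (not part of the diff) of the accessors defined above; the path contents are illustrative:

    use object_store::path::Path;

    fn main() {
        let path = Path::from("data/2024/file.parquet");
        assert_eq!(path.filename(), Some("file.parquet"));
        assert_eq!(path.extension(), Some("parquet"));

        // prefix_match yields the segments after the prefix, or None on mismatch
        let prefix = Path::from("data");
        let rest: Vec<_> = path
            .prefix_match(&prefix)
            .unwrap()
            .map(|p| p.as_ref().to_string())
            .collect();
        assert_eq!(rest, vec!["2024", "file.parquet"]);

        // child appends a single (encoded) segment
        assert_eq!(prefix.child("new").as_ref(), "data/new");
    }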
+
+impl AsRef<str> for Path {
+    fn as_ref(&self) -> &str {
+        &self.raw
+    }
+}
+
+impl From<&str> for Path {
+    fn from(path: &str) -> Self {
+        Self::from_iter(path.split(DELIMITER))
+    }
+}
+
+impl From<String> for Path {
+    fn from(path: String) -> Self {
+        Self::from_iter(path.split(DELIMITER))
+    }
+}
+
+impl From<Path> for String {
+    fn from(path: Path) -> Self {
+        path.raw
+    }
+}
+
+impl std::fmt::Display for Path {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        self.raw.fmt(f)
+    }
+}
+
+impl<'a, I> FromIterator<I> for Path
+where
+    I: Into<PathPart<'a>>,
+{
+    fn from_iter<T: IntoIterator<Item = I>>(iter: T) -> Self {
+        let raw = T::into_iter(iter)
+            .map(|s| s.into())
+            .filter(|s| !s.raw.is_empty())
+            .map(|s| s.raw)
+            .join(DELIMITER);
+
+        Self { raw }
+    }
+}
+
+#[cfg(not(target_arch = "wasm32"))]
+/// Given an absolute filesystem path convert it to a URL representation without canonicalization
+pub(crate) fn absolute_path_to_url(path: impl AsRef<std::path::Path>) -> Result<Url, Error> {
+    Url::from_file_path(&path).map_err(|_| Error::InvalidPath {
+        path: path.as_ref().into(),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn cloud_prefix_with_trailing_delimiter() {
+        // Use case: files exist in object storage named `foo/bar.json` and
+        // `foo_test.json`. A search for the prefix `foo/` should return
+        // `foo/bar.json` but not `foo_test.json'.
+        let prefix = Path::from_iter(["test"]);
+        assert_eq!(prefix.as_ref(), "test");
+    }
+
+    #[test]
+    fn push_encodes() {
+        let location = Path::from_iter(["foo/bar", "baz%2Ftest"]);
+        assert_eq!(location.as_ref(), "foo%2Fbar/baz%252Ftest");
+    }
+
+    #[test]
+    fn test_parse() {
+        assert_eq!(Path::parse("/").unwrap().as_ref(), "");
+        assert_eq!(Path::parse("").unwrap().as_ref(), "");
+
+        let err = Path::parse("//").unwrap_err();
+        assert!(matches!(err, Error::EmptySegment { .. }));
+
+        assert_eq!(Path::parse("/foo/bar/").unwrap().as_ref(), "foo/bar");
+        assert_eq!(Path::parse("foo/bar/").unwrap().as_ref(), "foo/bar");
+        assert_eq!(Path::parse("foo/bar").unwrap().as_ref(), "foo/bar");
+
+        let err = Path::parse("foo///bar").unwrap_err();
+        assert!(matches!(err, Error::EmptySegment { .. }));
+    }
+
+    #[test]
+    fn convert_raw_before_partial_eq() {
+        // dir and file_name
+        let cloud = Path::from("test_dir/test_file.json");
+        let built = Path::from_iter(["test_dir", "test_file.json"]);
+
+        assert_eq!(built, cloud);
+
+        // dir and file_name w/o dot
+        let cloud = Path::from("test_dir/test_file");
+        let built = Path::from_iter(["test_dir", "test_file"]);
+
+        assert_eq!(built, cloud);
+
+        // dir, no file
+        let cloud = Path::from("test_dir/");
+        let built = Path::from_iter(["test_dir"]);
+        assert_eq!(built, cloud);
+
+        // file_name, no dir
+        let cloud = Path::from("test_file.json");
+        let built = Path::from_iter(["test_file.json"]);
+        assert_eq!(built, cloud);
+
+        // empty
+        let cloud = Path::from("");
+        let built = Path::from_iter(["", ""]);
+
+        assert_eq!(built, cloud);
+    }
+
+    #[test]
+    fn parts_after_prefix_behavior() {
+        let existing_path = Path::from("apple/bear/cow/dog/egg.json");
+
+        // Prefix with one directory
+        let prefix = Path::from("apple");
+        let expected_parts: Vec<PathPart<'_>> = vec!["bear", "cow", "dog", "egg.json"]
+            .into_iter()
+            .map(Into::into)
+            .collect();
+        let parts: Vec<_> = existing_path.prefix_match(&prefix).unwrap().collect();
+        assert_eq!(parts, expected_parts);
+
+        // Prefix with two directories
+        let prefix = Path::from("apple/bear");
+        let expected_parts: Vec<PathPart<'_>> = vec!["cow", "dog", "egg.json"]
+            .into_iter()
+            .map(Into::into)
+            .collect();
+        let parts: Vec<_> = existing_path.prefix_match(&prefix).unwrap().collect();
+        assert_eq!(parts, expected_parts);
+
+        // Not a prefix
+        let prefix = Path::from("cow");
+        assert!(existing_path.prefix_match(&prefix).is_none());
+
+        // Prefix with a partial directory
+        let prefix = Path::from("ap");
+        assert!(existing_path.prefix_match(&prefix).is_none());
+
+        // Prefix matches but there aren't any parts after it
+        let existing = Path::from("apple/bear/cow/dog");
+
+        assert_eq!(existing.prefix_match(&existing).unwrap().count(), 0);
+        assert_eq!(Path::default().parts().count(), 0);
+    }
+
+    #[test]
+    fn prefix_matches() {
+        let haystack = Path::from_iter(["foo/bar", "baz%2Ftest", "something"]);
+        // self starts with self
+        assert!(
+            haystack.prefix_matches(&haystack),
+            "{haystack:?} should have started with {haystack:?}"
+        );
+
+        // a longer prefix doesn't match
+        let needle = haystack.child("longer now");
+        assert!(
+            !haystack.prefix_matches(&needle),
+            "{haystack:?} shouldn't have started with {needle:?}"
+        );
+
+        // one dir prefix matches
+        let needle = Path::from_iter(["foo/bar"]);
+        assert!(
+            haystack.prefix_matches(&needle),
+            "{haystack:?} should have started with {needle:?}"
+        );
+
+        // two dir prefix matches
+        let needle = needle.child("baz%2Ftest");
+        assert!(
+            haystack.prefix_matches(&needle),
+            "{haystack:?} should have started with {needle:?}"
+        );
+
+        // partial dir prefix doesn't match
+        let needle = Path::from_iter(["f"]);
+        assert!(
+            !haystack.prefix_matches(&needle),
+            "{haystack:?} should not have started with {needle:?}"
+        );
+
+        // one dir and one partial dir doesn't match
+        let needle = Path::from_iter(["foo/bar", "baz"]);
+        assert!(
+            !haystack.prefix_matches(&needle),
+            "{haystack:?} should not have started with {needle:?}"
+        );
+
+        // empty prefix matches
+        let needle = Path::from("");
+        assert!(
+            haystack.prefix_matches(&needle),
+            "{haystack:?} should have started with {needle:?}"
+        );
+    }
+
+    #[test]
+    fn prefix_matches_with_file_name() {
+        let haystack = Path::from_iter(["foo/bar", "baz%2Ftest", "something", "foo.segment"]);
+
+        // All directories match and file name is a prefix
+        let needle = Path::from_iter(["foo/bar", "baz%2Ftest", "something", "foo"]);
+
+        assert!(
+            !haystack.prefix_matches(&needle),
+            "{haystack:?} should not have started with {needle:?}"
+        );
+
+        // All directories match but file name is not a prefix
+        let needle = Path::from_iter(["foo/bar", "baz%2Ftest", "something", "e"]);
+
+        assert!(
+            !haystack.prefix_matches(&needle),
+            "{haystack:?} should not have started with {needle:?}"
+        );
+
+        // Not all directories match; file name is a prefix of the next directory; this
+        // does not match
+        let needle = Path::from_iter(["foo/bar", "baz%2Ftest", "s"]);
+
+        assert!(
+            !haystack.prefix_matches(&needle),
+            "{haystack:?} should not have started with {needle:?}"
+        );
+
+        // Not all directories match; file name is NOT a prefix of the next directory;
+        // no match
+        let needle = Path::from_iter(["foo/bar", "baz%2Ftest", "p"]);
+
+        assert!(
+            !haystack.prefix_matches(&needle),
+            "{haystack:?} should not have started with {needle:?}"
+        );
+    }
+
+    #[test]
+    fn path_containing_spaces() {
+        let a = Path::from_iter(["foo bar", "baz"]);
+        let b = Path::from("foo bar/baz");
+        let c = Path::parse("foo bar/baz").unwrap();
+
+        assert_eq!(a.raw, "foo bar/baz");
+        assert_eq!(a.raw, b.raw);
+        assert_eq!(b.raw, c.raw);
+    }
+
+    #[test]
+    fn from_url_path() {
+        let a = Path::from_url_path("foo%20bar").unwrap();
+        let b = Path::from_url_path("foo/%2E%2E/bar").unwrap_err();
+        let c = Path::from_url_path("foo%2F%252E%252E%2Fbar").unwrap();
+        let d = Path::from_url_path("foo/%252E%252E/bar").unwrap();
+        let e = Path::from_url_path("%48%45%4C%4C%4F").unwrap();
+        let f = Path::from_url_path("foo/%FF/as").unwrap_err();
+
+        assert_eq!(a.raw, "foo bar");
+        assert!(matches!(b, Error::BadSegment { .. }));
+        assert_eq!(c.raw, "foo/%2E%2E/bar");
+        assert_eq!(d.raw, "foo/%2E%2E/bar");
+        assert_eq!(e.raw, "HELLO");
+        assert!(matches!(f, Error::NonUnicode { .. }));
+    }
+
+    #[test]
+    fn filename_from_path() {
+        let a = Path::from("foo/bar");
+        let b = Path::from("foo/bar.baz");
+        let c = Path::from("foo.bar/baz");
+
+        assert_eq!(a.filename(), Some("bar"));
+        assert_eq!(b.filename(), Some("bar.baz"));
+        assert_eq!(c.filename(), Some("baz"));
+    }
+
+    #[test]
+    fn file_extension() {
+        let a = Path::from("foo/bar");
+        let b = Path::from("foo/bar.baz");
+        let c = Path::from("foo.bar/baz");
+        let d = Path::from("foo.bar/baz.qux");
+
+        assert_eq!(a.extension(), None);
+        assert_eq!(b.extension(), Some("baz"));
+        assert_eq!(c.extension(), None);
+        assert_eq!(d.extension(), Some("qux"));
+    }
+}
+const INVALID: &AsciiSet = &CONTROLS + // The delimiter we are reserving for internal hierarchy + .add(DELIMITER_BYTE) + // Characters AWS recommends avoiding for object keys + // https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html + .add(b'\\') + .add(b'{') + .add(b'^') + .add(b'}') + .add(b'%') + .add(b'`') + .add(b']') + .add(b'"') // " <-- my editor is confused about double quotes within single quotes + .add(b'>') + .add(b'[') + .add(b'~') + .add(b'<') + .add(b'#') + .add(b'|') + // Characters Google Cloud Storage recommends avoiding for object names + // https://cloud.google.com/storage/docs/naming-objects + .add(b'\r') + .add(b'\n') + .add(b'*') + .add(b'?'); + +impl<'a> From<&'a [u8]> for PathPart<'a> { + fn from(v: &'a [u8]) -> Self { + let inner = match v { + // We don't want to encode `.` generally, but we do want to disallow parts of paths + // to be equal to `.` or `..` to prevent file system traversal shenanigans. + b"." => "%2E".into(), + b".." => "%2E%2E".into(), + other => percent_encode(other, INVALID).into(), + }; + Self { raw: inner } + } +} + +impl<'a> From<&'a str> for PathPart<'a> { + fn from(v: &'a str) -> Self { + Self::from(v.as_bytes()) + } +} + +impl From for PathPart<'static> { + fn from(s: String) -> Self { + Self { + raw: Cow::Owned(PathPart::from(s.as_str()).raw.into_owned()), + } + } +} + +impl AsRef for PathPart<'_> { + fn as_ref(&self) -> &str { + self.raw.as_ref() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn path_part_delimiter_gets_encoded() { + let part: PathPart<'_> = "foo/bar".into(); + assert_eq!(part.raw, "foo%2Fbar"); + } + + #[test] + fn path_part_given_already_encoded_string() { + let part: PathPart<'_> = "foo%2Fbar".into(); + assert_eq!(part.raw, "foo%252Fbar"); + } + + #[test] + fn path_part_cant_be_one_dot() { + let part: PathPart<'_> = ".".into(); + assert_eq!(part.raw, "%2E"); + } + + #[test] + fn path_part_cant_be_two_dots() { + let part: PathPart<'_> = "..".into(); + assert_eq!(part.raw, "%2E%2E"); + } + + #[test] + fn path_part_parse() { + PathPart::parse("foo").unwrap(); + PathPart::parse("foo/bar").unwrap_err(); + + // Test percent-encoded path + PathPart::parse("foo%2Fbar").unwrap(); + PathPart::parse("L%3ABC.parquet").unwrap(); + + // Test path containing bad escape sequence + PathPart::parse("%Z").unwrap(); + PathPart::parse("%%").unwrap(); + } +} diff --git a/rust/object_store/src/payload.rs b/rust/object_store/src/payload.rs new file mode 100644 index 0000000000..055336b6a3 --- /dev/null +++ b/rust/object_store/src/payload.rs @@ -0,0 +1,321 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
diff --git a/rust/object_store/src/payload.rs b/rust/object_store/src/payload.rs
new file mode 100644
index 0000000000..055336b6a3
--- /dev/null
+++ b/rust/object_store/src/payload.rs
@@ -0,0 +1,321 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use bytes::Bytes;
+use std::sync::Arc;
+
+/// A cheaply cloneable, ordered collection of [`Bytes`]
+#[derive(Debug, Clone)]
+pub struct PutPayload(Arc<[Bytes]>);
+
+impl Default for PutPayload {
+    fn default() -> Self {
+        Self(Arc::new([]))
+    }
+}
+
+impl PutPayload {
+    /// Create a new empty [`PutPayload`]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Creates a [`PutPayload`] from a static slice
+    pub fn from_static(s: &'static [u8]) -> Self {
+        s.into()
+    }
+
+    /// Creates a [`PutPayload`] from a [`Bytes`]
+    pub fn from_bytes(s: Bytes) -> Self {
+        s.into()
+    }
+
+    /// Returns the total length of the [`Bytes`] in this payload
+    pub fn content_length(&self) -> usize {
+        self.0.iter().map(|b| b.len()).sum()
+    }
+
+    /// Returns an iterator over the [`Bytes`] in this payload
+    pub fn iter(&self) -> PutPayloadIter<'_> {
+        PutPayloadIter(self.0.iter())
+    }
+}
+
+impl AsRef<[Bytes]> for PutPayload {
+    fn as_ref(&self) -> &[Bytes] {
+        self.0.as_ref()
+    }
+}
+
+impl<'a> IntoIterator for &'a PutPayload {
+    type Item = &'a Bytes;
+    type IntoIter = PutPayloadIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl IntoIterator for PutPayload {
+    type Item = Bytes;
+    type IntoIter = PutPayloadIntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        PutPayloadIntoIter {
+            payload: self,
+            idx: 0,
+        }
+    }
+}
+
+/// An iterator over [`PutPayload`]
+#[derive(Debug)]
+pub struct PutPayloadIter<'a>(std::slice::Iter<'a, Bytes>);
+
+impl<'a> Iterator for PutPayloadIter<'a> {
+    type Item = &'a Bytes;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.0.size_hint()
+    }
+}
+
+/// An owning iterator of [`PutPayload`]
+#[derive(Debug)]
+pub struct PutPayloadIntoIter {
+    payload: PutPayload,
+    idx: usize,
+}
+
+impl Iterator for PutPayloadIntoIter {
+    type Item = Bytes;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let p = self.payload.0.get(self.idx)?.clone();
+        self.idx += 1;
+        Some(p)
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let l = self.payload.0.len() - self.idx;
+        (l, Some(l))
+    }
+}
+
+impl From<Bytes> for PutPayload {
+    fn from(value: Bytes) -> Self {
+        Self(Arc::new([value]))
+    }
+}
+
+impl From<Vec<u8>> for PutPayload {
+    fn from(value: Vec<u8>) -> Self {
+        Self(Arc::new([value.into()]))
+    }
+}
+
+impl From<&'static str> for PutPayload {
+    fn from(value: &'static str) -> Self {
+        Bytes::from(value).into()
+    }
+}
+
+impl From<&'static [u8]> for PutPayload {
+    fn from(value: &'static [u8]) -> Self {
+        Bytes::from(value).into()
+    }
+}
+
+impl From<String> for PutPayload {
+    fn from(value: String) -> Self {
+        Bytes::from(value).into()
+    }
+}
+
+impl FromIterator<u8> for PutPayload {
+    fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
+        Bytes::from_iter(iter).into()
+    }
+}
+
+impl FromIterator<Bytes> for PutPayload {
+    fn from_iter<T: IntoIterator<Item = Bytes>>(iter: T) -> Self {
+        Self(iter.into_iter().collect())
+    }
+}
+
+impl From<PutPayload> for Bytes {
+    fn from(value: PutPayload) -> Self {
+        match value.0.len() {
+            0 => Self::new(),
+            1 => value.0[0].clone(),
+            _ => {
+                let mut buf = Vec::with_capacity(value.content_length());
+                value.iter().for_each(|x| buf.extend_from_slice(x));
+                buf.into()
+            }
+        }
+    }
+}
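A short sketch of the intended usage, assuming the crate root re-exports `PutPayload` as upstream `object_store` does: the payload collects chunks without copying, cloning only bumps the `Arc`, and flattening to a single `Bytes` copies only when there is more than one chunk.

```rust
use bytes::Bytes;
use object_store::PutPayload;

fn main() {
    // Gather several Bytes chunks without copying their contents.
    let payload: PutPayload = vec![Bytes::from_static(b"hello "), Bytes::from_static(b"world")]
        .into_iter()
        .collect();
    assert_eq!(payload.content_length(), 11);

    // Cloning is cheap: only the Arc<[Bytes]> is cloned, not the data.
    let _copy = payload.clone();

    // Converting to a single Bytes concatenates only in the multi-chunk case.
    let flat = Bytes::from(payload);
    assert_eq!(&flat[..], b"hello world");
}
```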
+/// A builder for [`PutPayload`] that avoids reallocating memory
+///
+/// Data is allocated in fixed blocks, which are flushed to [`Bytes`] once full.
+/// Unlike [`Vec`] this avoids needing to repeatedly reallocate blocks of memory,
+/// which typically involves copying all the previously written data to a new
+/// contiguous memory region.
+#[derive(Debug)]
+pub struct PutPayloadMut {
+    len: usize,
+    completed: Vec<Bytes>,
+    in_progress: Vec<u8>,
+    block_size: usize,
+}
+
+impl Default for PutPayloadMut {
+    fn default() -> Self {
+        Self {
+            len: 0,
+            completed: vec![],
+            in_progress: vec![],
+
+            block_size: 8 * 1024,
+        }
+    }
+}
+
+impl PutPayloadMut {
+    /// Create a new [`PutPayloadMut`]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Configures the minimum allocation size
+    ///
+    /// Defaults to 8KB
+    pub fn with_block_size(self, block_size: usize) -> Self {
+        Self { block_size, ..self }
+    }
+
+    /// Write bytes into this [`PutPayloadMut`]
+    ///
+    /// If there is an in-progress block, data will be first written to it, flushing
+    /// it to [`Bytes`] once full. If data remains to be written, a new block of memory
+    /// of at least the configured block size will be allocated, to hold the remaining data.
+    pub fn extend_from_slice(&mut self, slice: &[u8]) {
+        let remaining = self.in_progress.capacity() - self.in_progress.len();
+        let to_copy = remaining.min(slice.len());
+
+        self.in_progress.extend_from_slice(&slice[..to_copy]);
+        if self.in_progress.capacity() == self.in_progress.len() {
+            let new_cap = self.block_size.max(slice.len() - to_copy);
+            let completed = std::mem::replace(&mut self.in_progress, Vec::with_capacity(new_cap));
+            if !completed.is_empty() {
+                self.completed.push(completed.into())
+            }
+            self.in_progress.extend_from_slice(&slice[to_copy..])
+        }
+        self.len += slice.len();
+    }
+
+    /// Append a [`Bytes`] to this [`PutPayloadMut`] without copying
+    ///
+    /// This will close any currently buffered block populated by [`Self::extend_from_slice`],
+    /// and append `bytes` to this payload without copying.
+    pub fn push(&mut self, bytes: Bytes) {
+        if !self.in_progress.is_empty() {
+            let completed = std::mem::take(&mut self.in_progress);
+            self.completed.push(completed.into())
+        }
+        self.len += bytes.len();
+        self.completed.push(bytes);
+    }
+
+    /// Returns `true` if this [`PutPayloadMut`] contains no bytes
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    /// Returns the total length of the [`Bytes`] in this payload
+    #[inline]
+    pub fn content_length(&self) -> usize {
+        self.len
+    }
+
+    /// Convert into [`PutPayload`]
+    pub fn freeze(mut self) -> PutPayload {
+        if !self.in_progress.is_empty() {
+            let completed = std::mem::take(&mut self.in_progress).into();
+            self.completed.push(completed);
+        }
+        PutPayload(self.completed.into())
+    }
+}
+
+impl From<PutPayloadMut> for PutPayload {
+    fn from(value: PutPayloadMut) -> Self {
+        value.freeze()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::PutPayloadMut;
+
+    #[test]
+    fn test_put_payload() {
+        let mut chunk = PutPayloadMut::new().with_block_size(23);
+        chunk.extend_from_slice(&[1; 16]);
+        chunk.extend_from_slice(&[2; 32]);
+        chunk.extend_from_slice(&[2; 5]);
+        chunk.extend_from_slice(&[2; 21]);
+        chunk.extend_from_slice(&[2; 40]);
+        chunk.extend_from_slice(&[0; 0]);
+        chunk.push("foobar".into());
+
+        let payload = chunk.freeze();
+        assert_eq!(payload.content_length(), 120);
+
+        let chunks = payload.as_ref();
+        assert_eq!(chunks.len(), 6);
+
+        assert_eq!(chunks[0].len(), 23);
+        assert_eq!(chunks[1].len(), 25); // 32 - (23 - 16)
+        assert_eq!(chunks[2].len(), 23);
+        assert_eq!(chunks[3].len(), 23);
+        assert_eq!(chunks[4].len(), 20);
+        assert_eq!(chunks[5].len(), 6);
+    }
+
+    #[test]
+    fn test_content_length() {
+        let mut chunk = PutPayloadMut::new();
+        chunk.push(vec![0; 23].into());
+        assert_eq!(chunk.content_length(), 23);
+        chunk.extend_from_slice(&[0; 4]);
+        assert_eq!(chunk.content_length(), 27);
+        chunk.push(vec![0; 121].into());
+        assert_eq!(chunk.content_length(), 148);
+        let payload = chunk.freeze();
+        assert_eq!(payload.content_length(), 148);
+    }
+}
diff --git a/rust/object_store/src/prefix.rs b/rust/object_store/src/prefix.rs
new file mode 100644
index 0000000000..4720c9891a
--- /dev/null
+++ b/rust/object_store/src/prefix.rs
@@ -0,0 +1,296 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! An object store wrapper handling a constant path prefix
+use bytes::Bytes;
+use futures::{stream::BoxStream, StreamExt, TryStreamExt};
+use std::ops::Range;
+
+use crate::path::Path;
+use crate::{
+    GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
+    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result,
+};
+
+/// Store wrapper that applies a constant prefix to all paths handled by the store.
+#[derive(Debug, Clone)]
+pub struct PrefixStore<T: ObjectStore> {
+    prefix: Path,
+    inner: T,
+}
+
+impl<T: ObjectStore> std::fmt::Display for PrefixStore<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "PrefixObjectStore({})", self.prefix.as_ref())
+    }
+}
+
+impl<T: ObjectStore> PrefixStore<T> {
+    /// Create a new instance of [`PrefixStore`]
+    pub fn new(store: T, prefix: impl Into<Path>) -> Self {
+        Self {
+            prefix: prefix.into(),
+            inner: store,
+        }
+    }
+
+    /// Create the full path from a path relative to prefix
+    fn full_path(&self, location: &Path) -> Path {
+        self.prefix.parts().chain(location.parts()).collect()
+    }
+
+    /// Strip the constant prefix from a given path
+    fn strip_prefix(&self, path: Path) -> Path {
+        // Note cannot use match because of borrow checker
+        if let Some(suffix) = path.prefix_match(&self.prefix) {
+            return suffix.collect();
+        }
+        path
+    }
+
+    /// Strip the constant prefix from a given ObjectMeta
+    fn strip_meta(&self, meta: ObjectMeta) -> ObjectMeta {
+        ObjectMeta {
+            last_modified: meta.last_modified,
+            size: meta.size,
+            location: self.strip_prefix(meta.location),
+            e_tag: meta.e_tag,
+            version: None,
+        }
+    }
+}
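A minimal usage sketch of the wrapper (assuming the module layout in this diff, with `InMemory` as a stand-in backend): callers address paths relative to the prefix, while the wrapped store sees the full path.

```rust
use object_store::{memory::InMemory, path::Path, prefix::PrefixStore, ObjectStore};

// Hypothetical tenant isolation: the caller writes "data.txt", the inner
// store receives "tenant-42/data.txt".
async fn demo() -> object_store::Result<()> {
    let store = PrefixStore::new(InMemory::new(), "tenant-42");
    store.put(&Path::from("data.txt"), "payload".into()).await?;
    let meta = store.head(&Path::from("data.txt")).await?;
    assert_eq!(meta.location.as_ref(), "data.txt"); // prefix stripped on the way out
    Ok(())
}
```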
+// Note: This is a relative hack to move these two functions to pure functions so they don't rely
+// on the `self` lifetime. Expected to be cleaned up before merge.
+//
+/// Strip the constant prefix from a given path
+fn strip_prefix(prefix: &Path, path: Path) -> Path {
+    // Note cannot use match because of borrow checker
+    if let Some(suffix) = path.prefix_match(prefix) {
+        return suffix.collect();
+    }
+    path
+}
+
+/// Strip the constant prefix from a given ObjectMeta
+fn strip_meta(prefix: &Path, meta: ObjectMeta) -> ObjectMeta {
+    ObjectMeta {
+        last_modified: meta.last_modified,
+        size: meta.size,
+        location: strip_prefix(prefix, meta.location),
+        e_tag: meta.e_tag,
+        version: None,
+    }
+}
+#[async_trait::async_trait]
+impl<T: ObjectStore> ObjectStore for PrefixStore<T> {
+    async fn put(&self, location: &Path, payload: PutPayload) -> Result<PutResult> {
+        let full_path = self.full_path(location);
+        self.inner.put(&full_path, payload).await
+    }
+
+    async fn put_opts(
+        &self,
+        location: &Path,
+        payload: PutPayload,
+        opts: PutOptions,
+    ) -> Result<PutResult> {
+        let full_path = self.full_path(location);
+        self.inner.put_opts(&full_path, payload, opts).await
+    }
+
+    async fn put_multipart(&self, location: &Path) -> Result<Box<dyn MultipartUpload>> {
+        let full_path = self.full_path(location);
+        self.inner.put_multipart(&full_path).await
+    }
+
+    async fn put_multipart_opts(
+        &self,
+        location: &Path,
+        opts: PutMultipartOptions,
+    ) -> Result<Box<dyn MultipartUpload>> {
+        let full_path = self.full_path(location);
+        self.inner.put_multipart_opts(&full_path, opts).await
+    }
+
+    async fn get(&self, location: &Path) -> Result<GetResult> {
+        let full_path = self.full_path(location);
+        self.inner.get(&full_path).await
+    }
+
+    async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
+        let full_path = self.full_path(location);
+        self.inner.get_range(&full_path, range).await
+    }
+
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
+        let full_path = self.full_path(location);
+        self.inner.get_opts(&full_path, options).await
+    }
+
+    async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
+        let full_path = self.full_path(location);
+        self.inner.get_ranges(&full_path, ranges).await
+    }
+
+    async fn head(&self, location: &Path) -> Result<ObjectMeta> {
+        let full_path = self.full_path(location);
+        let meta = self.inner.head(&full_path).await?;
+        Ok(self.strip_meta(meta))
+    }
+
+    async fn delete(&self, location: &Path) -> Result<()> {
+        let full_path = self.full_path(location);
+        self.inner.delete(&full_path).await
+    }
+
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
+        let prefix = self.full_path(prefix.unwrap_or(&Path::default()));
+        let s = self.inner.list(Some(&prefix));
+        let slf_prefix = self.prefix.clone();
+        s.map_ok(move |meta| strip_meta(&slf_prefix, meta)).boxed()
+    }
+
+    fn list_with_offset(
+        &self,
+        prefix: Option<&Path>,
+        offset: &Path,
+    ) -> BoxStream<'static, Result<ObjectMeta>> {
+        let offset = self.full_path(offset);
+        let prefix = self.full_path(prefix.unwrap_or(&Path::default()));
+        let s = self.inner.list_with_offset(Some(&prefix), &offset);
+        let slf_prefix = self.prefix.clone();
+        s.map_ok(move |meta| strip_meta(&slf_prefix, meta)).boxed()
+    }
+
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
+        let prefix = self.full_path(prefix.unwrap_or(&Path::default()));
+        self.inner
+            .list_with_delimiter(Some(&prefix))
+            .await
+            .map(|lst| ListResult {
+                common_prefixes: lst
+                    .common_prefixes
+                    .into_iter()
+                    .map(|p| self.strip_prefix(p))
+                    .collect(),
+                objects: lst
+                    .objects
+                    .into_iter()
+                    .map(|meta| self.strip_meta(meta))
+                    .collect(),
+            })
+    }
+
+    async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
+        let full_from = self.full_path(from);
+        let
full_to = self.full_path(to); + self.inner.copy(&full_from, &full_to).await + } + + async fn rename(&self, from: &Path, to: &Path) -> Result<()> { + let full_from = self.full_path(from); + let full_to = self.full_path(to); + self.inner.rename(&full_from, &full_to).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + let full_from = self.full_path(from); + let full_to = self.full_path(to); + self.inner.copy_if_not_exists(&full_from, &full_to).await + } + + async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + let full_from = self.full_path(from); + let full_to = self.full_path(to); + self.inner.rename_if_not_exists(&full_from, &full_to).await + } +} + +#[cfg(not(target_arch = "wasm32"))] +#[cfg(test)] +mod tests { + use std::slice; + + use super::*; + use crate::integration::*; + use crate::local::LocalFileSystem; + + use tempfile::TempDir; + + #[tokio::test] + async fn prefix_test() { + let root = TempDir::new().unwrap(); + let inner = LocalFileSystem::new_with_prefix(root.path()).unwrap(); + let integration = PrefixStore::new(inner, "prefix"); + + put_get_delete_list(&integration).await; + get_opts(&integration).await; + list_uses_directories_correctly(&integration).await; + list_with_delimiter(&integration).await; + rename_and_copy(&integration).await; + copy_if_not_exists(&integration).await; + stream_get(&integration).await; + } + + #[tokio::test] + async fn prefix_test_applies_prefix() { + let tmpdir = TempDir::new().unwrap(); + let local = LocalFileSystem::new_with_prefix(tmpdir.path()).unwrap(); + + let location = Path::from("prefix/test_file.json"); + let data = Bytes::from("arbitrary data"); + + local.put(&location, data.clone().into()).await.unwrap(); + + let prefix = PrefixStore::new(local, "prefix"); + let location_prefix = Path::from("test_file.json"); + + let content_list = flatten_list_stream(&prefix, None).await.unwrap(); + assert_eq!(content_list, slice::from_ref(&location_prefix)); + + let root = Path::from("/"); + let content_list = flatten_list_stream(&prefix, Some(&root)).await.unwrap(); + assert_eq!(content_list, slice::from_ref(&location_prefix)); + + let read_data = prefix + .get(&location_prefix) + .await + .unwrap() + .bytes() + .await + .unwrap(); + assert_eq!(&*read_data, data); + + let target_prefix = Path::from("/test_written.json"); + prefix + .put(&target_prefix, data.clone().into()) + .await + .unwrap(); + + prefix.delete(&location_prefix).await.unwrap(); + + let local = LocalFileSystem::new_with_prefix(tmpdir.path()).unwrap(); + + let err = local.get(&location).await.unwrap_err(); + assert!(matches!(err, crate::Error::NotFound { .. }), "{}", err); + + let location = Path::from("prefix/test_written.json"); + let read_data = local.get(&location).await.unwrap().bytes().await.unwrap(); + assert_eq!(&*read_data, data) + } +} diff --git a/rust/object_store/src/registry.rs b/rust/object_store/src/registry.rs new file mode 100644 index 0000000000..81770c5733 --- /dev/null +++ b/rust/object_store/src/registry.rs @@ -0,0 +1,340 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Map object URLs to [`ObjectStore`]
+
+use crate::path::{InvalidPart, Path, PathPart};
+use crate::{parse_url_opts, ObjectStore};
+use parking_lot::RwLock;
+use std::collections::HashMap;
+use std::sync::Arc;
+use url::Url;
+
+/// [`ObjectStoreRegistry`] maps a URL to an [`ObjectStore`] instance
+pub trait ObjectStoreRegistry: Send + Sync + std::fmt::Debug + 'static {
+    /// Register a new store for the provided store URL
+    ///
+    /// If a store with the same URL existed before, it is replaced and returned
+    fn register(&self, url: Url, store: Arc<dyn ObjectStore>) -> Option<Arc<dyn ObjectStore>>;
+
+    /// Resolve an object URL
+    ///
+    /// If [`ObjectStoreRegistry::register`] has been called with a URL with the same
+    /// scheme, and authority as the object URL, and a path that is a prefix of the object
+    /// URL's, it should be returned along with the trailing path. Paths should be matched
+    /// on a path segment basis, and in the event of multiple possibilities the longest
+    /// path match should be returned.
+    ///
+    /// If a store hasn't been registered, an [`ObjectStoreRegistry`] may lazily create
+    /// one if the URL is understood
+    ///
+    /// For example
+    ///
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use url::Url;
+    /// # use object_store::memory::InMemory;
+    /// # use object_store::ObjectStore;
+    /// # use object_store::prefix::PrefixStore;
+    /// # use object_store::registry::{DefaultObjectStoreRegistry, ObjectStoreRegistry};
+    /// #
+    /// let registry = DefaultObjectStoreRegistry::new();
+    ///
+    /// let bucket1 = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+    /// let base = Url::parse("s3://bucket1/").unwrap();
+    /// registry.register(base, bucket1.clone());
+    ///
+    /// let url = Url::parse("s3://bucket1/path/to/object").unwrap();
+    /// let (ret, path) = registry.resolve(&url).unwrap();
+    /// assert_eq!(path.as_ref(), "path/to/object");
+    /// assert!(Arc::ptr_eq(&ret, &bucket1));
+    ///
+    /// let bucket2 = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+    /// let base = Url::parse("https://s3.region.amazonaws.com/bucket").unwrap();
+    /// registry.register(base, bucket2.clone());
+    ///
+    /// let url = Url::parse("https://s3.region.amazonaws.com/bucket/path/to/object").unwrap();
+    /// let (ret, path) = registry.resolve(&url).unwrap();
+    /// assert_eq!(path.as_ref(), "path/to/object");
+    /// assert!(Arc::ptr_eq(&ret, &bucket2));
+    ///
+    /// let bucket3 = Arc::new(PrefixStore::new(InMemory::new(), "path")) as Arc<dyn ObjectStore>;
+    /// let base = Url::parse("https://s3.region.amazonaws.com/bucket/path").unwrap();
+    /// registry.register(base, bucket3.clone());
+    ///
+    /// let url = Url::parse("https://s3.region.amazonaws.com/bucket/path/to/object").unwrap();
+    /// let (ret, path) = registry.resolve(&url).unwrap();
+    /// assert_eq!(path.as_ref(), "to/object");
+    /// assert!(Arc::ptr_eq(&ret, &bucket3));
+    /// ```
+    fn resolve(&self, url: &Url) -> crate::Result<(Arc<dyn ObjectStore>, Path)>;
+}
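The doc example above covers explicit registration; the complementary case is the lazy-creation fallback. A small sketch (the `custom` scheme is hypothetical, chosen precisely because `parse_url_opts` does not understand it): resolution fails until a store is registered for that scheme and authority.

```rust
use std::sync::Arc;
use url::Url;
use object_store::memory::InMemory;
use object_store::registry::{DefaultObjectStoreRegistry, ObjectStoreRegistry};

fn main() {
    let registry = DefaultObjectStoreRegistry::new();
    let url = Url::parse("custom://bucket/key").unwrap();

    // Nothing registered for custom://bucket, and the scheme cannot be
    // lazily created from the environment, so this is an error.
    assert!(registry.resolve(&url).is_err());

    registry.register(Url::parse("custom://bucket").unwrap(), Arc::new(InMemory::new()));

    // Now resolution succeeds and returns the trailing path.
    let (_store, path) = registry.resolve(&url).unwrap();
    assert_eq!(path.as_ref(), "key");
}
```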
+/// Error type for [`DefaultObjectStoreRegistry`]
+///
+/// Crate private/opaque type to make the error handling code more ergonomic.
+/// Always converted into `crate::Error` when reported externally.
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+enum Error {
+    #[error("ObjectStore not found")]
+    NotFound,
+
+    #[error("Error parsing URL path segment")]
+    InvalidPart(#[from] InvalidPart),
+}
+
+impl From<Error> for crate::Error {
+    fn from(value: Error) -> Self {
+        Self::Generic {
+            store: "ObjectStoreRegistry",
+            source: Box::new(value),
+        }
+    }
+}
+
+/// An [`ObjectStoreRegistry`] that uses [`parse_url_opts`] to create stores based on the environment
+#[derive(Debug, Default)]
+pub struct DefaultObjectStoreRegistry {
+    /// Mapping from [`url_key`] to [`PathEntry`]
+    map: RwLock<HashMap<String, PathEntry>>,
+}
+
+/// [`PathEntry`] construct a tree of path segments starting from the root
+///
+/// For example the following paths
+///
+/// * `/` => store1
+/// * `/foo/bar` => store2
+///
+/// Would be represented by
+///
+/// ```yaml
+/// store: Some(store1)
+/// children:
+///   foo:
+///     store: None
+///     children:
+///       bar:
+///         store: Some(store2)
+/// ```
+///
+#[derive(Debug, Default)]
+struct PathEntry {
+    /// Store, if defined at this path
+    store: Option<Arc<dyn ObjectStore>>,
+    /// Child [`PathEntry`], keyed by the next path segment in their path
+    children: HashMap<String, PathEntry>,
+}
+
+impl PathEntry {
+    /// Lookup a store based on URL path
+    ///
+    /// Returns the store and its path segment depth
+    fn lookup(&self, to_resolve: &Url) -> Option<(&Arc<dyn ObjectStore>, usize)> {
+        let mut current = self;
+        let mut ret = self.store.as_ref().map(|store| (store, 0));
+        let mut depth = 0;
+        // Traverse the PathEntry tree to find the longest match
+        for segment in path_segments(to_resolve.path()) {
+            match current.children.get(segment) {
+                Some(e) => {
+                    current = e;
+                    depth += 1;
+                    if let Some(store) = &current.store {
+                        ret = Some((store, depth))
+                    }
+                }
+                None => break,
+            }
+        }
+        ret
+    }
+}
+
+impl DefaultObjectStoreRegistry {
+    /// Create a new [`DefaultObjectStoreRegistry`]
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+impl ObjectStoreRegistry for DefaultObjectStoreRegistry {
+    fn register(&self, url: Url, store: Arc<dyn ObjectStore>) -> Option<Arc<dyn ObjectStore>> {
+        let mut map = self.map.write();
+        let key = url_key(&url);
+        let mut entry = map.entry(key.to_string()).or_default();
+
+        for segment in path_segments(url.path()) {
+            entry = entry.children.entry(segment.to_string()).or_default();
+        }
+        entry.store.replace(store)
+    }
+
+    fn resolve(&self, to_resolve: &Url) -> crate::Result<(Arc<dyn ObjectStore>, Path)> {
+        let key = url_key(to_resolve);
+        {
+            let map = self.map.read();
+
+            if let Some((store, depth)) = map.get(key).and_then(|entry| entry.lookup(to_resolve)) {
+                let path = path_suffix(to_resolve, depth)?;
+                return Ok((Arc::clone(store), path));
+            }
+        }
+
+        if let Ok((store, path)) = parse_url_opts(to_resolve, std::env::vars()) {
+            let depth = num_segments(to_resolve.path()) - num_segments(path.as_ref());
+
+            let mut map = self.map.write();
+            let mut entry = map.entry(key.to_string()).or_default();
+            for segment in path_segments(to_resolve.path()).take(depth) {
+                entry = entry.children.entry(segment.to_string()).or_default();
+            }
+            let store = Arc::clone(match &entry.store {
+                None => entry.store.insert(Arc::from(store)),
+                Some(x) => x, // Racing creation - use existing
+            });
+
+            let path = path_suffix(to_resolve, depth)?;
+            return Ok((store, path));
+        }
+
+        Err(Error::NotFound.into())
+    }
+}
+
+/// Extracts the scheme and authority of a URL (components before the Path)
+fn url_key(url: &Url) -> &str {
+    &url[..url::Position::AfterPort]
+}
+
+/// Returns the non-empty segments of a path
+///
+/// Note: We don't use [`Url::path_segments`] as we only want non-empty paths
+fn path_segments(s: &str) -> impl Iterator<Item = &str> {
+    s.split('/').filter(|x| !x.is_empty())
+}
+
+/// Returns the number of non-empty path segments in a path
+fn num_segments(s: &str) -> usize {
+    path_segments(s).count()
+}
+
+/// Returns the path of `url` skipping the first `depth` segments
+fn path_suffix(url: &Url, depth: usize) -> Result<Path, InvalidPart> {
+    let segments = path_segments(url.path()).skip(depth);
+    let path = segments.map(PathPart::parse).collect::<Result<_, _>>()?;
+    Ok(path)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::memory::InMemory;
+    use crate::prefix::PrefixStore;
+
+    #[test]
+    fn test_num_segments() {
+        assert_eq!(num_segments(""), 0);
+        assert_eq!(num_segments("/"), 0);
+        assert_eq!(num_segments("/banana"), 1);
+        assert_eq!(num_segments("banana"), 1);
+        assert_eq!(num_segments("/banana/crumble"), 2);
+        assert_eq!(num_segments("banana/crumble"), 2);
+    }
+
+    #[test]
+    fn test_default_registry() {
+        let registry = DefaultObjectStoreRegistry::new();
+
+        // Should automatically register in memory store
+        let banana_url = Url::parse("memory:///banana").unwrap();
+        let (resolved, path) = registry.resolve(&banana_url).unwrap();
+        assert_eq!(path.as_ref(), "banana");
+
+        // Should replace store
+        let url = Url::parse("memory:///").unwrap();
+        let root = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+        let replaced = registry.register(url, Arc::clone(&root)).unwrap();
+        assert!(Arc::ptr_eq(&resolved, &replaced));
+
+        // Should not replace store
+        let banana = Arc::new(PrefixStore::new(InMemory::new(), "banana")) as Arc<dyn ObjectStore>;
+        assert!(registry
+            .register(banana_url.clone(), Arc::clone(&banana))
+            .is_none());
+
+        // Should resolve to banana store
+        let (resolved, path) = registry.resolve(&banana_url).unwrap();
+        assert_eq!(path.as_ref(), "");
+        assert!(Arc::ptr_eq(&resolved, &banana));
+
+        // If we register another store it still resolves banana
+        let apples_url = Url::parse("memory:///apples").unwrap();
+        let apples = Arc::new(PrefixStore::new(InMemory::new(), "apples")) as Arc<dyn ObjectStore>;
+        assert!(registry.register(apples_url, Arc::clone(&apples)).is_none());
+
+        // Should still resolve to banana store
+        let (resolved, path) = registry.resolve(&banana_url).unwrap();
+        assert_eq!(path.as_ref(), "");
+        assert!(Arc::ptr_eq(&resolved, &banana));
+
+        // Should be path segment based
+        let banana_muffins_url = Url::parse("memory:///banana_muffins").unwrap();
+        let (resolved, path) = registry.resolve(&banana_muffins_url).unwrap();
+        assert_eq!(path.as_ref(), "banana_muffins");
+        assert!(Arc::ptr_eq(&resolved, &root));
+
+        // Should resolve to root even though path contains prefix of valid store
+        let to_resolve = Url::parse("memory:///foo/banana").unwrap();
+        let (resolved, path) = registry.resolve(&to_resolve).unwrap();
+        assert_eq!(path.as_ref(), "foo/banana");
+        assert!(Arc::ptr_eq(&resolved, &root));
+
+        let nested_url = Url::parse("memory:///apples/bananas").unwrap();
+        let nested =
+            Arc::new(PrefixStore::new(InMemory::new(), "apples/bananas")) as Arc<dyn ObjectStore>;
+        assert!(registry.register(nested_url, Arc::clone(&nested)).is_none());
+
+        let to_resolve = Url::parse("memory:///apples/bananas/muffins/cupcakes").unwrap();
+        let (resolved, path) = registry.resolve(&to_resolve).unwrap();
+        assert_eq!(path.as_ref(), "muffins/cupcakes");
+        assert!(Arc::ptr_eq(&resolved, &nested));
+
+        let nested_url2 = Url::parse("memory:///1/2/3").unwrap();
+        let nested2 = Arc::new(PrefixStore::new(InMemory::new(), "1/2/3")) as Arc<dyn ObjectStore>;
+        assert!(registry
+            .register(nested_url2, Arc::clone(&nested2))
+            .is_none());
+
+        let to_resolve = Url::parse("memory:///1/2/3/4/5/6").unwrap();
+        let (resolved, path) = registry.resolve(&to_resolve).unwrap();
+        assert_eq!(path.as_ref(), "4/5/6");
+        assert!(Arc::ptr_eq(&resolved, &nested2));
+
+        let custom_scheme_url = Url::parse("custom:///").unwrap();
+        let custom_scheme = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+        assert!(registry
+            .register(custom_scheme_url, Arc::clone(&custom_scheme))
+            .is_none());
+
+        let to_resolve = Url::parse("custom:///6/7").unwrap();
+        let (resolved, path) = registry.resolve(&to_resolve).unwrap();
+        assert_eq!(path.as_ref(), "6/7");
+        assert!(Arc::ptr_eq(&resolved, &custom_scheme));
+    }
+}
diff --git a/rust/object_store/src/signer.rs b/rust/object_store/src/signer.rs
new file mode 100644
index 0000000000..da55c689ae
--- /dev/null
+++ b/rust/object_store/src/signer.rs
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Abstraction of signed URL generation for those object store implementations that support it
+
+use crate::{path::Path, Result};
+use async_trait::async_trait;
+use reqwest::Method;
+use std::{fmt, time::Duration};
+use url::Url;
+
+/// Universal API to generate presigned URLs from multiple object store services.
+#[async_trait]
+pub trait Signer: Send + Sync + fmt::Debug + 'static {
+    /// Given the intended [`Method`] and [`Path`] to use and the desired length of time for which
+    /// the URL should be valid, return a signed [`Url`] created with the object store
+    /// implementation's credentials such that the URL can be handed to something that doesn't have
+    /// access to the object store's credentials, to allow limited access to the object store.
+    async fn signed_url(&self, method: Method, path: &Path, expires_in: Duration) -> Result<Url>;
+
+    /// Generate signed urls for multiple paths.
+    ///
+    /// See [`Signer::signed_url`] for more details.
+    async fn signed_urls(
+        &self,
+        method: Method,
+        paths: &[Path],
+        expires_in: Duration,
+    ) -> Result<Vec<Url>> {
+        let mut urls = Vec::with_capacity(paths.len());
+        for path in paths {
+            urls.push(self.signed_url(method.clone(), path, expires_in).await?);
+        }
+        Ok(urls)
+    }
+}
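A rough sketch of how `Signer` is typically used, assuming an S3-backed store built from the environment (the bucket name and object path are hypothetical, and the `aws` feature must be enabled for `AmazonS3Builder` to exist):

```rust
use std::time::Duration;
use object_store::{aws::AmazonS3Builder, path::Path, signer::Signer};
use reqwest::Method;

// Hand out a time-limited download link without sharing credentials.
async fn presign() -> object_store::Result<url::Url> {
    let store = AmazonS3Builder::from_env()
        .with_bucket_name("my-bucket") // hypothetical bucket
        .build()?;
    store
        .signed_url(
            Method::GET,
            &Path::from("data/file.parquet"),
            Duration::from_secs(300), // valid for five minutes
        )
        .await
}
```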
diff --git a/rust/object_store/src/tags.rs b/rust/object_store/src/tags.rs
new file mode 100644
index 0000000000..fa6e5913f4
--- /dev/null
+++ b/rust/object_store/src/tags.rs
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use url::form_urlencoded::Serializer;
+
+/// A collection of key value pairs used to annotate objects
+///
+/// <https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-tagging.html>
+/// <https://learn.microsoft.com/en-us/rest/api/storageservices/set-blob-tags>
+#[derive(Debug, Clone, Default, Eq, PartialEq)]
+pub struct TagSet(String);
+
+impl TagSet {
+    /// Append a key value pair to this [`TagSet`]
+    ///
+    /// Stores have different restrictions on what characters are permitted,
+    /// for portability it is recommended applications use no more than 10 tags,
+    /// and stick to alphanumeric characters, and `+ - = . _ : /`
+    ///
+    /// <https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-tagging.html>
+    /// <https://learn.microsoft.com/en-us/rest/api/storageservices/set-blob-tags>
+    pub fn push(&mut self, key: &str, value: &str) {
+        Serializer::new(&mut self.0).append_pair(key, value);
+    }
+
+    /// Return this [`TagSet`] as a URL-encoded string
+    pub fn encoded(&self) -> &str {
+        &self.0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tag_set() {
+        let mut set = TagSet::default();
+        set.push("test/foo", "value sdlks");
+        set.push("foo", " sdf _ /+./sd");
+        assert_eq!(
+            set.encoded(),
+            "test%2Ffoo=value+sdlks&foo=+sdf+_+%2F%2B.%2Fsd"
+        );
+    }
+}
diff --git a/rust/object_store/src/throttle.rs b/rust/object_store/src/throttle.rs
new file mode 100644
index 0000000000..dec642a7ba
--- /dev/null
+++ b/rust/object_store/src/throttle.rs
@@ -0,0 +1,661 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! A throttling object store wrapper
+use parking_lot::Mutex;
+use std::ops::Range;
+use std::{convert::TryInto, sync::Arc};
+
+use crate::multipart::{MultipartStore, PartId};
+use crate::{
+    path::Path, GetResult, GetResultPayload, ListResult, MultipartId, MultipartUpload, ObjectMeta,
+    ObjectStore, PutMultipartOptions, PutOptions, PutPayload, PutResult, Result,
+};
+use crate::{GetOptions, UploadPart};
+use async_trait::async_trait;
+use bytes::Bytes;
+use futures::{stream::BoxStream, FutureExt, StreamExt};
+use std::time::Duration;
+
+/// Configuration settings for throttled store
+#[derive(Debug, Default, Clone, Copy)]
+pub struct ThrottleConfig {
+    /// Sleep duration for every call to [`delete`](ThrottledStore::delete).
+    ///
+    /// Sleeping is done before the underlying store is called and independently of the success of
+    /// the operation.
+    pub wait_delete_per_call: Duration,
+
+    /// Sleep duration for every byte received during [`get`](ThrottledStore::get).
+    ///
+    /// Sleeping is performed after the underlying store returned and only for successful gets.
+    /// The sleep duration is additive to [`wait_get_per_call`](Self::wait_get_per_call).
+    ///
+    /// Note that the per-byte sleep only happens as the user consumes the output bytes. Should
+    /// there be an intermediate failure (i.e. after partly consuming the output bytes), the
+    /// resulting sleep time will be partial as well.
+    pub wait_get_per_byte: Duration,
+
+    /// Sleep duration for every call to [`get`](ThrottledStore::get).
+    ///
+    /// Sleeping is done before the underlying store is called and independently of the success of
+    /// the operation. The sleep duration is additive to
+    /// [`wait_get_per_byte`](Self::wait_get_per_byte).
+    pub wait_get_per_call: Duration,
+
+    /// Sleep duration for every call to [`list`](ThrottledStore::list).
+    ///
+    /// Sleeping is done before the underlying store is called and independently of the success of
+    /// the operation. The sleep duration is additive to
+    /// [`wait_list_per_entry`](Self::wait_list_per_entry).
+    pub wait_list_per_call: Duration,
+
+    /// Sleep duration for every entry received during [`list`](ThrottledStore::list).
+    ///
+    /// Sleeping is performed after the underlying store returned and only for successful lists.
+    /// The sleep duration is additive to [`wait_list_per_call`](Self::wait_list_per_call).
+    ///
+    /// Note that the per-entry sleep only happens as the user consumes the output entries. Should
+    /// there be an intermediate failure (i.e. after partly consuming the output entries), the
+    /// resulting sleep time will be partial as well.
+    pub wait_list_per_entry: Duration,
+
+    /// Sleep duration for every call to
+    /// [`list_with_delimiter`](ThrottledStore::list_with_delimiter).
+    ///
+    /// Sleeping is done before the underlying store is called and independently of the success of
+    /// the operation. The sleep duration is additive to
+    /// [`wait_list_with_delimiter_per_entry`](Self::wait_list_with_delimiter_per_entry).
+    pub wait_list_with_delimiter_per_call: Duration,
+
+    /// Sleep duration for every entry received during
+    /// [`list_with_delimiter`](ThrottledStore::list_with_delimiter).
+    ///
+    /// Sleeping is performed after the underlying store returned and only for successful gets. The
+    /// sleep duration is additive to
+    /// [`wait_list_with_delimiter_per_call`](Self::wait_list_with_delimiter_per_call).
+    pub wait_list_with_delimiter_per_entry: Duration,
+
+    /// Sleep duration for every call to [`put`](ThrottledStore::put).
+    ///
+    /// Sleeping is done before the underlying store is called and independently of the success of
+    /// the operation.
+    pub wait_put_per_call: Duration,
+}
+
+/// Sleep only if non-zero duration
+async fn sleep(duration: Duration) {
+    if !duration.is_zero() {
+        tokio::time::sleep(duration).await
+    }
+}
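Because the per-call and per-byte waits are additive, a configuration can roughly model a remote store's latency and bandwidth. A minimal sketch (the 50ms/10ns figures are illustrative, not anything this diff prescribes):

```rust
use std::time::Duration;
use object_store::memory::InMemory;
use object_store::throttle::{ThrottleConfig, ThrottledStore};

fn main() {
    // ~50ms per GET request plus ~10ns per byte (about 100 MB/s), so a 1 MiB
    // GET sleeps roughly 50ms + 10.5ms as the stream is consumed.
    let config = ThrottleConfig {
        wait_get_per_call: Duration::from_millis(50),
        wait_get_per_byte: Duration::from_nanos(10),
        ..Default::default()
    };
    let _store = ThrottledStore::new(InMemory::new(), config);
}
```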
+
+/// Store wrapper that wraps an inner store with some `sleep` calls.
+///
+/// This can be used for performance testing.
+///
+/// **Note that the behavior of the wrapper is deterministic and might not reflect real-world
+/// conditions!**
+#[derive(Debug)]
+pub struct ThrottledStore<T> {
+    inner: T,
+    config: Arc<Mutex<ThrottleConfig>>,
+}
+
+impl<T> ThrottledStore<T> {
+    /// Create new wrapper with zero waiting times.
+    pub fn new(inner: T, config: ThrottleConfig) -> Self {
+        Self {
+            inner,
+            config: Arc::new(Mutex::new(config)),
+        }
+    }
+
+    /// Mutate config.
+    pub fn config_mut<F>(&self, f: F)
+    where
+        F: Fn(&mut ThrottleConfig),
+    {
+        let mut guard = self.config.lock();
+        f(&mut guard)
+    }
+
+    /// Return copy of current config.
+    pub fn config(&self) -> ThrottleConfig {
+        *self.config.lock()
+    }
+}
+
+impl<T: ObjectStore> std::fmt::Display for ThrottledStore<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "ThrottledStore({})", self.inner)
+    }
+}
+
+#[async_trait]
+impl<T: ObjectStore> ObjectStore for ThrottledStore<T> {
+    async fn put(&self, location: &Path, payload: PutPayload) -> Result<PutResult> {
+        sleep(self.config().wait_put_per_call).await;
+        self.inner.put(location, payload).await
+    }
+
+    async fn put_opts(
+        &self,
+        location: &Path,
+        payload: PutPayload,
+        opts: PutOptions,
+    ) -> Result<PutResult> {
+        sleep(self.config().wait_put_per_call).await;
+        self.inner.put_opts(location, payload, opts).await
+    }
+
+    async fn put_multipart(&self, location: &Path) -> Result<Box<dyn MultipartUpload>> {
+        let upload = self.inner.put_multipart(location).await?;
+        Ok(Box::new(ThrottledUpload {
+            upload,
+            sleep: self.config().wait_put_per_call,
+        }))
+    }
+
+    async fn put_multipart_opts(
+        &self,
+        location: &Path,
+        opts: PutMultipartOptions,
+    ) -> Result<Box<dyn MultipartUpload>> {
+        let upload = self.inner.put_multipart_opts(location, opts).await?;
+        Ok(Box::new(ThrottledUpload {
+            upload,
+            sleep: self.config().wait_put_per_call,
+        }))
+    }
+
+    async fn get(&self, location: &Path) -> Result<GetResult> {
+        sleep(self.config().wait_get_per_call).await;
+
+        // need to copy to avoid moving / referencing `self`
+        let wait_get_per_byte = self.config().wait_get_per_byte;
+
+        let result = self.inner.get(location).await?;
+        Ok(throttle_get(result, wait_get_per_byte))
+    }
+
+    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
+        sleep(self.config().wait_get_per_call).await;
+
+        // need to copy to avoid moving / referencing `self`
+        let wait_get_per_byte = self.config().wait_get_per_byte;
+
+        let result = self.inner.get_opts(location, options).await?;
+        Ok(throttle_get(result, wait_get_per_byte))
+    }
+
+    async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
+        let config = self.config();
+
+        let sleep_duration =
+            config.wait_get_per_call + config.wait_get_per_byte * (range.end - range.start) as u32;
+
+        sleep(sleep_duration).await;
+
+        self.inner.get_range(location, range).await
+    }
+
+    async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
+        let config = self.config();
+
+        let total_bytes: u64 = ranges.iter().map(|range| range.end - range.start).sum();
+        let sleep_duration =
+            config.wait_get_per_call + config.wait_get_per_byte * total_bytes as u32;
+
+        sleep(sleep_duration).await;
+
+        self.inner.get_ranges(location, ranges).await
+    }
+
+    async fn head(&self, location: &Path) -> Result<ObjectMeta> {
+        sleep(self.config().wait_put_per_call).await;
+        self.inner.head(location).await
+    }
+
+    async fn delete(&self, location: &Path) -> Result<()> {
+        sleep(self.config().wait_delete_per_call).await;
+
+        self.inner.delete(location).await
+    }
+
+    fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> {
+        let stream = self.inner.list(prefix);
+        let config = Arc::clone(&self.config);
+        futures::stream::once(async move {
+            let config = *config.lock();
+            let wait_list_per_entry = config.wait_list_per_entry;
+            sleep(config.wait_list_per_call).await;
+            throttle_stream(stream, move |_| wait_list_per_entry)
+        })
+        .flatten()
+        .boxed()
+    }
+
+    fn list_with_offset(
+        &self,
+        prefix: Option<&Path>,
+        offset: &Path,
+    ) -> BoxStream<'static, Result<ObjectMeta>> {
+        let stream = self.inner.list_with_offset(prefix, offset);
+        let config = Arc::clone(&self.config);
+        futures::stream::once(async move {
+            let config = *config.lock();
+            let wait_list_per_entry = config.wait_list_per_entry;
+            sleep(config.wait_list_per_call).await;
+            throttle_stream(stream, move |_| wait_list_per_entry)
+        })
+        .flatten()
+        .boxed()
+    }
+
+    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
+        sleep(self.config().wait_list_with_delimiter_per_call).await;
+
+        match self.inner.list_with_delimiter(prefix).await {
+            Ok(list_result) => {
+                let entries_len = usize_to_u32_saturate(list_result.objects.len());
+                sleep(self.config().wait_list_with_delimiter_per_entry * entries_len).await;
+                Ok(list_result)
+            }
+            Err(err) => Err(err),
+        }
+    }
+
+    async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
+        sleep(self.config().wait_put_per_call).await;
+
+        self.inner.copy(from, to).await
+    }
+
+    async fn rename(&self, from: &Path, to: &Path) -> Result<()> {
+        sleep(self.config().wait_put_per_call).await;
+
+        self.inner.rename(from, to).await
+    }
+
+    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
+        sleep(self.config().wait_put_per_call).await;
+
+        self.inner.copy_if_not_exists(from, to).await
+    }
+
+    async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
+        sleep(self.config().wait_put_per_call).await;
+
+        self.inner.rename_if_not_exists(from, to).await
+    }
+}
+
+/// Saturated `usize` to `u32` cast.
+fn usize_to_u32_saturate(x: usize) -> u32 {
+    x.try_into().unwrap_or(u32::MAX)
+}
+
+fn throttle_get(result: GetResult, wait_get_per_byte: Duration) -> GetResult {
+    #[allow(clippy::infallible_destructuring_match)]
+    let s = match result.payload {
+        GetResultPayload::Stream(s) => s,
+        #[cfg(all(feature = "fs", not(target_arch = "wasm32")))]
+        GetResultPayload::File(_, _) => unimplemented!(),
+    };
+
+    let stream = throttle_stream(s, move |bytes| {
+        let bytes_len: u32 = usize_to_u32_saturate(bytes.len());
+        wait_get_per_byte * bytes_len
+    });
+
+    GetResult {
+        payload: GetResultPayload::Stream(stream),
+        ..result
+    }
+}
+
+fn throttle_stream<T: Send + 'static, F>(
+    stream: BoxStream<'_, Result<T>>,
+    delay: F,
+) -> BoxStream<'_, Result<T>>
+where
+    F: Fn(&T) -> Duration + Send + Sync + 'static,
+{
+    stream
+        .then(move |result| {
+            let delay = result.as_ref().ok().map(&delay).unwrap_or_default();
+            sleep(delay).then(|_| futures::future::ready(result))
+        })
+        .boxed()
+}
+
+#[async_trait]
+impl<T: MultipartStore> MultipartStore for ThrottledStore<T> {
+    async fn create_multipart(&self, path: &Path) -> Result<MultipartId> {
+        self.inner.create_multipart(path).await
+    }
+
+    async fn put_part(
+        &self,
+        path: &Path,
+        id: &MultipartId,
+        part_idx: usize,
+        data: PutPayload,
+    ) -> Result<PartId> {
+        sleep(self.config().wait_put_per_call).await;
+        self.inner.put_part(path, id, part_idx, data).await
+    }
+
+    async fn complete_multipart(
+        &self,
+        path: &Path,
+        id: &MultipartId,
+        parts: Vec<PartId>,
+    ) -> Result<PutResult> {
+        self.inner.complete_multipart(path, id, parts).await
+    }
+
+    async fn abort_multipart(&self, path: &Path, id: &MultipartId) -> Result<()> {
+        self.inner.abort_multipart(path, id).await
+    }
+}
+
+#[derive(Debug)]
+struct ThrottledUpload {
+    upload: Box<dyn MultipartUpload>,
+    sleep: Duration,
+}
+
+#[async_trait]
+impl MultipartUpload for ThrottledUpload {
+    fn put_part(&mut self, data: PutPayload) -> UploadPart {
+        let duration = self.sleep;
+        let put = self.upload.put_part(data);
+        Box::pin(async move {
+            sleep(duration).await;
+            put.await
+        })
+    }
+
+    async fn complete(&mut self) -> Result<PutResult> {
+        self.upload.complete().await
+    }
+
+    async fn abort(&mut self) -> Result<()> {
+        self.upload.abort().await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[cfg(target_os = "linux")]
+    use
crate::GetResultPayload; + use crate::{integration::*, memory::InMemory}; + use futures::TryStreamExt; + use tokio::time::Duration; + use tokio::time::Instant; + + const WAIT_TIME: Duration = Duration::from_millis(100); + const ZERO: Duration = Duration::from_millis(0); // Duration::default isn't constant + + macro_rules! assert_bounds { + ($d:expr, $lower:expr) => { + assert_bounds!($d, $lower, $lower + 2); + }; + ($d:expr, $lower:expr, $upper:expr) => { + let d = $d; + let lower = $lower * WAIT_TIME; + let upper = $upper * WAIT_TIME; + assert!(d >= lower, "{:?} must be >= than {:?}", d, lower); + assert!(d < upper, "{:?} must be < than {:?}", d, upper); + }; + } + + #[tokio::test] + async fn throttle_test() { + let inner = InMemory::new(); + let store = ThrottledStore::new(inner, ThrottleConfig::default()); + + put_get_delete_list(&store).await; + list_uses_directories_correctly(&store).await; + list_with_delimiter(&store).await; + rename_and_copy(&store).await; + copy_if_not_exists(&store).await; + stream_get(&store).await; + multipart(&store, &store).await; + } + + #[tokio::test] + async fn delete_test() { + let inner = InMemory::new(); + let store = ThrottledStore::new(inner, ThrottleConfig::default()); + + assert_bounds!(measure_delete(&store, None).await, 0); + assert_bounds!(measure_delete(&store, Some(0)).await, 0); + assert_bounds!(measure_delete(&store, Some(10)).await, 0); + + store.config_mut(|cfg| cfg.wait_delete_per_call = WAIT_TIME); + assert_bounds!(measure_delete(&store, None).await, 1); + assert_bounds!(measure_delete(&store, Some(0)).await, 1); + assert_bounds!(measure_delete(&store, Some(10)).await, 1); + } + + #[tokio::test] + // macos github runner is so slow it can't complete within WAIT_TIME*2 + #[cfg(target_os = "linux")] + async fn get_test() { + let inner = InMemory::new(); + let store = ThrottledStore::new(inner, ThrottleConfig::default()); + + assert_bounds!(measure_get(&store, None).await, 0); + assert_bounds!(measure_get(&store, Some(0)).await, 0); + assert_bounds!(measure_get(&store, Some(10)).await, 0); + + store.config_mut(|cfg| cfg.wait_get_per_call = WAIT_TIME); + assert_bounds!(measure_get(&store, None).await, 1); + assert_bounds!(measure_get(&store, Some(0)).await, 1); + assert_bounds!(measure_get(&store, Some(10)).await, 1); + + store.config_mut(|cfg| { + cfg.wait_get_per_call = ZERO; + cfg.wait_get_per_byte = WAIT_TIME; + }); + assert_bounds!(measure_get(&store, Some(2)).await, 2); + + store.config_mut(|cfg| { + cfg.wait_get_per_call = WAIT_TIME; + cfg.wait_get_per_byte = WAIT_TIME; + }); + assert_bounds!(measure_get(&store, Some(2)).await, 3); + } + + #[tokio::test] + // macos github runner is so slow it can't complete within WAIT_TIME*2 + #[cfg(target_os = "linux")] + async fn list_test() { + let inner = InMemory::new(); + let store = ThrottledStore::new(inner, ThrottleConfig::default()); + + assert_bounds!(measure_list(&store, 0).await, 0); + assert_bounds!(measure_list(&store, 10).await, 0); + + store.config_mut(|cfg| cfg.wait_list_per_call = WAIT_TIME); + assert_bounds!(measure_list(&store, 0).await, 1); + assert_bounds!(measure_list(&store, 10).await, 1); + + store.config_mut(|cfg| { + cfg.wait_list_per_call = ZERO; + cfg.wait_list_per_entry = WAIT_TIME; + }); + assert_bounds!(measure_list(&store, 2).await, 2); + + store.config_mut(|cfg| { + cfg.wait_list_per_call = WAIT_TIME; + cfg.wait_list_per_entry = WAIT_TIME; + }); + assert_bounds!(measure_list(&store, 2).await, 3); + } + + #[tokio::test] + // macos github runner is so slow it can't 
complete within WAIT_TIME*2
+    #[cfg(target_os = "linux")]
+    async fn list_with_delimiter_test() {
+        let inner = InMemory::new();
+        let store = ThrottledStore::new(inner, ThrottleConfig::default());
+
+        assert_bounds!(measure_list_with_delimiter(&store, 0).await, 0);
+        assert_bounds!(measure_list_with_delimiter(&store, 10).await, 0);
+
+        store.config_mut(|cfg| cfg.wait_list_with_delimiter_per_call = WAIT_TIME);
+        assert_bounds!(measure_list_with_delimiter(&store, 0).await, 1);
+        assert_bounds!(measure_list_with_delimiter(&store, 10).await, 1);
+
+        store.config_mut(|cfg| {
+            cfg.wait_list_with_delimiter_per_call = ZERO;
+            cfg.wait_list_with_delimiter_per_entry = WAIT_TIME;
+        });
+        assert_bounds!(measure_list_with_delimiter(&store, 2).await, 2);
+
+        store.config_mut(|cfg| {
+            cfg.wait_list_with_delimiter_per_call = WAIT_TIME;
+            cfg.wait_list_with_delimiter_per_entry = WAIT_TIME;
+        });
+        assert_bounds!(measure_list_with_delimiter(&store, 2).await, 3);
+    }
+
+    #[tokio::test]
+    async fn put_test() {
+        let inner = InMemory::new();
+        let store = ThrottledStore::new(inner, ThrottleConfig::default());
+
+        assert_bounds!(measure_put(&store, 0).await, 0);
+        assert_bounds!(measure_put(&store, 10).await, 0);
+
+        store.config_mut(|cfg| cfg.wait_put_per_call = WAIT_TIME);
+        assert_bounds!(measure_put(&store, 0).await, 1);
+        assert_bounds!(measure_put(&store, 10).await, 1);
+
+        store.config_mut(|cfg| cfg.wait_put_per_call = ZERO);
+        assert_bounds!(measure_put(&store, 0).await, 0);
+    }
+
+    async fn place_test_object(store: &ThrottledStore<InMemory>, n_bytes: Option<usize>) -> Path {
+        let path = Path::from("foo");
+
+        if let Some(n_bytes) = n_bytes {
+            let data: Vec<_> = std::iter::repeat(1u8).take(n_bytes).collect();
+            store.put(&path, data.into()).await.unwrap();
+        } else {
+            // ensure object is absent
+            store.delete(&path).await.unwrap();
+        }
+
+        path
+    }
+
+    #[allow(dead_code)]
+    async fn place_test_objects(store: &ThrottledStore<InMemory>, n_entries: usize) -> Path {
+        let prefix = Path::from("foo");
+
+        // clean up store
+        let entries: Vec<_> = store.list(Some(&prefix)).try_collect().await.unwrap();
+
+        for entry in entries {
+            store.delete(&entry.location).await.unwrap();
+        }
+
+        // create new entries
+        for i in 0..n_entries {
+            let path = prefix.child(i.to_string().as_str());
+            store.put(&path, "bar".into()).await.unwrap();
+        }
+
+        prefix
+    }
+
+    async fn measure_delete(store: &ThrottledStore<InMemory>, n_bytes: Option<usize>) -> Duration {
+        let path = place_test_object(store, n_bytes).await;
+
+        let t0 = Instant::now();
+        store.delete(&path).await.unwrap();
+
+        t0.elapsed()
+    }
+
+    #[allow(dead_code)]
+    #[cfg(target_os = "linux")]
+    async fn measure_get(store: &ThrottledStore<InMemory>, n_bytes: Option<usize>) -> Duration {
+        let path = place_test_object(store, n_bytes).await;
+
+        let t0 = Instant::now();
+        let res = store.get(&path).await;
+        if n_bytes.is_some() {
+            // need to consume bytes to provoke sleep times
+            let s = match res.unwrap().payload {
+                GetResultPayload::Stream(s) => s,
+                GetResultPayload::File(_, _) => unimplemented!(),
+            };
+
+            s.map_ok(|b| bytes::BytesMut::from(&b[..]))
+                .try_concat()
+                .await
+                .unwrap();
+        } else {
+            assert!(res.is_err());
+        }
+
+        t0.elapsed()
+    }
+
+    #[allow(dead_code)]
+    async fn measure_list(store: &ThrottledStore<InMemory>, n_entries: usize) -> Duration {
+        let prefix = place_test_objects(store, n_entries).await;
+
+        let t0 = Instant::now();
+        store
+            .list(Some(&prefix))
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+
+        t0.elapsed()
+    }
+
+    #[allow(dead_code)]
+    async fn measure_list_with_delimiter(
+        store: &ThrottledStore<InMemory>,
+        n_entries: usize,
+    ) -> Duration {
+        let prefix = place_test_objects(store, n_entries).await;
+
+        let t0 = Instant::now();
+        store.list_with_delimiter(Some(&prefix)).await.unwrap();
+
+        t0.elapsed()
+    }
+
+    async fn measure_put(store: &ThrottledStore<InMemory>, n_bytes: usize) -> Duration {
+        let data: Vec<_> = std::iter::repeat(1u8).take(n_bytes).collect();
+
+        let t0 = Instant::now();
+        store.put(&Path::from("foo"), data.into()).await.unwrap();
+
+        t0.elapsed()
+    }
+}
diff --git a/rust/object_store/src/upload.rs b/rust/object_store/src/upload.rs
new file mode 100644
index 0000000000..af5975a744
--- /dev/null
+++ b/rust/object_store/src/upload.rs
@@ -0,0 +1,341 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::task::{Context, Poll};
+
+use crate::{PutPayload, PutPayloadMut, PutResult, Result};
+use async_trait::async_trait;
+use bytes::Bytes;
+use futures::future::BoxFuture;
+use futures::ready;
+use tokio::task::JoinSet;
+
+/// An upload part request
+pub type UploadPart = BoxFuture<'static, Result<()>>;
+
+/// A trait allowing writing an object in fixed size chunks
+///
+/// Consecutive chunks of data can be written by calling [`MultipartUpload::put_part`] and polling
+/// the returned futures to completion. Multiple futures returned by [`MultipartUpload::put_part`]
+/// may be polled in parallel, allowing for concurrent uploads.
+///
+/// Once all part uploads have been polled to completion, the upload can be completed by
+/// calling [`MultipartUpload::complete`]. This will make the entire uploaded object visible
+/// as an atomic operation. It is implementation defined behaviour if [`MultipartUpload::complete`]
+/// is called before all [`UploadPart`] have been polled to completion.
+#[async_trait]
+pub trait MultipartUpload: Send + std::fmt::Debug {
+    /// Upload the next part
+    ///
+    /// Most stores require that all parts excluding the last are at least 5 MiB, and some
+    /// further require that all parts excluding the last be the same size, e.g. [R2].
+    /// Clients wanting to maximise compatibility should therefore perform writes in
+    /// fixed size blocks larger than 5 MiB.
+    ///
+    /// Implementations may invoke this method multiple times and then await on the
+    /// returned futures in parallel
+    ///
+    /// ```no_run
+    /// # use futures::StreamExt;
+    /// # use object_store::MultipartUpload;
+    /// #
+    /// # async fn test() {
+    /// #
+    /// let mut upload: Box<&dyn MultipartUpload> = todo!();
+    /// let p1 = upload.put_part(vec![0; 10 * 1024 * 1024].into());
+    /// let p2 = upload.put_part(vec![1; 10 * 1024 * 1024].into());
+    /// futures::future::try_join(p1, p2).await.unwrap();
+    /// upload.complete().await.unwrap();
+    /// # }
+    /// ```
+    ///
+    /// [R2]: https://developers.cloudflare.com/r2/objects/multipart-objects/#limitations
+    fn put_part(&mut self, data: PutPayload) -> UploadPart;
+
+    /// Complete the multipart upload
+    ///
+    /// It is implementation defined behaviour if this method is called before polling
+    /// all [`UploadPart`] returned by [`MultipartUpload::put_part`] to completion. Additionally,
+    /// it is implementation defined behaviour to call [`MultipartUpload::complete`]
+    /// on an already completed or aborted [`MultipartUpload`].
+    async fn complete(&mut self) -> Result<PutResult>;
+
+    /// Abort the multipart upload
+    ///
+    /// If a [`MultipartUpload`] is dropped without calling [`MultipartUpload::complete`],
+    /// some object stores will automatically clean up any previously uploaded parts.
+    /// However, some stores, such as S3 and GCS, cannot perform cleanup on drop.
+    /// As such [`MultipartUpload::abort`] can be invoked to perform this cleanup.
+    ///
+    /// It will not be possible to call `abort` in all failure scenarios, for example
+    /// non-graceful shutdown of the calling application. It is therefore recommended
+    /// object stores are configured with lifecycle rules to automatically cleanup
+    /// unused parts older than some threshold. See [crate::aws] and [crate::gcp]
+    /// for more information.
+    ///
+    /// It is implementation defined behaviour to call [`MultipartUpload::abort`]
+    /// on an already completed or aborted [`MultipartUpload`]
+    async fn abort(&mut self) -> Result<()>;
+}
+
+#[async_trait]
+impl<W: MultipartUpload + ?Sized> MultipartUpload for Box<W> {
+    fn put_part(&mut self, data: PutPayload) -> UploadPart {
+        (**self).put_part(data)
+    }
+
+    async fn complete(&mut self) -> Result<PutResult> {
+        (**self).complete().await
+    }
+
+    async fn abort(&mut self) -> Result<()> {
+        (**self).abort().await
+    }
+}
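Most callers won't drive `put_part` futures by hand; the `WriteMultipart` helper defined below buffers writes into fixed size chunks and spawns the uploads. A rough usage sketch (the `big/object` path and the concurrency limit of 8 are illustrative assumptions):

```rust
use object_store::{path::Path, ObjectStore, WriteMultipart};

async fn upload(store: &dyn ObjectStore, data: &[u8]) -> object_store::Result<()> {
    let upload = store.put_multipart(&Path::from("big/object")).await?;
    // Default 5MB chunks; parts are uploaded on background tokio tasks.
    let mut write = WriteMultipart::new(upload);
    for chunk in data.chunks(1024 * 1024) {
        // Optional backpressure: allow at most 8 parts in flight.
        write.wait_for_capacity(8).await?;
        write.write(chunk);
    }
    // Flush the final partial chunk and complete the multipart upload.
    write.finish().await?;
    Ok(())
}
```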
However, unlike +/// [`Sink`] this back pressure is optional, allowing integration with synchronous producers +/// +/// [`Sink`]: futures::sink::Sink +#[derive(Debug)] +pub struct WriteMultipart { + upload: Box<dyn MultipartUpload>, + + buffer: PutPayloadMut, + + chunk_size: usize, + + tasks: JoinSet<Result<()>>, +} + +impl WriteMultipart { + /// Create a new [`WriteMultipart`] that will upload using 5MB chunks + pub fn new(upload: Box<dyn MultipartUpload>) -> Self { + Self::new_with_chunk_size(upload, 5 * 1024 * 1024) + } + + /// Create a new [`WriteMultipart`] that will upload in fixed `chunk_size` sized chunks + pub fn new_with_chunk_size(upload: Box<dyn MultipartUpload>, chunk_size: usize) -> Self { + Self { + upload, + chunk_size, + buffer: PutPayloadMut::new(), + tasks: Default::default(), + } + } + + /// Polls for there to be fewer than `max_concurrency` [`UploadPart`] in progress + /// + /// See [`Self::wait_for_capacity`] for an async version of this function + pub fn poll_for_capacity( + &mut self, + cx: &mut Context<'_>, + max_concurrency: usize, + ) -> Poll<Result<()>> { + while !self.tasks.is_empty() && self.tasks.len() >= max_concurrency { + ready!(self.tasks.poll_join_next(cx)).unwrap()?? + } + Poll::Ready(Ok(())) + } + + /// Wait until there are fewer than `max_concurrency` [`UploadPart`] in progress + /// + /// See [`Self::poll_for_capacity`] for a [`Poll`] version of this function + pub async fn wait_for_capacity(&mut self, max_concurrency: usize) -> Result<()> { + futures::future::poll_fn(|cx| self.poll_for_capacity(cx, max_concurrency)).await + } + + /// Write data to this [`WriteMultipart`] + /// + /// Data is buffered using [`PutPayloadMut::extend_from_slice`]. Implementations looking to + /// write data from owned buffers may prefer [`Self::put`] as this avoids copying. + /// + /// Note this method is synchronous (not `async`) and will immediately + /// start new uploads as soon as the internal `chunk_size` is hit, + /// regardless of how many outstanding uploads are already in progress. + /// + /// Back pressure can optionally be applied to producers by calling + /// [`Self::wait_for_capacity`] prior to calling this method + pub fn write(&mut self, mut buf: &[u8]) { + while !buf.is_empty() { + let remaining = self.chunk_size - self.buffer.content_length(); + let to_read = buf.len().min(remaining); + self.buffer.extend_from_slice(&buf[..to_read]); + if to_read == remaining { + let buffer = std::mem::take(&mut self.buffer); + self.put_part(buffer.into()) + } + buf = &buf[to_read..] + } + } + + /// Put a chunk of data into this [`WriteMultipart`] without copying + /// + /// Data is buffered using [`PutPayloadMut::push`]. Implementations looking to + /// perform writes from non-owned buffers should prefer [`Self::write`] as this + /// will allow multiple calls to share the same underlying allocation.
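+ /// + /// A minimal usage sketch (illustrative only, not part of the upstream docs; the in-memory store and the `foo` path are placeholders): + /// + /// ```no_run + /// # use object_store::{memory::InMemory, path::Path, ObjectStore, WriteMultipart}; + /// # async fn example() { + /// let store = InMemory::new(); + /// let upload = store.put_multipart(&Path::from("foo")).await.unwrap(); + /// let mut write = WriteMultipart::new(upload); + /// // Owned `Bytes` are forwarded to the upload without copying + /// write.put(bytes::Bytes::from_static(b"hello world")); + /// // Optionally apply back pressure before buffering more data + /// write.wait_for_capacity(8).await.unwrap(); + /// write.finish().await.unwrap(); + /// # } + /// ```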
+ /// + /// See [`Self::write`] for information on backpressure + pub fn put(&mut self, mut bytes: Bytes) { + while !bytes.is_empty() { + let remaining = self.chunk_size - self.buffer.content_length(); + if bytes.len() < remaining { + self.buffer.push(bytes); + return; + } + self.buffer.push(bytes.split_to(remaining)); + let buffer = std::mem::take(&mut self.buffer); + self.put_part(buffer.into()) + } + } + + pub(crate) fn put_part(&mut self, part: PutPayload) { + self.tasks.spawn(self.upload.put_part(part)); + } + + /// Abort this upload, attempting to clean up any successfully uploaded parts + pub async fn abort(mut self) -> Result<()> { + self.tasks.shutdown().await; + self.upload.abort().await + } + + /// Flush final chunk, and await completion of all in-flight requests + pub async fn finish(mut self) -> Result<PutResult> { + if !self.buffer.is_empty() { + let part = std::mem::take(&mut self.buffer); + self.put_part(part.into()) + } + + self.wait_for_capacity(0).await?; + + match self.upload.complete().await { + Err(e) => { + self.tasks.shutdown().await; + self.upload.abort().await?; + Err(e) + } + Ok(result) => Ok(result), + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use std::time::Duration; + + use futures::FutureExt; + use parking_lot::Mutex; + use rand::prelude::StdRng; + use rand::{Rng, SeedableRng}; + + use crate::memory::InMemory; + use crate::path::Path; + use crate::throttle::{ThrottleConfig, ThrottledStore}; + use crate::ObjectStore; + + use super::*; + + #[tokio::test] + async fn test_concurrency() { + let config = ThrottleConfig { + wait_put_per_call: Duration::from_millis(1), + ..Default::default() + }; + + let path = Path::from("foo"); + let store = ThrottledStore::new(InMemory::new(), config); + let upload = store.put_multipart(&path).await.unwrap(); + let mut write = WriteMultipart::new_with_chunk_size(upload, 10); + + for _ in 0..20 { + write.write(&[0; 5]); + } + assert!(write.wait_for_capacity(10).now_or_never().is_none()); + write.wait_for_capacity(10).await.unwrap() + } + + #[derive(Debug, Default)] + struct InstrumentedUpload { + chunks: Arc<Mutex<Vec<PutPayload>>>, + } + + #[async_trait] + impl MultipartUpload for InstrumentedUpload { + fn put_part(&mut self, data: PutPayload) -> UploadPart { + self.chunks.lock().push(data); + futures::future::ready(Ok(())).boxed() + } + + async fn complete(&mut self) -> Result<PutResult> { + Ok(PutResult { + e_tag: None, + version: None, + }) + } + + async fn abort(&mut self) -> Result<()> { + unimplemented!() + } + } + + #[tokio::test] + async fn test_write_multipart() { + let mut rng = StdRng::seed_from_u64(42); + + for method in [0.0, 0.5, 1.0] { + for _ in 0..10 { + for chunk_size in [1, 17, 23] { + let upload = Box::<InstrumentedUpload>::default(); + let chunks = Arc::clone(&upload.chunks); + let mut write = WriteMultipart::new_with_chunk_size(upload, chunk_size); + + let mut expected = Vec::with_capacity(1024); + + for _ in 0..50 { + let chunk_size = rng.random_range(0..30); + let data: Vec<_> = (0..chunk_size).map(|_| rng.random()).collect(); + expected.extend_from_slice(&data); + + match rng.random_bool(method) { + true => write.put(data.into()), + false => write.write(&data), + } + } + write.finish().await.unwrap(); + + let chunks = chunks.lock(); + + let actual: Vec<_> = chunks.iter().flatten().flatten().copied().collect(); + assert_eq!(expected, actual); + + for chunk in chunks.iter().take(chunks.len() - 1) { + assert_eq!(chunk.content_length(), chunk_size) + } + + let last_chunk = chunks.last().unwrap().content_length(); + assert!(last_chunk <= chunk_size,
"{chunk_size}"); + } + } + } + } +} diff --git a/rust/object_store/src/util.rs b/rust/object_store/src/util.rs new file mode 100644 index 0000000000..4f297d95be --- /dev/null +++ b/rust/object_store/src/util.rs @@ -0,0 +1,491 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Common logic for interacting with remote object stores +use std::{ + fmt::Display, + ops::{Range, RangeBounds}, +}; + +use super::Result; +use bytes::Bytes; +use futures::{stream::StreamExt, Stream, TryStreamExt}; + +#[cfg(any(feature = "azure", feature = "http"))] +pub(crate) static RFC1123_FMT: &str = "%a, %d %h %Y %T GMT"; + +// deserialize dates according to rfc1123 +#[cfg(any(feature = "azure", feature = "http"))] +pub(crate) fn deserialize_rfc1123<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + let s: String = serde::Deserialize::deserialize(deserializer)?; + let naive = + chrono::NaiveDateTime::parse_from_str(&s, RFC1123_FMT).map_err(serde::de::Error::custom)?; + Ok(chrono::TimeZone::from_utc_datetime(&chrono::Utc, &naive)) +} + +#[cfg(any(feature = "aws", feature = "azure"))] +pub(crate) fn hmac_sha256(secret: impl AsRef<[u8]>, bytes: impl AsRef<[u8]>) -> ring::hmac::Tag { + let key = ring::hmac::Key::new(ring::hmac::HMAC_SHA256, secret.as_ref()); + ring::hmac::sign(&key, bytes.as_ref()) +} + +/// Collect a stream into [`Bytes`] avoiding copying in the event of a single chunk +pub async fn collect_bytes(mut stream: S, size_hint: Option) -> Result +where + E: Send, + S: Stream> + Send + Unpin, +{ + let first = stream.next().await.transpose()?.unwrap_or_default(); + + // Avoid copying if single response + match stream.next().await.transpose()? 
{ + None => Ok(first), + Some(second) => { + let size_hint = size_hint.unwrap_or_else(|| first.len() as u64 + second.len() as u64); + + let mut buf = Vec::with_capacity(size_hint as usize); + buf.extend_from_slice(&first); + buf.extend_from_slice(&second); + while let Some(maybe_bytes) = stream.next().await { + buf.extend_from_slice(&maybe_bytes?); + } + + Ok(buf.into()) + } + } +} + +#[cfg(all(feature = "fs", not(target_arch = "wasm32")))] +/// Takes a function and spawns it to a tokio blocking pool if available +pub(crate) async fn maybe_spawn_blocking<F, T>(f: F) -> Result<T> +where + F: FnOnce() -> Result<T> + Send + 'static, + T: Send + 'static, +{ + match tokio::runtime::Handle::try_current() { + Ok(runtime) => runtime.spawn_blocking(f).await?, + Err(_) => f(), + } +} + +/// Range requests with a gap less than or equal to this, +/// will be coalesced into a single request by [`coalesce_ranges`] +pub const OBJECT_STORE_COALESCE_DEFAULT: u64 = 1024 * 1024; + +/// Up to this number of range requests will be performed in parallel by [`coalesce_ranges`] +pub(crate) const OBJECT_STORE_COALESCE_PARALLEL: usize = 10; + +/// Takes a function `fetch` that can fetch a range of bytes and uses this to +/// fetch the provided byte `ranges` +/// +/// To improve performance it will: +/// +/// * Combine ranges less than `coalesce` bytes apart into a single call to `fetch` +/// * Make multiple `fetch` requests in parallel (up to a maximum of 10) +/// +pub async fn coalesce_ranges<F, E, Fut>( + ranges: &[Range<u64>], + fetch: F, + coalesce: u64, +) -> Result<Vec<Bytes>, E> +where + F: Send + FnMut(Range<u64>) -> Fut, + E: Send, + Fut: std::future::Future<Output = Result<Bytes, E>> + Send, +{ + let fetch_ranges = merge_ranges(ranges, coalesce); + + let fetched: Vec<_> = futures::stream::iter(fetch_ranges.iter().cloned()) + .map(fetch) + .buffered(OBJECT_STORE_COALESCE_PARALLEL) + .try_collect() + .await?; + + Ok(ranges + .iter() + .map(|range| { + let idx = fetch_ranges.partition_point(|v| v.start <= range.start) - 1; + let fetch_range = &fetch_ranges[idx]; + let fetch_bytes = &fetched[idx]; + + let start = range.start - fetch_range.start; + let end = range.end - fetch_range.start; + let range = (start as usize)..(end as usize).min(fetch_bytes.len()); + fetch_bytes.slice(range) + }) + .collect()) +} + +/// Returns a sorted list of ranges that cover `ranges` +fn merge_ranges(ranges: &[Range<u64>], coalesce: u64) -> Vec<Range<u64>> { + if ranges.is_empty() { + return vec![]; + } + + let mut ranges = ranges.to_vec(); + ranges.sort_unstable_by_key(|range| range.start); + + let mut ret = Vec::with_capacity(ranges.len()); + let mut start_idx = 0; + let mut end_idx = 1; + + while start_idx != ranges.len() { + let mut range_end = ranges[start_idx].end; + + while end_idx != ranges.len() + && ranges[end_idx] + .start + .checked_sub(range_end) + .map(|delta| delta <= coalesce) + .unwrap_or(true) + { + range_end = range_end.max(ranges[end_idx].end); + end_idx += 1; + } + + let start = ranges[start_idx].start; + let end = range_end; + ret.push(start..end); + + start_idx = end_idx; + end_idx += 1; + } + + ret +} + +/// Request only a portion of an object's bytes +/// +/// These can be created from [usize] ranges, like +/// +/// ```rust +/// # use object_store::GetRange; +/// let range1: GetRange = (50..150).into(); +/// let range2: GetRange = (50..=150).into(); +/// let range3: GetRange = (50..).into(); +/// let range4: GetRange = (..150).into(); +/// ``` +/// +/// Implementations may wish to inspect [`GetResult`] for the exact byte +/// range returned.
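+ /// + /// An additional illustrative sketch (not from the upstream docs): [`GetRange::as_range`] validates a range and clamps it to the object length, shown here for a hypothetical 100 byte object: + /// + /// ```rust + /// # use object_store::GetRange; + /// let clamped = GetRange::Bounded(50..150).as_range(100).unwrap(); + /// assert_eq!(clamped, 50..100); + /// ```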
+ /// + /// [`GetResult`]: crate::GetResult +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum GetRange { + /// Request a specific range of bytes + /// + /// If the given range is zero-length or starts after the end of the object, + /// an error will be returned. Additionally, if the range ends after the end + /// of the object, the entire remainder of the object will be returned. + /// Otherwise, the exact requested range will be returned. + /// + /// Note that range is u64 (i.e., not usize), + /// as `object_store` supports 32-bit architectures such as WASM + Bounded(Range<u64>), + /// Request all bytes starting from a given byte offset + Offset(u64), + /// Request up to the last n bytes + Suffix(u64), +} + +#[derive(Debug, thiserror::Error)] +#[non_exhaustive] +pub enum InvalidGetRange { + #[error("Wanted range starting at {requested}, but object was only {length} bytes long")] + StartTooLarge { requested: u64, length: u64 }, + + #[error("Range started at {start} and ended at {end}")] + Inconsistent { start: u64, end: u64 }, + + #[error("Range {requested} is larger than system memory limit {max}")] + TooLarge { requested: u64, max: u64 }, +} + +impl GetRange { + /// Check if the range is valid. + pub fn is_valid(&self) -> Result<(), InvalidGetRange> { + if let Self::Bounded(r) = self { + if r.end <= r.start { + return Err(InvalidGetRange::Inconsistent { + start: r.start, + end: r.end, + }); + } + if (r.end - r.start) > usize::MAX as u64 { + return Err(InvalidGetRange::TooLarge { + requested: r.start, + max: usize::MAX as u64, + }); + } + } + Ok(()) + } + + /// Convert to a [`Range`] if [valid](Self::is_valid). + pub fn as_range(&self, len: u64) -> Result<Range<u64>, InvalidGetRange> { + self.is_valid()?; + match self { + Self::Bounded(r) => { + if r.start >= len { + Err(InvalidGetRange::StartTooLarge { + requested: r.start, + length: len, + }) + } else if r.end > len { + Ok(r.start..len) + } else { + Ok(r.clone()) + } + } + Self::Offset(o) => { + if *o >= len { + Err(InvalidGetRange::StartTooLarge { + requested: *o, + length: len, + }) + } else { + Ok(*o..len) + } + } + Self::Suffix(n) => Ok(len.saturating_sub(*n)..len), + } + } +} + +impl Display for GetRange { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Bounded(r) => write!(f, "bytes={}-{}", r.start, r.end - 1), + Self::Offset(o) => write!(f, "bytes={o}-"), + Self::Suffix(n) => write!(f, "bytes=-{n}"), + } + } +} + +impl<T: RangeBounds<u64>> From<T> for GetRange { + fn from(value: T) -> Self { + use std::ops::Bound::*; + let first = match value.start_bound() { + Included(i) => *i, + Excluded(i) => i + 1, + Unbounded => 0, + }; + match value.end_bound() { + Included(i) => Self::Bounded(first..(i + 1)), + Excluded(i) => Self::Bounded(first..*i), + Unbounded => Self::Offset(first), + } + } +} +// http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html +// +// Do not URI-encode any of the unreserved characters that RFC 3986 defines: +// A-Z, a-z, 0-9, hyphen ( - ), underscore ( _ ), period ( . ), and tilde ( ~ ).
+#[cfg(any(feature = "aws", feature = "gcp"))] +pub(crate) const STRICT_ENCODE_SET: percent_encoding::AsciiSet = percent_encoding::NON_ALPHANUMERIC + .remove(b'-') + .remove(b'.') + .remove(b'_') + .remove(b'~'); + +/// Computes the SHA256 digest of `body` returned as a hex encoded string +#[cfg(any(feature = "aws", feature = "gcp"))] +pub(crate) fn hex_digest(bytes: &[u8]) -> String { + let digest = ring::digest::digest(&ring::digest::SHA256, bytes); + hex_encode(digest.as_ref()) +} + +/// Returns `bytes` as a lower-case hex encoded string +#[cfg(any(feature = "aws", feature = "gcp"))] +pub(crate) fn hex_encode(bytes: &[u8]) -> String { + use std::fmt::Write; + let mut out = String::with_capacity(bytes.len() * 2); + for byte in bytes { + // String writing is infallible + let _ = write!(out, "{byte:02x}"); + } + out +} + +#[cfg(test)] +mod tests { + use crate::Error; + + use super::*; + use rand::{rng, Rng}; + use std::ops::Range; + + /// Calls coalesce_ranges and validates the returned data is correct + /// + /// Returns the fetched ranges + async fn do_fetch(ranges: Vec>, coalesce: u64) -> Vec> { + let max = ranges.iter().map(|x| x.end).max().unwrap_or(0); + let src: Vec<_> = (0..max).map(|x| x as u8).collect(); + + let mut fetches = vec![]; + let coalesced = coalesce_ranges::<_, Error, _>( + &ranges, + |range| { + fetches.push(range.clone()); + let start = usize::try_from(range.start).unwrap(); + let end = usize::try_from(range.end).unwrap(); + futures::future::ready(Ok(Bytes::from(src[start..end].to_vec()))) + }, + coalesce, + ) + .await + .unwrap(); + + assert_eq!(ranges.len(), coalesced.len()); + for (range, bytes) in ranges.iter().zip(coalesced) { + assert_eq!( + bytes.as_ref(), + &src[usize::try_from(range.start).unwrap()..usize::try_from(range.end).unwrap()] + ); + } + fetches + } + + #[tokio::test] + async fn test_coalesce_ranges() { + let fetches = do_fetch(vec![], 0).await; + assert!(fetches.is_empty()); + + let fetches = do_fetch(vec![0..3; 1], 0).await; + assert_eq!(fetches, vec![0..3]); + + let fetches = do_fetch(vec![0..2, 3..5], 0).await; + assert_eq!(fetches, vec![0..2, 3..5]); + + let fetches = do_fetch(vec![0..1, 1..2], 0).await; + assert_eq!(fetches, vec![0..2]); + + let fetches = do_fetch(vec![0..1, 2..72], 1).await; + assert_eq!(fetches, vec![0..72]); + + let fetches = do_fetch(vec![0..1, 56..72, 73..75], 1).await; + assert_eq!(fetches, vec![0..1, 56..75]); + + let fetches = do_fetch(vec![0..1, 5..6, 7..9, 2..3, 4..6], 1).await; + assert_eq!(fetches, vec![0..9]); + + let fetches = do_fetch(vec![0..1, 5..6, 7..9, 2..3, 4..6], 1).await; + assert_eq!(fetches, vec![0..9]); + + let fetches = do_fetch(vec![0..1, 6..7, 8..9, 10..14, 9..10], 4).await; + assert_eq!(fetches, vec![0..1, 6..14]); + } + + #[tokio::test] + async fn test_coalesce_fuzz() { + let mut rand = rng(); + for _ in 0..100 { + let object_len = rand.random_range(10..250); + let range_count = rand.random_range(0..10); + let ranges: Vec<_> = (0..range_count) + .map(|_| { + let start = rand.random_range(0..object_len); + let max_len = 20.min(object_len - start); + let len = rand.random_range(0..max_len); + start..start + len + }) + .collect(); + + let coalesce = rand.random_range(1..5); + let fetches = do_fetch(ranges.clone(), coalesce).await; + + for fetch in fetches.windows(2) { + assert!( + fetch[0].start <= fetch[1].start, + "fetches should be sorted, {:?} vs {:?}", + fetch[0], + fetch[1] + ); + + let delta = fetch[1].end - fetch[0].end; + assert!( + delta > coalesce, + "fetches should not overlap by {}, 
{:?} vs {:?} for {:?}", + coalesce, + fetch[0], + fetch[1], + ranges + ); + } + } + } + + #[test] + fn getrange_str() { + assert_eq!(GetRange::Offset(0).to_string(), "bytes=0-"); + assert_eq!(GetRange::Bounded(10..19).to_string(), "bytes=10-18"); + assert_eq!(GetRange::Suffix(10).to_string(), "bytes=-10"); + } + + #[test] + fn getrange_from() { + assert_eq!(Into::::into(10..15), GetRange::Bounded(10..15),); + assert_eq!(Into::::into(10..=15), GetRange::Bounded(10..16),); + assert_eq!(Into::::into(10..), GetRange::Offset(10),); + assert_eq!(Into::::into(..=15), GetRange::Bounded(0..16)); + } + + #[test] + fn test_as_range() { + let range = GetRange::Bounded(2..5); + assert_eq!(range.as_range(5).unwrap(), 2..5); + + let range = range.as_range(4).unwrap(); + assert_eq!(range, 2..4); + + let range = GetRange::Bounded(3..3); + let err = range.as_range(2).unwrap_err().to_string(); + assert_eq!(err, "Range started at 3 and ended at 3"); + + let range = GetRange::Bounded(2..2); + let err = range.as_range(3).unwrap_err().to_string(); + assert_eq!(err, "Range started at 2 and ended at 2"); + + let range = GetRange::Suffix(3); + assert_eq!(range.as_range(3).unwrap(), 0..3); + assert_eq!(range.as_range(2).unwrap(), 0..2); + + let range = GetRange::Suffix(0); + assert_eq!(range.as_range(0).unwrap(), 0..0); + + let range = GetRange::Offset(2); + let err = range.as_range(2).unwrap_err().to_string(); + assert_eq!( + err, + "Wanted range starting at 2, but object was only 2 bytes long" + ); + + let err = range.as_range(1).unwrap_err().to_string(); + assert_eq!( + err, + "Wanted range starting at 2, but object was only 1 bytes long" + ); + + let range = GetRange::Offset(1); + assert_eq!(range.as_range(2).unwrap(), 1..2); + } +} diff --git a/rust/object_store/tests/get_range_file.rs b/rust/object_store/tests/get_range_file.rs new file mode 100644 index 0000000000..d5ac8e3907 --- /dev/null +++ b/rust/object_store/tests/get_range_file.rs @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Tests that the default implementation of get_range handles GetResult::File correctly (#4350) + +use async_trait::async_trait; +use bytes::Bytes; +use futures::stream::BoxStream; +use object_store::local::LocalFileSystem; +use object_store::path::Path; +use object_store::*; +use std::fmt::Formatter; +use tempfile::tempdir; + +#[derive(Debug)] +struct MyStore(LocalFileSystem); + +impl std::fmt::Display for MyStore { + fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +#[async_trait] +impl ObjectStore for MyStore { + async fn put_opts( + &self, + location: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result<PutResult> { + self.0.put_opts(location, payload, opts).await + } + + async fn put_multipart_opts( + &self, + _location: &Path, + _opts: PutMultipartOptions, + ) -> Result<Box<dyn MultipartUpload>> { + todo!() + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> { + self.0.get_opts(location, options).await + } + + async fn delete(&self, _: &Path) -> Result<()> { + todo!() + } + + fn list(&self, _: Option<&Path>) -> BoxStream<'static, Result<ObjectMeta>> { + todo!() + } + + async fn list_with_delimiter(&self, _: Option<&Path>) -> Result<ListResult> { + todo!() + } + + async fn copy(&self, _: &Path, _: &Path) -> Result<()> { + todo!() + } + + async fn copy_if_not_exists(&self, _: &Path, _: &Path) -> Result<()> { + todo!() + } +} + +#[tokio::test] +async fn test_get_range() { + let tmp = tempdir().unwrap(); + let store = MyStore(LocalFileSystem::new_with_prefix(tmp.path()).unwrap()); + let path = Path::from("foo"); + + let expected = Bytes::from_static(b"hello world"); + store.put(&path, expected.clone().into()).await.unwrap(); + let fetched = store.get(&path).await.unwrap().bytes().await.unwrap(); + assert_eq!(expected, fetched); + + for range in [0..10, 3..5, 0..expected.len() as u64] { + let data = store.get_range(&path, range.clone()).await.unwrap(); + assert_eq!( + &data[..], + &expected[range.start as usize..range.end as usize] + ) + } + + let over_range = 0..(expected.len() as u64 * 2); + let data = store.get_range(&path, over_range.clone()).await.unwrap(); + assert_eq!(&data[..], expected) +} + +/// Test that, when requesting a range which overhangs the end of the resource, +/// the resulting [GetResult::range] reports the returned range, +/// not the requested one. +#[tokio::test] +async fn test_get_opts_over_range() { + let tmp = tempdir().unwrap(); + let store = MyStore(LocalFileSystem::new_with_prefix(tmp.path()).unwrap()); + let path = Path::from("foo"); + + let expected = Bytes::from_static(b"hello world"); + store.put(&path, expected.clone().into()).await.unwrap(); + + let opts = GetOptions { + range: Some(GetRange::Bounded(0..(expected.len() as u64 * 2))), + ..Default::default() + }; + let res = store.get_opts(&path, opts).await.unwrap(); + assert_eq!(res.range, 0..expected.len() as u64); + assert_eq!(res.bytes().await.unwrap(), expected); +} diff --git a/rust/object_store/tests/http.rs b/rust/object_store/tests/http.rs new file mode 100644 index 0000000000..cb0b7d6ba4 --- /dev/null +++ b/rust/object_store/tests/http.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests the HTTP store implementation + +#[cfg(feature = "http")] +use object_store::{http::HttpBuilder, path::Path, GetOptions, GetRange, ObjectStore}; + +#[cfg(all(feature = "http", target_arch = "wasm32", target_os = "unknown"))] +use wasm_bindgen_test::*; + +/// Tests that even when reqwest has the `gzip` feature enabled, the HTTP store +/// does not error on a missing `Content-Length` header. +#[tokio::test] +#[cfg(feature = "http")] +async fn test_http_store_gzip() { + let http_store = HttpBuilder::new() + .with_url("https://raw.githubusercontent.com/apache/arrow-rs/refs/heads/main") + .build() + .unwrap(); + + let _ = http_store + .get_opts( + &Path::parse("LICENSE.txt").unwrap(), + GetOptions { + range: Some(GetRange::Bounded(0..100)), + ..Default::default() + }, + ) + .await + .unwrap(); +} + +#[cfg(all(feature = "http", target_arch = "wasm32", target_os = "unknown"))] +#[wasm_bindgen_test] +async fn basic_wasm_get() { + let http_store = HttpBuilder::new() + .with_url("https://raw.githubusercontent.com/apache/arrow-rs/refs/heads/main") + .build() + .unwrap(); + + let _ = http_store + .get_opts( + &Path::parse("LICENSE.txt").unwrap(), + GetOptions { + range: Some(GetRange::Bounded(0..100)), + ..Default::default() + }, + ) + .await + .unwrap(); +} diff --git a/rust/reqwest/.github/FUNDING.yml b/rust/reqwest/.github/FUNDING.yml new file mode 100644 index 0000000000..a6b3376dae --- /dev/null +++ b/rust/reqwest/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [seanmonstar] diff --git a/rust/reqwest/.github/dependabot.yml b/rust/reqwest/.github/dependabot.yml new file mode 100644 index 0000000000..aa6d8d8c17 --- /dev/null +++ b/rust/reqwest/.github/dependabot.yml @@ -0,0 +1,19 @@ +version: 2 + +# Only enable cargo, turn off npm from wasm example +updates: + - package-ecosystem: "github-actions" + # Workflow files stored in the + # default location of `.github/workflows` + directory: "/" + schedule: + interval: "daily" + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "daily" + # todo: if only this worked, see https://github.com/dependabot/dependabot-core/issues/4009 + # only tell us if there's a new 'breaking' change we could upgrade to + # versioning-strategy: increase-if-necessary + # disable regular version updates, security updates are unaffected + open-pull-requests-limit: 0 diff --git a/rust/reqwest/.github/workflows/ci.yml b/rust/reqwest/.github/workflows/ci.yml new file mode 100644 index 0000000000..e8890c836a --- /dev/null +++ b/rust/reqwest/.github/workflows/ci.yml @@ -0,0 +1,369 @@ +name: CI + +on: + pull_request: + push: + branches: + - master + +env: + REQWEST_TEST_BODY_FULL: 1 + RUST_BACKTRACE: 1 + CARGO_INCREMENTAL: 0 + CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse + +jobs: + ci-pass: + name: CI is green + runs-on: ubuntu-latest + needs: + - style + - test + - features + - unstable + - nightly + - msrv + - android + - wasm + - docs + steps: + - run: exit 0 + + style: + name: Check Style + + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v5 + + - name: Install rust + uses: dtolnay/rust-toolchain@stable + 
with: + components: rustfmt + + - name: cargo fmt -- --check + run: cargo fmt -- --check + + - name: temporary workaround - fmt all files under src + # Workaround for rust-lang/cargo#7732 + run: cargo fmt -- --check $(find . -name '*.rs' -print) + + test: + name: ${{ matrix.name }} + needs: [style] + + runs-on: ${{ matrix.os || 'ubuntu-latest' }} + + # The build matrix does not yet support 'allow failures' at job level. + # See `jobs.nightly` for the active nightly job definition. + strategy: + matrix: + name: + - linux / stable + - linux / beta + # - linux / nightly + - macOS / stable + - windows / stable-x86_64-msvc + - windows / stable-i686-msvc + - windows / stable-x86_64-gnu + - windows / stable-i686-gnu + - "feat.: default-tls disabled" + - "feat.: rustls-tls" + - "feat.: rustls-tls-manual-roots" + - "feat.: rustls-tls-native-roots" + - "feat.: rustls-tls-no-provider" + - "feat.: native-tls" + - "feat.: default-tls and rustls-tls" + - "feat.: rustls-tls and rustls-tls-no-provider" + - "feat.: cookies" + - "feat.: blocking" + - "feat.: blocking only" + - "feat.: gzip" + - "feat.: brotli" + - "feat.: deflate" + - "feat.: json" + - "feat.: multipart" + - "feat.: stream" + - "feat.: socks/default-tls" + - "feat.: socks/rustls-tls" + - "feat.: hickory-dns" + + include: + - name: linux / stable + - name: linux / beta + rust: beta + # - name: linux / nightly + # rust: nightly + - name: macOS / stable + os: macOS-latest + + - name: windows / stable-x86_64-msvc + os: windows-latest + target: x86_64-pc-windows-msvc + features: "--features blocking,gzip,brotli,zstd,deflate,json,multipart,stream" + - name: windows / stable-i686-msvc + os: windows-latest + target: i686-pc-windows-msvc + features: "--features blocking,gzip,brotli,zstd,deflate,json,multipart,stream" + - name: windows / stable-x86_64-gnu + os: windows-latest + rust: stable-x86_64-pc-windows-gnu + target: x86_64-pc-windows-gnu + features: "--features blocking,gzip,brotli,zstd,deflate,json,multipart,stream" + package_name: mingw-w64-x86_64-gcc + mingw64_path: "C:\\msys64\\mingw64\\bin" + - name: windows / stable-i686-gnu + os: windows-latest + rust: stable-i686-pc-windows-gnu + target: i686-pc-windows-gnu + features: "--features blocking,gzip,brotli,zstd,deflate,json,multipart,stream" + package_name: mingw-w64-i686-gcc + mingw64_path: "C:\\msys64\\mingw32\\bin" + + - name: "feat.: default-tls disabled" + features: "--no-default-features" + - name: "feat.: rustls-tls" + features: "--no-default-features --features rustls-tls" + - name: "feat.: rustls-tls-manual-roots" + features: "--no-default-features --features rustls-tls-manual-roots" + - name: "feat.: rustls-tls-native-roots" + features: "--no-default-features --features rustls-tls-native-roots" + - name: "feat.: rustls-tls-no-provider" + features: "--no-default-features --features rustls-tls-no-provider" + - name: "feat.: native-tls" + features: "--features native-tls" + - name: "feat.: rustls-tls and rustls-tls-no-provider" + features: "--features rustls-tls,rustls-tls-no-provider" + - name: "feat.: default-tls and rustls-tls" + features: "--features rustls-tls" + - name: "feat.: cookies" + features: "--features cookies" + - name: "feat.: blocking" + features: "--features blocking" + - name: "feat.: blocking only" + features: "--no-default-features --features blocking" + - name: "feat.: gzip" + features: "--features gzip,stream" + - name: "feat.: brotli" + features: "--features brotli,stream" + - name: "feat.: zstd" + features: "--features zstd,stream" + - name: "feat.: deflate" + 
features: "--features deflate,stream" + - name: "feat.: json" + features: "--features json" + - name: "feat.: multipart" + features: "--features multipart" + - name: "feat.: stream" + features: "--features stream" + - name: "feat.: socks/default-tls" + features: "--features socks" + - name: "feat.: socks/rustls-tls" + features: "--features socks,rustls-tls" + - name: "feat.: hickory-dns" + features: "--features hickory-dns" + + steps: + - name: Checkout + uses: actions/checkout@v5 + + - name: Install rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust || 'stable' }} + targets: ${{ matrix.target }} + + - name: Add mingw-w64 to path for i686-gnu + run: | + echo "${{ matrix.mingw64_path }}" >> $GITHUB_PATH + echo "C:\msys64\usr\bin" >> $GITHUB_PATH + if: matrix.mingw64_path + shell: bash + + - name: Update gcc + if: matrix.package_name + run: pacman.exe -Sy --noconfirm ${{ matrix.package_name }} + + - name: Create Cargo.lock + run: cargo update + + - uses: Swatinem/rust-cache@v2 + + - uses: taiki-e/install-action@v2 + with: + tool: cargo-nextest + + - name: Run tests + run: | + set -euxo pipefail + cargo nextest run --locked --workspace ${{ matrix.features }} ${{ matrix.test-features }} + cargo test --locked --workspace --doc ${{ matrix.features }} ${{ matrix.test-features }} + shell: bash + + features: + name: features + needs: [style] + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v5 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Install cargo-hack + uses: taiki-e/install-action@cargo-hack + + - uses: Swatinem/rust-cache@v2 + + - name: check --feature-powerset + run: cargo hack --no-dev-deps check --feature-powerset --depth 2 --skip http3,__tls,__rustls,__rustls-ring,native-tls-vendored,trust-dns + env: + RUSTFLAGS: "-D dead_code -D unused_imports" + + unstable: + name: "unstable features" + needs: [style] + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v5 + + - name: Install rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: 'stable' + + - name: Check + run: cargo test --features http3,stream + env: + RUSTFLAGS: --cfg reqwest_unstable + RUSTDOCFLAGS: --cfg reqwest_unstable + + docs: + name: Docs + needs: [test] + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Check documentation + env: + RUSTDOCFLAGS: --cfg reqwest_unstable -D warnings + run: cargo doc --no-deps --document-private-items --all-features + + # Separate build job for nightly because of the missing feature for allowed failures at + # job level. See `jobs.build.strategy.matrix`. + nightly: + name: linux / nightly + needs: [style] + + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v5 + + - name: Install rust + uses: dtolnay/rust-toolchain@nightly + + - name: Check minimal versions + env: + RUSTFLAGS: --cfg reqwest_unstable + # See https://github.com/rust-lang/rust/issues/113152 + # We don't force a newer openssl, but a newer one is required for + # this CI runner, because of the version of Ubuntu. 
+ run: | + cargo clean + cargo update -Z minimal-versions + cargo update -p proc-macro2 --precise 1.0.87 + cargo update -p openssl-sys + cargo update -p openssl + cargo check + cargo check --all-features + + msrv: + name: MSRV + needs: [style] + + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v5 + + - uses: dtolnay/rust-toolchain@stable + - name: Resolve MSRV aware dependencies + run: cargo update + env: + CARGO_RESOLVER_INCOMPATIBLE_RUST_VERSIONS: fallback + + - name: Get MSRV package metadata + id: metadata + run: cargo metadata --no-deps --format-version 1 | jq -r '"msrv=" + .packages[0].rust_version' >> $GITHUB_OUTPUT + + - name: Install rust (${{ steps.metadata.outputs.msrv }}) + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ steps.metadata.outputs.msrv }} + + - uses: Swatinem/rust-cache@v2 + + - name: Check + run: cargo check + + android: + name: Android + needs: [style] + + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v5 + + - name: Install rust + uses: dtolnay/rust-toolchain@stable + with: + target: aarch64-linux-android + + - name: Build + # disable default-tls feature since cross-compiling openssl is dragons + run: cargo build --target aarch64-linux-android --no-default-features + + wasm: + name: WASM + needs: [style] + + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v5 + + - name: Install rust + uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-unknown-unknown + + - name: Check + run: cargo check --target wasm32-unknown-unknown + + - name: Check cookies + run: cargo check --target wasm32-unknown-unknown --features cookies + + - name: Install wasm-pack + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + + - name: Wasm-pack test firefox + run: wasm-pack test --headless --firefox + + - name: Wasm-pack test chrome + run: wasm-pack test --headless --chrome diff --git a/rust/reqwest/.gitignore b/rust/reqwest/.gitignore new file mode 100644 index 0000000000..a57891807f --- /dev/null +++ b/rust/reqwest/.gitignore @@ -0,0 +1,4 @@ +target +Cargo.lock +*.swp +.idea \ No newline at end of file diff --git a/rust/reqwest/CHANGELOG.md b/rust/reqwest/CHANGELOG.md new file mode 100644 index 0000000000..b98bf90550 --- /dev/null +++ b/rust/reqwest/CHANGELOG.md @@ -0,0 +1,957 @@ +## v0.12.24 + +- Refactor cookie handling to an internal middleware. +- Refactor internal random generator. +- Refactor base64 encoding to reduce a copy. +- Documentation updates. + +## v0.12.23 + +- Add `ClientBuilder::unix_socket(path)` option that will force all requests over that Unix Domain Socket. +- Add `ClientBuilder::retry(policy)` and `reqwest::retry::Builder` to configure automatic retries. +- Add `ClientBuilder::dns_resolver2()` with more ergonomic argument bounds, allowing more resolver implementations. +- Add `http3_*` options to `blocking::ClientBuilder`. +- Fix default TCP timeout values to be enabled and faster. +- Fix SOCKS proxies to default to port 1080. +- (wasm) Add cache methods to `RequestBuilder`. + +## v0.12.22 + +- Fix socks proxies when resolving IPv6 destinations. + +## v0.12.21 + +- Fix socks proxy to use `socks4a://` instead of `socks4h://`. +- Fix `Error::is_timeout()` to check for hyper and IO timeouts too. +- Fix request `Error` to again include URLs when possible. +- Fix socks connect error to include more context. +- (wasm) Implement `Default` for `Body`.
+ +## v0.12.20 + +- Add `ClientBuilder::tcp_user_timeout(Duration)` option to set `TCP_USER_TIMEOUT`. +- Fix proxy headers only using the first matched proxy. +- (wasm) Fix re-adding `Error::is_status()`. + +## v0.12.19 + +- Fix redirect that changes the method to GET to also remove payload headers. +- Fix redirect to only check the next scheme if the policy action is to follow. +- (wasm) Fix compilation error if `cookies` feature is enabled (by the way, it's a noop feature in wasm). + +## v0.12.18 + +- Fix compilation when `socks` is enabled without TLS. + +## v0.12.17 + +- Fix compilation on macOS. + +## v0.12.16 + +- Add `ClientBuilder::http3_congestion_bbr()` to enable BBR congestion control. +- Add `ClientBuilder::http3_send_grease()` to configure whether to send QUIC grease. +- Add `ClientBuilder::http3_max_field_section_size()` to configure the maximum size of the response headers. +- Add `ClientBuilder::tcp_keepalive_interval()` to configure TCP probe interval. +- Add `ClientBuilder::tcp_keepalive_retries()` to configure TCP probe count. +- Add `Proxy::headers()` to add extra headers that should be sent to a proxy. +- Fix `redirect::Policy::limited()` which had an off-by-1 error, allowing 1 more redirect than specified. +- Fix HTTP/3 to support streaming request bodies. +- (wasm) Fix null bodies when calling `Response::bytes_stream()`. + +## v0.12.15 + +- Fix Windows to support both `ProxyOverride` and `NO_PROXY`. +- Fix http3 to support streaming response bodies. +- Fix http3 dependency from public API misuse. + +## v0.12.14 + +- Fix missing `fetch_mode_no_cors()`, marking as deprecated when not on WASM. + +## v0.12.13 + +- Add `Form::into_reader()` for blocking `multipart` forms. +- Add `Form::into_stream()` for async `multipart` forms. +- Add support for SOCKS4a proxies. +- Fix decoding responses with multiple zstd frames. +- Fix `RequestBuilder::form()` from overwriting a previously set `Content-Type` header, like the other builder methods. +- Fix cloning of request timeout in `blocking::Request`. +- Fix http3 synchronization of connection creation, reducing unnecessary extra connections. +- Fix Windows system proxy to use `ProxyOverride` as a `NO_PROXY` value. +- Fix blocking read to correctly reserve and zero read buffer. +- (wasm) Add support for request timeouts. +- (wasm) Fix `Error::is_timeout()` to return true when from a request timeout. + +## v0.12.12 + +- (wasm) Fix compilation by not compiling `tokio/time` on WASM. + +## v0.12.11 + +- Fix decompression returning an error when HTTP/2 ends with an empty data frame. + +## v0.12.10 + +- Add `ClientBuilder::connector_layer()` to allow customizing the connector stack. +- Add `ClientBuilder::http2_max_header_list_size()` option. +- Fix propagating body size hint (`content-length`) information when wrapping bodies. +- Fix decompression of chunked bodies so the connections can be reused more often. + +## v0.12.9 + +- Add `tls::CertificateRevocationLists` support. +- Add crate features to enable webpki roots without selecting a rustls provider. +- Fix `connection_verbose()` to output read logs. +- Fix `multipart::Part::file()` to automatically include content-length. +- Fix proxy to internally no longer cache system proxy settings. + +## v0.12.8 + +- Add support for SOCKS4 proxies. +- Add `multipart::Form::file()` method for adding files easily. +- Add `Body::wrap()` to wrap any `http_body::Body` type. +- Fix the pool configuration to use a timer to remove expired connections. + + +## v0.12.7 + +- Revert adding `impl Service<http::Request<Body>>` for `Client`.
+ +## v0.12.6 + +- Add support for `danger_accept_invalid_hostnames` for `rustls`. +- Add `impl Service<http::Request<Body>>` for `Client` and `&'_ Client`. +- Add support for `!Sync` bodies in `Body::wrap_stream()`. +- Enable happy eyeballs when `hickory-dns` is used. +- Fix `Proxy` so that `HTTP(S)_PROXY` values take precedence over `ALL_PROXY`. +- Fix `blocking::RequestBuilder::header()` from unsetting `sensitive` on passed header values. + +## v0.12.5 + +- Add `blocking::ClientBuilder::dns_resolver()` method to change DNS resolver in blocking client. +- Add `http3` feature back, still requiring `reqwest_unstable`. +- Add `rustls-tls-no-provider` Cargo feature to use rustls without a crypto provider. +- Fix `Accept-Encoding` header combinations. +- Fix http3 resolving IPv6 addresses. +- Internal: upgrade to rustls 0.23. + +## v0.12.4 + +- Add `zstd` support, enabled with `zstd` Cargo feature. +- Add `ClientBuilder::read_timeout(Duration)`, which applies the duration for each read operation. The timeout resets after a successful read. + +## v0.12.3 + +- Add `FromStr` for `dns::Name`. +- Add `ClientBuilder::built_in_webpki_certs(bool)` to enable them separately. +- Add `ClientBuilder::built_in_native_certs(bool)` to enable them separately. +- Fix sending `content-length: 0` for GET requests. +- Fix response body `content_length()` to return value when timeout is configured. +- Fix `ClientBuilder::resolve()` to use lowercase domain names. + +## v0.12.2 + +- Fix missing ALPN when connecting to socks5 proxy with rustls. +- Fix TLS version limits with rustls. +- Fix ALPN h2 from the server not being detected with native-tls. + +## v0.12.1 + +- Fix `ClientBuilder::interface()` when no TLS is enabled. +- Fix `TlsInfo::peer_certificate()` being truncated with rustls. +- Fix panic if `http2` feature disabled but TLS negotiated h2 in ALPN. +- Fix `Display` for `Error` to not include its source error. + +# v0.12.0 + +- Upgrade to `hyper`, `http`, and `http-body` v1. +- Add better support for converting to and from `http::Request` and `http::Response`. +- Add `http2` optional cargo feature, default on. +- Add `charset` optional cargo feature, default on. +- Add `macos-system-configuration` cargo feature, default on. +- Change all optional dependencies to no longer be exposed as implicit features. +- Add `ClientBuilder::interface(str)` to specify the local interface to bind to. +- Experimental: disables the `http3` feature temporarily. + +## v0.11.27 + +- Add `hickory-dns` feature, deprecating `trust-dns`. +- (wasm) Fix `Form::text()` to not set octet-stream for plain text fields. + +## v0.11.26 + +- Revert `system-configuration` upgrade, which broke MSRV on macOS. + +## v0.11.25 + +- Fix `Certificate::from_pem_bundle()` parsing. +- Fix Apple linker errors from detecting system proxies. + +## v0.11.24 + +- Add `Certificate::from_pem_bundle()` to add a bundle. +- Add `http3_prior_knowledge()` to blocking client builder. +- Remove `Sync` bounds requirement for `Body::wrap_stream()`. +- Fix HTTP/2 to retry `REFUSED_STREAM` requests. +- Fix instances of converting `Url` to `Uri` that could panic. + +## v0.11.23 + +- Add `Proxy::custom_http_auth(val)` for setting the raw `Proxy-Authorization` header when connecting to proxies. +- Fix redirect to reject locations that are not `http://` or `https://`. +- Fix setting `nodelay` when TLS is enabled but URL is HTTP. +- (wasm) Add `ClientBuilder::user_agent(val)`. +- (wasm) Add `multipart::Form::headers(headers)`. + +## v0.11.22 + +- Fix compilation on Windows when `trust-dns` is enabled.
+ +## v0.11.21 + +- Add automatically detecting macOS proxy settings. +- Add `ClientBuilder::tls_info(bool)`, which will put `tls::TlsInfo` into the response extensions. +- Fix trust-dns resolver from possible hangs. +- Fix connect timeout to be split among multiple IP addresses. + +## v0.11.20 + +- Fix `deflate` decompression back to using zlib, as outlined in the spec. + +## v0.11.19 + +- Add `ClientBuilder::http1_ignore_invalid_headers_in_responses()` option. +- Add `ClientBuilder::http1_allow_spaces_after_header_name_in_responses()` option. +- Add support for `ALL_PROXY` environment variable. +- Add support for `use_preconfigured_tls` when combined with HTTP/3. +- Fix `deflate` decompression from using the zlib decoder. +- Fix `Response::{text, text_with_charset}()` to strip BOM characters. +- Fix a panic when HTTP/3 is used if UDP isn't able to connect. +- Fix some dependencies for HTTP/3. +- Increase MSRV to 1.63. + +## v0.11.18 + +- Fix `RequestBuilder::json()` method from overriding a previously set `content-type` header. An existing value will be left in place. +- Upgrade internal dependencies for rustls and compression. + +## v0.11.17 + +- Upgrade internal dependencies of Experimental HTTP/3 to use quinn v0.9. +- (wasm) Fix blob url support. + +## v0.11.16 + +- Chore: set MSRV in `Cargo.toml`. +- Docs: fix build on docs.rs + +## v0.11.15 + +- Add `RequestBuilder` methods to split and reconstruct from its parts. +- Add experimental HTTP/3 support. +- Fix `connection_verbose` to log `write_vectored` calls. +- (wasm) Make requests actually cancel if the future is dropped. + +## v0.11.14 + +- Adds `Proxy::no_proxy(url)` that works like the NO_PROXY environment variable. +- Adds `multipart::Part::headers(headers)` method to add custom headers. +- (wasm) Add `Response::bytes_stream()`. +- Perf: several internal optimizations reducing copies and memory allocations. + +## v0.11.13 + +- Add `ClientBuilder::dns_resolver()` option for custom DNS resolvers. +- Add `ClientBuilder::tls_sni(bool)` option to enable or disable TLS Server Name Indication. +- Add `Identity::from_pkcs8_pem()` constructor when using `native-tls`. +- Fix `redirect::Policy::limited(0)` from following any redirects. + +## v0.11.12 + +- Add `ClientBuilder::resolve_to_addrs()` which allows a slice of IP addresses to be specified for a single host. +- Add `Response::upgrade()` to await whether the server agrees to an HTTP upgrade. + +## v0.11.11 + +- Add HTTP/2 keep-alive configuration methods on `ClientBuilder`. +- Add `ClientBuilder::http1_allow_obsolete_multiline_headers_in_responses()`. +- Add `impl Service<Request>` for `Client` and `&'_ Client`. +- (wasm) Add `RequestBuilder::basic_auth()`. +- Fix `RequestBuilder::header` to not override `sensitive` if user explicitly set on a `HeaderValue`. +- Fix rustls parsing of elliptic curve private keys. +- Fix Proxy URL parsing of some invalid targets. + +## v0.11.10 + +- Add `Error::url()` to access the URL of an error. +- Add `Response::extensions()` to access the `http::Extensions` of a response. +- Fix `rustls-native-certs` to log an error instead of panicking when loading an invalid system certificate. +- Fix passing Basic Authorization header to proxies. + +## v0.11.9 + +- Add `ClientBuilder::http09_responses(bool)` option to allow receiving HTTP/0.9 responses. +- Fix HTTP/2 to retry requests interrupted by an HTTP/2 graceful shutdown. +- Fix proxy loading from environment variables to ignore empty values. + +## v0.11.8 + +- Update internal webpki-roots dependency.
+ +## v0.11.7 + +- Add `blocking::ClientBuilder::resolve()` option, matching the async builder. +- Implement `From<File>` for `Body`. +- Fix `blocking` request-scoped timeout applying to bodies as well. +- (wasm) Fix request bodies using multipart vs formdata. +- Update internal `rustls` to 0.20. + +## v0.11.6 + +- (wasm) Fix request bodies more. + +## v0.11.5 + +- Add `ClientBuilder::http1_only()` method. +- Add `tls::Version` type, and `ClientBuilder::min_tls_version()` and `ClientBuilder::max_tls_version()` methods. +- Implement `TryFrom<Request>` for `http::Request`. +- Implement `Clone` for `Identity`. +- Fix `NO_PROXY` environment variable parsing to more closely match curl's. Comma-separated entries are now trimmed for whitespace, and `*` is allowed to match everything. +- Fix redirection to respect `https_only` option. +- (wasm) Add `Body::as_bytes()` method. +- (wasm) Fix sometimes wrong conversion of bytes into a `JsValue`. +- (wasm) Avoid dependency on serde-serialize feature. + +## v0.11.4 + +- Add `ClientBuilder::resolve()` option to override DNS resolution for specific domains. +- Add `native-tls-alpn` Cargo feature to use ALPN with the native-tls backend. +- Add `ClientBuilder::deflate()` option and `deflate` Cargo feature to support decoding response bodies using deflate. +- Add `RequestBuilder::version()` to allow setting the HTTP version of a request. +- Fix allowing "invalid" certificates with the `rustls-tls` backend, when the server uses TLS v1.2 or v1.3. +- (wasm) Add `try_clone` to `Request` and `RequestBuilder`. + +## v0.11.3 + +- Add `impl From<File> for reqwest::Body`. +- (wasm) Add credentials mode methods to `RequestBuilder`. + +## v0.11.2 + +- Add `CookieStore` trait to customize the type that stores and retrieves cookies for a session. +- Add `cookie::Jar` as a default `CookieStore`, easing creating some session cookies before creating the `Client`. +- Add `ClientBuilder::http2_adaptive_window()` option to configure an adaptive HTTP2 flow control behavior. +- Add `ClientBuilder::http2_max_frame_size()` option to adjust the maximum HTTP2 frame size that can be received. +- Implement `IntoUrl` for `String`, making it more convenient to create requests with `format!`. + +## v0.11.1 + +- Add `ClientBuilder::tls_built_in_root_certs()` option to disable built-in root certificates. +- Fix `rustls-tls` glue to more often support ALPN to upgrade to HTTP/2. +- Fix proxy parsing to assume `http://` if no scheme is found. +- Fix connection pool idle reaping by enabling hyper's `runtime` feature. +- (wasm) Add `Request::new()` constructor. + +# v0.11.0 + +- Change `multipart` to be an optional cargo feature. +- Remove deprecated methods. + +- Update to Tokio v1.0. +- Update to Bytes v1.0. +- Update to hyper v0.14. + +## v0.10.10 + +- Add `tcp_keepalive` option to `blocking::ClientBuilder`. +- Add `multipart::Part::stream_with_length` constructor, to create a streaming part with a known length. +- Add `ClientBuilder::https_only` option, to allow requiring URLs to be `https`. +- Change default `tcp_keepalive` value to be disabled. + +## v0.10.9 + +- Add `rustls-tls-native-roots`, `rustls-tls-webpki-roots`, and `rustls-tls-manual-roots` Cargo features, to configure which certificate roots to use with rustls. +- Add `ClientBuilder::tcp_keepalive()` method to enable TCP keepalive. +- Add `ClientBuilder::http1_writev()` method to force enable or disable vectored writes. +- Add `Error::is_connect()` method to identify if the error is related to connection-establishment.
+- Add `blocking::ClientBuilder::brotli()` method. +- Windows: Update default protocol to HTTP for HTTPS system proxies, when a protocol is not specified. +- (wasm) Add support for Cloudflare workers runtime. +- (wasm) Add `ClientBuilder::default_headers()` method. +- (wasm) Add `RequestBuilder::build()` method. + +## v0.10.8 + +- Add `must_use` to `RequestBuilder` and `ClientBuilder`. +- Fix Windows system proxy detection of Fiddler proxies. +- (wasm) Add `headers` method to `RequestBuilder`. +- (wasm) Add `execute` method to `Client`. +- (wasm) Add `TryFrom<http::Request>` for `Request`. +- (wasm) Fix checking for global `window` to work in non-browser environments. +- (wasm) Fix sending of an empty body when not required. + +## v0.10.7 + +- Add `NO_PROXY` environment variable support. +- Add more `Error::{is_request, is_body, is_decode}` getters. +- Add conversion of `reqwest::ClientBuilder` to `reqwest::blocking::ClientBuilder`. +- Add `headers_mut()` to `reqwest::blocking::Response`. +- (wasm) Add `form()`, `query()`, `multipart` and `bearer_auth()` to `RequestBuilder`. + +## v0.10.6 + +- Changed handling of URLs that don't have `http:` or `https:` schemes, returning an error instead. +- Fixed a potential hyper-rustls feature conflict. + +## v0.10.5 + +- Add `ClientBuilder::pool_idle_timeout` option. +- Add `ClientBuilder::pool_max_idle_per_host` option, deprecate `max_idle_per_host`. +- Add `Response::content_length` for WASM target. +- Enable TCP_NODELAY by default. +- Implement `TryFrom<http::Request>` for `blocking::Request`. +- Implement `TryFrom<http::Request>` for `Request`. + - Removes `From<http::Request>` for `Request`. + - This is technically a breaking change, but was a mistake. It was not valid to convert from an `http::Request` to a `reqwest::Request` in an infallible fashion. It would panic if the conversion was not possible. Instead, the implementation has been changed to `TryFrom` to indicate it could fail. + +## v0.10.4 + +- Add `trust-dns` optional feature to change DNS resolver. +- Add `bytes()` method to `reqwest::blocking::Response`. +- Add `buffer()` method to `reqwest::blocking::Body`. +- Implement `From<http::Request>` for `reqwest::Request`. + +## v0.10.3 + +- Upgrade internal `rustls` version. + +## v0.10.2 + +- Add Brotli support, enabled with the optional `brotli` feature. +- Add `ClientBuilder::use_preconfigured_tls(tls_connector)` allowing manual configuration of TLS options. +- Implement `Default` for blocking `Client`, `ClientBuilder`, and `multipart::Form`. +- (wasm) Add `Response::error_for_status()` method. +- (wasm) Add `Response::json()` method. +- (wasm) Implement `Default` for `Client` and `ClientBuilder`. + +## v0.10.1 + +- Add `socks` optional feature to support SOCKS5 proxies. +- Add `RequestBuilder::timeout()` to configure a timeout for a single request, instead of using the client's timeout. +- Add `ClientBuilder::connection_verbose()` option to enable verbose IO logs. +- (wasm) Add `RequestBuilder::fetch_mode_no_cors()` option. +- (wasm) Add `Response::url()` getter method. + +# v0.10.0 + +- Add `std::future::Future` support. +- Add `wasm32-unknown-unknown` support (with fewer features). +- Add ability to pass async `Response` as the `body` of another `Request`. +- Add `Body::as_bytes()` method. +- Add `Response::bytes_stream()` method to get body as an `impl Stream`. +- Add `Request::try_clone()` method. + +- Change default `Client` API to async. The previous blocking client API is available at `reqwest::blocking`. +- Change to no longer send a default `User-Agent` header. Add one via `ClientBuilder::user_agent()`.
+- Change to enable system/environment proxy detection by default. +- Change `default-tls` feature to only include `ClientBuilder` options that both `native-tls` and `rustls` support. +- Change default feature set to reduce unnecessary dependencies. Most features are disabled by default: + - `blocking`: The `reqwest::blocking` (synchronous) client API. + - `cookies`: Cookie store support. + - `gzip`: Automatic response body decompression. + - `json`: Request and response JSON body methods. + - `stream`: `futures::Stream` support. +- Change `Error` internal design, removing several `Error::is_*` inspector methods. +- Change Redirect API: + - Renamed types to be part of the `redirect` module (for example, `reqwest::RedirectPolicy` is now `reqwest::redirect::Policy`). + - Removed `loop_detected` and `too_many_redirect` methods from `redirect::Attempt`, replaced with a generic `error` method. + - The default policy no longer specifically looks for redirect loops (but they should be caught by the maximum limit). + +- Fix checking `HTTP_PROXY` environment variable when the environment is from a CGI script. +- Fix removal of username/password of parsed proxy URL. + +- Update `url` to v2.0. +- Update `hyper` to v0.13. +- Update `http` to v0.2. + + +## v0.9.19 + +- Add `ClientBuilder::use_sys_proxy()` to enable automatic detection of HTTP proxies configured on the system. +- Add `ClientBuilder::no_proxy()` to disable system proxies. This is the default for 0.9, but will change to detecting system proxies by default in 0.10. +- Add support for streaming request bodies in the async client. +- Add `async::Response::text()` that returns a `Future` of the full body decoded to a `String`. +- Add `Clone` for `Certificate`. + +## v0.9.18 + +- Fix `Cookie` headers to no longer send as percent-encoded (instead, exactly as sent by the server). + +## v0.9.17 + +- Fix `Cookie` headers to not include attributes from the `Set-Cookie` (like `HttpOnly`, `Secure`, etc.) + +## v0.9.16 + +- Add `Response::text_with_charset()` to allow setting the default charset to decode. +- Add `Error::source()` implementation. +- Add `async::ClientBuilder::timeout()` option, will timeout the connect, request, and response body futures. +- Fix gzip + chunked transfer encoding issue preventing connection reuse. +- Fix `RequestBuilder::query()` to not add just `"?"` if the encoded query is empty. +- Fix including new cookie headers when response is a redirect. + +## v0.9.15 + +- Fix sending of "appended" request headers. + +## v0.9.14 + +- Add optional support for SOCKS5 proxies, by enabling the `socks5` cargo feature. +- Add Cookie Store support to `Client`, automatically handling cookies for a session. +- Add `ClientBuilder::cookie_store(enable: bool)` method to enable a cookie store that persists across requests. +- Add `Response::cookies()` accessor that allows iterating over response cookies. +- Fix `Proxy` to check the URL for a username and password. + +## v0.9.13 + +### Fixes + +- Fix panic on some invalid `Location` headers during redirects (error is logged and redirect response is returned instead). +- Fix instance when server notices streaming request body is complete before reqwest does. + +## v0.9.12 + +### Features + +- Add `ClientBuilder::tcp_nodelay()` to allow disabling Nagle's algorithm. +- Add `ClientBuilder::max_idle_per_host()` to allow reducing the number of idle pooled connections. +- Add `RequestBuilder::bearer_auth()` method to async builder.
+
+### Fixes
+
+- Fix capitalization error in async `RequestBuilder::basic_auth()`.
+- Fix ALPN causing issues when using a Proxy.
+
+## v0.9.11
+
+### Features
+
+- Add `multipart::Form::percent_encode_noop()` to allow for servers which don't support percent encoding of parameters.
+- Add `ClientBuilder::http1_title_case_headers()` to force request headers to use Title-Case.
+- Add `ClientBuilder::connect_timeout()` to allow setting only a connect timeout.
+
+## v0.9.10
+
+### Features
+
+- Add `ClientBuilder::local_address()` to bind to a local IP address.
+- Add `Response::error_for_status_ref()` to return an `Error` while borrowing a `Response`.
+
+### Fixes
+
+- Fix `Identity::from_pem` with `rustls-tls` backend when using RSA private keys.
+
+## v0.9.9
+
+### Features
+
+- Add `ClientBuilder::h2_prior_knowledge()` option to force HTTP2.
+- Add `Response::content_length()` to get the content-length of a response.
+- Enable ALPN h2 with the rustls-tls backend.
+
+## v0.9.8
+
+### Fixes
+
+- Revert default DNS resolver to `getaddrinfo` in a threadpool. There is now a `trust-dns` optional feature to enable the Trust-DNS resolver.
+- Detect `Certificate` and `Identity` errors at construction time.
+
+## v0.9.7
+
+### Fixes
+
+- Fix DNS resolver on Android (reverted back to `getaddrinfo`).
+- Fix sending unicode `filename`s in `multipart/form-data` requests.
+
+## v0.9.6
+
+### Features
+
+- Add `Proxy::basic_auth` method to support proxy authorization.
+- Add `rustls-tls` optional feature to use rustls instead of native-tls.
+- Add `try_clone` method to `Request` and `RequestBuilder`.
+- Add `reqwest::async::multipart` support, similar to the synchronous API.
+- Add `default-tls-vendored` optional feature to vendor OpenSSL.
+
+### Fixes
+
+- Fix panic from top-level `reqwest::get` if the client builder fails to build.
+- Removed timeout waiting for `reqwest::Client` runtime to start up.
+- Fix `RequestBuilder::headers` to properly append extra headers of the same name.
+
+
+### Performance
+
+- Replaced DNS threadpool using `getaddrinfo` with a non-blocking DNS resolver.
+
+## v0.9.5
+
+### Features
+
+- Adds `Response::remote_addr()` method to check the address of the connection used.
+- Adds `default-tls` crate feature, enabled by default, which allows users to *disable* TLS.
+
+## v0.9.4
+
+### Features
+
+- Adds `percent_encoding_path_segment` and `percent_encoding_attr_char` configuration to `multipart::Form`.
+
+### Fixes
+
+- Reverts `multipart::Form` default percent encoding format to `path-segment`.
+
+## v0.9.3
+
+### Features
+
+- Adds `multipart::Part::bytes()` to create a part of raw bytes.
+- Adds constructors for `Response` to help with testing.
+
+### Fixes
+
+- Properly percent-encode more illegal characters in multipart filenames.
+- Ensure timed out requests cancel the associated async task.
+
+## v0.9.2
+
+### Fixes
+
+- Fix panic when the `Location` header has UTF-8 characters.
+
+## v0.9.1
+
+### Fixes
+
+- Fix large request bodies failing because of improper handling of backpressure.
+- Remove body-related headers when a redirect changes a `POST` into a `GET`.
+- Reduce the memory size of `Response` and `Error` significantly.
+
+# v0.9.0
+
+### Features
+
+- Upgrade to `tokio` 0.1.
+- Upgrade to `hyper` 0.12.
+- Upgrade to `native-tls` 0.2.
+- Add `ClientBuilder::danger_accept_invalid_certs(bool)` to disable
+  certificate verification.
+- Add `RequestBuilder::bearer_auth(token)` to ease sending bearer tokens.
+- Add `headers()` and `headers_mut()` to `multipart::Part` to allow sending
+  extra headers for a specific part.
+- Moved `reqwest::unstable::async` to `reqwest::async`.
+
+### Fixes
+
+- Fix panicking when passing a `Url` with a `file://` scheme. Instead, an
+  `Error` is returned.
+
+### Breaking Changes
+
+- Changed `ClientBuilder::danger_disable_hostname_verification()`
+  to `ClientBuilder::danger_accept_invalid_hostnames(bool)`.
+- Changed `ClientBuilder` to be a by-value builder instead of by-ref.
+
+  For single chains of method calls, this shouldn't affect you. For code that
+  conditionally uses the builder, this kind of change is needed:
+
+  ```rust
+  // Old
+  let mut builder = ClientBuilder::new();
+  if some_val {
+      builder.gzip(false);
+  }
+  let client = builder.build()?;
+
+  // New
+  let mut builder = ClientBuilder::new();
+  if some_val {
+      builder = builder.gzip(false);
+  }
+  let client = builder.build()?;
+  ```
+- Changed `RequestBuilder` to be a by-value builder instead of by-ref.
+
+  See the previous note about `ClientBuilder` for affected code and
+  how to change it.
+- Removed the `unstable` cargo-feature, and moved `reqwest::unstable::async`
+  to `reqwest::async`.
+- Changed `multipart::Part::mime()` to `mime_str()`.
+
+  ```rust
+  // Old
+  let part = multipart::Part::file(path)?
+      .mime(mime::TEXT_PLAIN);
+
+  // New
+  let part = multipart::Part::file(path)?
+      .mime_str("text/plain")?;
+  ```
+- The upgrade to `hyper` 0.12 means a temporary removal of the typed headers.
+
+  The `RequestBuilder` has simple methods to set headers using strings, which
+  can work in most places.
+
+  ```rust
+  // Old
+  client
+      .get("https://hyper.rs")
+      .header(UserAgent::new("hallo"))
+      .send()?;
+
+  // New
+  client
+      .get("https://hyper.rs")
+      .header("user-agent", "hallo")
+      .send()?;
+  ```
+
+  To ease the transition, there is a `hyper-011` cargo-feature that can be
+  enabled.
+
+  ```toml
+  [dependencies]
+  reqwest = { version = "0.9", features = ["hyper-011"] }
+  ```
+
+  And then usage:
+
+  ```rust
+  client
+      .get("https://hyper.rs")
+      .header_011(reqwest::hyper_011::header::UserAgent::new("hallo"))
+      .send()?;
+  ```
+
+
+## v0.8.8
+
+- Fix docs.rs/reqwest build.
+
+## v0.8.7
+
+### Fixes
+
+- Send an extra CRLF at the end of multipart requests, since some servers expect it.
+- Removed internal dependency on `tokio-proto`, which removed the unsafe `small-vec`
+  dependency.
+
+## v0.8.6
+
+### Features
+
+- Add `RedirectAttempt::status` to check the status code that triggered the redirect.
+- Add `RedirectPolicy::redirect` method publicly, to allow composing policies.
+
+## v0.8.5
+
+### Features
+
+- Try to auto-detect encoding in `Response::text()`.
+- Add `Certificate::from_pem` to load PEM encoded client certificates.
+- Allow unsized types in `query`, `form`, and `json`.
+- Add `unstable::async::RequestBuilder::query`, mirroring the stable builder method.
+
+## v0.8.4
+
+### Features
+
+- Add `RequestBuilder::query` to easily adjust query parameters of requests.
+
+## v0.8.3
+
+### Features
+
+- Upgrades internal log crate usage to v0.4.
+
+## v0.8.2
+
+### Fixes
+
+- Enable hyper's `no_proto` config, fixing several bugs in hyper.
+
+## v0.8.1
+
+### Features
+
+- Add `ClientBuilder::default_headers` to set headers used for every request.
+- Add `async::ClientBuilder::dns_threads` to set the number of threads used for DNS.
+- Add `Response::text` as a shortcut to read the full body into a `String`.
+- Add `Response::copy_to` as a shortcut for `std::io::copy`.
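+
+  These conveniences still exist in the modern API. A minimal sketch using the
+  current `blocking` cargo feature (not the 0.8-era types; `HeaderMap` comes
+  from the re-exported `http` crate):
+
+  ```rust
+  use reqwest::header::{HeaderMap, HeaderValue, ACCEPT};
+
+  fn main() -> Result<(), Box<dyn std::error::Error>> {
+      let mut headers = HeaderMap::new();
+      headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
+
+      // Headers set here are sent with every request made by this client.
+      let client = reqwest::blocking::Client::builder()
+          .default_headers(headers)
+          .build()?;
+
+      let mut res = client.get("https://hyper.rs").send()?;
+      // `copy_to` is the shortcut for `std::io::copy` mentioned above.
+      res.copy_to(&mut std::io::stdout())?;
+      Ok(())
+  }
+  ```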
+
+# v0.8.0
+
+### Features
+
+- Client TLS Certificates (#43)
+- GZIP decoding has been added to the **async** Client (#161)
+- `ClientBuilder` and `RequestBuilder` hold their errors till consumed (#189)
+- `async::Response::body()` now returns a reference to the body instead of consuming the `Response`
+- A default timeout for `reqwest::Client` is now used, set to 30 seconds (#181)
+
+### Breaking Changes
+
+- `Client::new` no longer returns a `Result`.
+
+  To handle any panics that come from `Client::new`, the builder can be used instead.
+- `ClientBuilder` and `RequestBuilder` hold their errors till consumed (#189).
+
+  This means a bunch of `?` will be going away, but it also means using the builders will be far easier now. Any error encountered inside the builders will now be returned when the builder is consumed.
+
+  To get errors back immediately, the `Request` type can be used directly, by building pieces separately and calling setters.
+- `async::Response::body()` now returns a reference to the body instead of consuming the `Response`.
+- A default timeout for `reqwest::Client` is now used, set to 30 seconds (#181).
+
+  For uses where the timeout is too short, it can be changed on the `ClientBuilder`, using the `timeout` method. Passing `None` will disable the timeout, reverting to the pre-0.8 behavior.
+
+## v0.7.3
+
+### Features
+
+- `Proxy::custom(fn)` to allow dynamically picking a proxy URL
+
+### Fixes
+
+- fix occasional panic when the program exits while `Client` or `Response` are dropping.
+
+## v0.7.2
+
+### Fixes
+
+- fix a panic when redirecting and an `Authorization` header was added (https://github.com/seanmonstar/reqwest/commit/cf246d072badd9b31b487e7a0b00490e4cc9584f)
+- fix redirects so that a GET will follow 307/308 responses (https://github.com/seanmonstar/reqwest/commit/2d11a4bd7167e1bf3a35b62f5aeb36d5d294e56e)
+
+## v0.7.1
+
+### Fixes
+
+- remove accidental `println`s in the sending of a body
+- some documentation improvements
+
+# v0.7.0
+
+### Features
+
+- Proxy support (#30)
+- Self-signed TLS certificates (#97)
+- Disabling TLS hostname validation (#89)
+- A `Request` type that can be used instead of the `RequestBuilder` (#85)
+- Add `Response::error_for_status()` to easily convert 400 and 500 status responses into an `Error` (#98)
+- Upgrade hyper to 0.11
+  - Synchronous `Client` remains.
+  - Timeouts now affect DNS and socket connection.
+  - Pool much better at evicting sockets when they die.
+  - An `unstable` Cargo feature to enable `reqwest::unstable::async`.
+- A huge docs improvement!
+
+### Fixes
+
+- Publicly exports `RedirectAction` and `RedirectAttempt`
+- `Error::get_ref` returns `Error + Send + Sync`
+
+### Breaking Changes
+
+- hyper has been upgraded to 0.11, so `header`, `StatusCode`, and `Method` have breaking changes.
+- `mime` has been upgraded to 0.3, with a very different API.
+- All configuration methods have been removed from the `Client`, and moved to the `ClientBuilder`.
+- The `HttpVersion` type was completely removed.
+- `Error::cause()` now returns `Error::get_ref().cause()`.
+- All methods on `Client` that start a `RequestBuilder` now return a `Result` immediately, instead of delaying the URL parse error for later.
+- The `RequestBuilder` methods all take `&mut self`, instead of moving the builder, and return `&mut Self`. (This shouldn't actually affect most people who are building a request in a single chain.)
+- `Response::status()` returns a `StatusCode` instead of `&StatusCode`.
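+
+  `error_for_status()` is still the idiomatic way to turn 4xx/5xx responses into
+  errors in the current API. A minimal sketch (modern async client, Tokio
+  runtime assumed):
+
+  ```rust
+  // Sketch only: any 400-599 status becomes an `Err` instead of a body.
+  async fn must_succeed(url: &str) -> Result<String, reqwest::Error> {
+      reqwest::get(url)
+          .await?
+          .error_for_status()? // Err for client/server error statuses
+          .text()
+          .await
+  }
+  ```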
+
+## v0.6.2
+
+### Features
+
+- adds `Client::referer(bool)` option to disable setting the `Referer` header during redirects (https://github.com/seanmonstar/reqwest/commit/bafcd7ae6fc232856dd6ddb8bf5b20dbbbfe0bc9)
+
+### Fixes
+
+- fixes filtering sensitive headers during redirects (https://github.com/seanmonstar/reqwest/issues/10)
+- fixes sending of the Referer to an HTTP site when coming from HTTPS, and explicitly removes the username and fragment (https://github.com/seanmonstar/reqwest/commit/d8696045b4c6bc4d9e33789cff6a9e1fa75462d7)
+- documentation updates
+
+## v0.6.1
+
+### Features
+
+- adds `Error::get_ref` to get the underlying error that may have occurred. Includes a `'static` bound, which allows for downcasting (as opposed to `Error::cause`).
+
+# v0.6.0
+
+### Features
+
+- Upgraded to serde `1.0`
+- Added a `url` [method](https://docs.rs/reqwest/0.6.0/reqwest/struct.Error.html#method.url) to `Error`, which returns the `Url` possibly associated with this error.
+- Added `req.basic_auth(user, optional_pass)` [method](https://docs.rs/reqwest/0.6.0/reqwest/struct.RequestBuilder.html#method.basic_auth) to ease using `Basic` authentication.
+
+### Breaking Changes
+
+- The publicly exposed peer dependency serde was upgraded. It is now `serde@1.0`. A mismatched version will give a compiler error that a serde trait is not implemented.
+- `Error` is no longer an `enum`, but an opaque struct. Details about it can be checked with `std::error::Error::cause()`, and methods on `reqwest::Error` include `is_http()`, `is_serialization()`, and `is_redirect()`.
+- `RedirectPolicy::custom` receives different arguments, and returns different values. See the [docs](https://docs.rs/reqwest/0.6.0/reqwest/struct.RedirectPolicy.html#method.custom) for an example.
+
+## v0.5.2
+
+### Fixes
+
+- fix panic with Gzip decoder on an empty body (https://github.com/seanmonstar/reqwest/issues/82)
+
+## v0.5.1
+
+### Features
+
+- add `Clone` implementation for `Client`
+
+# v0.5.0
+
+### Features
+
+- Automatic GZIP decoding: By default, `Client` will try to decode any responses that appear to be gzip encoded (based on headers). This can be disabled via `client.gzip(false)` (https://github.com/seanmonstar/reqwest/commit/ab5e477a123319efd4b17f3666b41b44ec244bee)
+- Specify a timeout for requests using `client.timeout(duration)`. (https://github.com/seanmonstar/reqwest/commit/ec049fefbae7355f6e4ddbbc7ebedcadb30e1e04)
+- Request bodies with a known length can be constructed with `Body::sized()` (https://github.com/seanmonstar/reqwest/commit/82f1877d4b6cba2fac432670ec306160aee5c501)
+- Add `Client.put`, `Client.patch`, and `Client.delete` convenience methods (https://github.com/seanmonstar/reqwest/commit/c37b8aa0338ac4142763d206c6df79856915056d, https://github.com/seanmonstar/reqwest/commit/4d6582d22b23c27927e481a9c8a83ad08cfd1a2a, https://github.com/seanmonstar/reqwest/commit/a3983f3122b2d1495ea36bb5a8fd019a7605ae56)
+- Add `reqwest::mime` (https://github.com/seanmonstar/reqwest/commit/0615c6d65e03ba9cb5364169c9e74f4f2a91554b)
+
+### Breaking Changes
+
+The only breaking change is a behavioral one; all programs should still compile without modification. The automatic GZIP decoding could interfere in cases where a user was expecting the GZIP bytes, either to save to a file or decode themselves. To restore this functionality, set `client.gzip(false)`.
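+
+  The same opt-out still exists in the current API, on the builder and behind
+  the `gzip` cargo feature. A minimal sketch (modern API with the `blocking`
+  and `gzip` features enabled, not the 0.5-era `client.gzip(false)` setter):
+
+  ```rust
+  // Sketch only: opt out of automatic gzip decompression for this client.
+  fn raw_body(url: &str) -> Result<bytes::Bytes, reqwest::Error> {
+      let client = reqwest::blocking::Client::builder()
+          .gzip(false) // don't auto-decode gzip response bodies
+          .build()?;
+      client.get(url).send()?.bytes()
+  }
+  ```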
+ +# v0.4.0 + +- updated to serde 0.9 + +# v0.3.0 + +- updated to hyper 0.10 + +# v0.2.0 + +### Features +- add `Response.json()` method (https://github.com/seanmonstar/reqwest/commit/2d10ecc99e2aaed66616294baaf65380b446e1c6) +- add `RedirectPolicy` (https://github.com/seanmonstar/reqwest/commit/e92b3e862a1a94c0b4173a7d49a315bc121da31e) +- set an `Accept: */*` header by default if no `Accept` header is set (https://github.com/seanmonstar/reqwest/commit/559ae8011a2c098f4fe1821ec1d3444a46f4bf5e) +- add support for 307 and 308 redirects (https://github.com/seanmonstar/reqwest/commit/a54447c1d9c75dab639333265f51a91a43e99c2e) +- implement `Sync` for `Client`, and `Send` for `RequestBuilder` and `Response` (https://github.com/seanmonstar/reqwest/commit/d18a53b3fcc81c4a60875755c8e95d777a343319) +- implement `Send` for `Error` (https://github.com/seanmonstar/reqwest/commit/20b161096e67d22c962e69b2656ae9741ac73c25) +- implement `std::fmt::Debug` for all public types (https://github.com/seanmonstar/reqwest/commit/d624b0ef29020c6085ec94651a990f58ccd684e2) + +### Breaking Changes +- `Error::Serialize` now has a `Box` instead of `Box` +- `RequestBuilder` no longer has an associated lifetime (was `RequestBuilder<'a>`) + +# v0.1.0 + +Initial release: http://seanmonstar.com/post/153221119046/introducing-reqwest diff --git a/rust/reqwest/Cargo.toml b/rust/reqwest/Cargo.toml new file mode 100644 index 0000000000..93568d4689 --- /dev/null +++ b/rust/reqwest/Cargo.toml @@ -0,0 +1,300 @@ +[package] +name = "reqwest" +version = "0.12.24" +description = "higher level HTTP client library" +keywords = ["http", "request", "client"] +categories = ["web-programming::http-client", "wasm"] +repository = "https://github.com/seanmonstar/reqwest" +documentation = "https://docs.rs/reqwest" +authors = ["Sean McArthur "] +readme = "README.md" +license = "MIT OR Apache-2.0" +edition = "2021" +rust-version = "1.64.0" +autotests = true +include = [ + "README.md", + "Cargo.toml", + "LICENSE-APACHE", + "LICENSE-MIT", + "src/**/*.rs" +] + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs", "--cfg", "reqwest_unstable"] +targets = ["x86_64-unknown-linux-gnu", "wasm32-unknown-unknown"] + +[package.metadata.playground] +features = [ + "blocking", + "cookies", + "json", + "multipart", +] + +[features] +default = ["default-tls", "charset", "http2", "system-proxy"] + +# Note: this doesn't enable the 'native-tls' feature, which adds specific +# functionality for it. +default-tls = ["dep:hyper-tls", "dep:native-tls-crate", "__tls", "dep:tokio-native-tls"] + +http2 = ["h2", "hyper/http2", "hyper-util/http2", "hyper-rustls?/http2"] + +# Enables native-tls specific functionality not available by default. 
+native-tls = ["default-tls"] +native-tls-alpn = ["native-tls", "native-tls-crate?/alpn", "hyper-tls?/alpn"] +native-tls-vendored = ["native-tls", "native-tls-crate?/vendored"] + +rustls-tls = ["rustls-tls-webpki-roots"] +rustls-tls-no-provider = ["rustls-tls-manual-roots-no-provider"] + +rustls-tls-manual-roots-no-provider = ["__rustls"] +rustls-tls-webpki-roots-no-provider = ["dep:webpki-roots", "hyper-rustls?/webpki-tokio", "__rustls"] +rustls-tls-native-roots-no-provider = ["dep:rustls-native-certs", "hyper-rustls?/native-tokio", "__rustls"] + +rustls-tls-manual-roots = ["rustls-tls-manual-roots-no-provider", "__rustls-ring"] +rustls-tls-webpki-roots = ["rustls-tls-webpki-roots-no-provider", "__rustls-ring"] +rustls-tls-native-roots = ["rustls-tls-native-roots-no-provider", "__rustls-ring"] + +blocking = ["dep:futures-channel", "futures-channel?/sink", "dep:futures-util", "futures-util?/io", "futures-util?/sink", "tokio/sync"] + +charset = ["dep:encoding_rs", "dep:mime"] + +cookies = ["dep:cookie_crate", "dep:cookie_store"] + +gzip = ["dep:async-compression", "async-compression?/gzip", "dep:futures-util", "dep:tokio-util"] + +brotli = ["dep:async-compression", "async-compression?/brotli", "dep:futures-util", "dep:tokio-util"] + +zstd = ["dep:async-compression", "async-compression?/zstd", "dep:futures-util", "dep:tokio-util"] + +deflate = ["dep:async-compression", "async-compression?/zlib", "dep:futures-util", "dep:tokio-util"] + +json = ["dep:serde_json"] + +multipart = ["dep:mime_guess", "dep:futures-util"] + +# Deprecated, remove this feature while bumping minor versions. +trust-dns = [] +hickory-dns = ["dep:hickory-resolver", "dep:once_cell"] + +stream = ["tokio/fs", "dep:futures-util", "dep:tokio-util", "dep:wasm-streams"] + +socks = [] + +# Use the system's proxy configuration. +system-proxy = ["hyper-util/client-proxy-system"] + +# Deprecated, switch to system-proxy. +macos-system-configuration = ["system-proxy"] + +# Experimental HTTP/3 client. +http3 = ["rustls-tls-manual-roots", "dep:h3", "dep:h3-quinn", "dep:quinn", "tokio/macros"] + + +# Internal (PRIVATE!) features used to aid testing. +# Don't rely on these whatsoever. They may disappear at any time. + +# Enables common types used for TLS. Useless on its own. +__tls = ["dep:rustls-pki-types", "tokio/io-util"] + +# Enables common rustls code. +# Equivalent to rustls-tls-manual-roots but shorter :) +__rustls = ["dep:hyper-rustls", "dep:tokio-rustls", "dep:rustls", "__tls"] +__rustls-ring = ["hyper-rustls?/ring", "tokio-rustls?/ring", "rustls?/ring", "quinn?/ring"] + +[dependencies] +base64 = "0.22" +http = "1.1" +url = "2.4" +bytes = "1.2" +serde = "1.0" +serde_urlencoded = "0.7.1" +futures-core = { version = "0.3.28", default-features = false } +futures-util = { version = "0.3.28", default-features = false, optional = true } +sync_wrapper = { version = "1.0", features = ["futures"] } +tracing = { version = "0.1" } + +# Optional deps... 
+ +## json +serde_json = { version = "1.0", optional = true } +## multipart +mime_guess = { version = "2.0", default-features = false, optional = true } + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +encoding_rs = { version = "0.8", optional = true } +http-body = "1" +http-body-util = "0.1" +hyper = { version = "1.1", features = ["http1", "client"] } +hyper-util = { version = "0.1.12", features = ["http1", "client", "client-legacy", "client-proxy", "tokio"] } +h2 = { version = "0.4", optional = true } +log = "0.4.17" +percent-encoding = "2.3" +tokio = { version = "1.0", default-features = false, features = ["net", "time"] } +tower = { version = "0.5.2", default-features = false, features = ["retry", "timeout", "util"] } +tower-service = "0.3" +tower-http = { version = "0.6.5", default-features = false, features = ["follow-redirect"] } +pin-project-lite = "0.2.11" + +# Optional deps... +rustls-pki-types = { version = "1.9.0", features = ["std"], optional = true } +mime = { version = "0.3.16", optional = true } + +## default-tls +hyper-tls = { version = "0.6", optional = true } +native-tls-crate = { version = "0.2.10", optional = true, package = "native-tls" } +tokio-native-tls = { version = "0.3.0", optional = true } + +# rustls-tls +hyper-rustls = { version = "0.27.0", default-features = false, optional = true, features = ["http1", "tls12"] } +rustls = { version = "0.23.4", optional = true, default-features = false, features = ["std", "tls12"] } +tokio-rustls = { version = "0.26", optional = true, default-features = false, features = ["tls12"] } +webpki-roots = { version = "1", optional = true } +rustls-native-certs = { version = "0.8.0", optional = true } + +## cookies +cookie_crate = { version = "0.18.0", package = "cookie", optional = true } +cookie_store = { version = "0.21.0", optional = true } + +## compression +async-compression = { version = "0.4.0", default-features = false, features = ["tokio"], optional = true } +tokio-util = { version = "0.7.9", default-features = false, features = ["codec", "io"], optional = true } + +## hickory-dns +hickory-resolver = { version = "0.25", optional = true, features = ["tokio"] } +once_cell = { version = "1.18", optional = true } + +# HTTP/3 experimental support +h3 = { version = "0.0.8", optional = true } +h3-quinn = { version = "0.0.10", optional = true } +quinn = { version = "0.11.1", default-features = false, features = ["rustls", "runtime-tokio"], optional = true } +futures-channel = { version = "0.3", optional = true } + +[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] +env_logger = "0.10" +hyper = { version = "1.1.0", default-features = false, features = ["http1", "http2", "client", "server"] } +hyper-util = { version = "0.1.12", features = ["http1", "http2", "client", "client-legacy", "server-auto", "server-graceful", "tokio"] } +serde = { version = "1.0", features = ["derive"] } +flate2 = "1.0.13" +brotli_crate = { package = "brotli", version = "8" } +zstd_crate = { package = "zstd", version = "0.13" } +doc-comment = "0.3" +tokio = { version = "1.0", default-features = false, features = ["macros", "rt-multi-thread"] } +futures-util = { version = "0.3.28", default-features = false, features = ["std", "alloc"] } + +# wasm + +[target.'cfg(target_arch = "wasm32")'.dependencies] +js-sys = "0.3.77" +serde_json = "1.0" +wasm-bindgen = "0.2.89" +wasm-bindgen-futures = "0.4.18" +wasm-streams = { version = "0.4", optional = true } + +[target.'cfg(target_arch = "wasm32")'.dependencies.web-sys] +version = "0.3.28" 
+features = [ + "AbortController", + "AbortSignal", + "Headers", + "Request", + "RequestInit", + "RequestMode", + "Response", + "Window", + "FormData", + "Blob", + "BlobPropertyBag", + "ServiceWorkerGlobalScope", + "RequestCredentials", + "File", + "ReadableStream", + "RequestCache" +] + +[target.'cfg(target_arch = "wasm32")'.dev-dependencies] +wasm-bindgen = { version = "0.2.89", features = ["serde-serialize"] } +wasm-bindgen-test = "0.3" + +[dev-dependencies] +tower = { version = "0.5.2", default-features = false, features = ["limit"] } +num_cpus = "1.0" +libc = "0" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(reqwest_unstable)'] } + +[[example]] +name = "blocking" +path = "examples/blocking.rs" +required-features = ["blocking"] + +[[example]] +name = "json_dynamic" +path = "examples/json_dynamic.rs" +required-features = ["json"] + +[[example]] +name = "json_typed" +path = "examples/json_typed.rs" +required-features = ["json"] + +[[example]] +name = "tor_socks" +path = "examples/tor_socks.rs" +required-features = ["socks"] + +[[example]] +name = "form" +path = "examples/form.rs" + +[[example]] +name = "simple" +path = "examples/simple.rs" + +[[example]] +name = "h3_simple" +path = "examples/h3_simple.rs" +required-features = ["http3", "rustls-tls"] + +[[example]] +name = "connect_via_lower_priority_tokio_runtime" +path = "examples/connect_via_lower_priority_tokio_runtime.rs" + +[[test]] +name = "blocking" +path = "tests/blocking.rs" +required-features = ["blocking"] + +[[test]] +name = "cookie" +path = "tests/cookie.rs" +required-features = ["cookies"] + +[[test]] +name = "gzip" +path = "tests/gzip.rs" +required-features = ["gzip", "stream"] + +[[test]] +name = "brotli" +path = "tests/brotli.rs" +required-features = ["brotli", "stream"] + +[[test]] +name = "zstd" +path = "tests/zstd.rs" +required-features = ["zstd", "stream"] + +[[test]] +name = "deflate" +path = "tests/deflate.rs" +required-features = ["deflate", "stream"] + +[[test]] +name = "multipart" +path = "tests/multipart.rs" +required-features = ["multipart"] diff --git a/rust/reqwest/LICENSE-APACHE b/rust/reqwest/LICENSE-APACHE new file mode 100644 index 0000000000..348c8e163c --- /dev/null +++ b/rust/reqwest/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright 2016 Sean McArthur + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/rust/reqwest/LICENSE-MIT b/rust/reqwest/LICENSE-MIT new file mode 100644 index 0000000000..e2eb715908 --- /dev/null +++ b/rust/reqwest/LICENSE-MIT @@ -0,0 +1,20 @@ +Copyright (c) 2016-2025 Sean McArthur + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
diff --git a/rust/reqwest/README.md b/rust/reqwest/README.md
new file mode 100644
index 0000000000..279fe5f1aa
--- /dev/null
+++ b/rust/reqwest/README.md
@@ -0,0 +1,86 @@
+# reqwest
+
+[![crates.io](https://img.shields.io/crates/v/reqwest.svg)](https://crates.io/crates/reqwest)
+[![Documentation](https://docs.rs/reqwest/badge.svg)](https://docs.rs/reqwest)
+[![MIT/Apache-2 licensed](https://img.shields.io/crates/l/reqwest.svg)](./LICENSE-APACHE)
+[![CI](https://github.com/seanmonstar/reqwest/actions/workflows/ci.yml/badge.svg)](https://github.com/seanmonstar/reqwest/actions/workflows/ci.yml)
+
+An ergonomic, batteries-included HTTP Client for Rust.
+
+- Async and blocking `Client`s
+- Plain bodies, JSON, urlencoded, multipart
+- Customizable redirect policy
+- HTTP Proxies
+- HTTPS via system-native TLS (or optionally, rustls)
+- Cookie Store
+- WASM
+
+
+## Example
+
+This asynchronous example uses [Tokio](https://tokio.rs) and enables some
+optional features, so your `Cargo.toml` could look like this:
+
+```toml
+[dependencies]
+reqwest = { version = "0.12", features = ["json"] }
+tokio = { version = "1", features = ["full"] }
+```
+
+And then the code:
+
+```rust,no_run
+use std::collections::HashMap;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let resp = reqwest::get("https://httpbin.org/ip")
+        .await?
+        .json::<HashMap<String, String>>()
+        .await?;
+    println!("{resp:#?}");
+    Ok(())
+}
+```
+
+## Commercial Support
+
+For private advice, support, reviews, access to the maintainer, and the like, reach out for [commercial support][sponsor].
+
+## Requirements
+
+On Linux:
+
+- OpenSSL with headers. See https://docs.rs/openssl for supported versions
+  and more details. Alternatively you can enable the `native-tls-vendored`
+  feature to compile a copy of OpenSSL. Or, you can use [rustls](https://github.com/rustls/rustls)
+  via `rustls-tls` or other `rustls-tls-*` features.
+
+On Windows and macOS:
+
+- Nothing.
+
+By default, reqwest uses [rust-native-tls](https://github.com/sfackler/rust-native-tls),
+which will use the operating system TLS framework if available, meaning Windows
+and macOS. On Linux, it will use the available OpenSSL or fail to build if
+not found.
+
+
+## License
+
+Licensed under either of
+
+- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://apache.org/licenses/LICENSE-2.0)
+- MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
+
+### Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted
+for inclusion in the work by you, as defined in the Apache-2.0 license, shall
+be dual licensed as above, without any additional terms or conditions.
+
+## Sponsors
+
+Support this project by becoming a [sponsor][].
+
+[sponsor]: https://seanmonstar.com/sponsor
diff --git a/rust/reqwest/examples/blocking.rs b/rust/reqwest/examples/blocking.rs
new file mode 100644
index 0000000000..fb28487571
--- /dev/null
+++ b/rust/reqwest/examples/blocking.rs
@@ -0,0 +1,31 @@
+//! `cargo run --example blocking --features=blocking`
+#![deny(warnings)]
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    env_logger::init();
+
+    // Some simple CLI args requirements...
+    let url = match std::env::args().nth(1) {
+        Some(url) => url,
+        None => {
+            println!("No CLI URL provided, using default.");
+            "https://hyper.rs".into()
+        }
+    };
+
+    eprintln!("Fetching {url:?}...");
+
+    // reqwest::blocking::get() is a convenience function.
+    //
+    // In most cases, you should create/build a reqwest::Client and reuse
+    // it for all requests.
+    let mut res = reqwest::blocking::get(url)?;
+
+    eprintln!("Response: {:?} {}", res.version(), res.status());
+    eprintln!("Headers: {:#?}\n", res.headers());
+
+    // copy the response body directly to stdout
+    res.copy_to(&mut std::io::stdout())?;
+
+    Ok(())
+}
diff --git a/rust/reqwest/examples/connect_via_lower_priority_tokio_runtime.rs b/rust/reqwest/examples/connect_via_lower_priority_tokio_runtime.rs
new file mode 100644
index 0000000000..33151d4a13
--- /dev/null
+++ b/rust/reqwest/examples/connect_via_lower_priority_tokio_runtime.rs
@@ -0,0 +1,264 @@
+#![deny(warnings)]
+// This example demonstrates how to delegate the connect calls, which contain TLS handshakes,
+// to a secondary tokio runtime of lower OS thread priority using a custom tower layer.
+// This helps to ensure that long-running futures during handshake crypto operations don't block other I/O futures.
+//
+// This does introduce overhead of additional threads, channels, extra vtables, etc,
+// so it is best suited to services with large numbers of incoming connections or that
+// are otherwise very sensitive to any blocking futures. Or, you might want fewer threads
+// and/or to use the current_thread runtime.
+//
+// This is using the `tokio` runtime and certain other dependencies:
+//
+// `tokio = { version = "1", features = ["full"] }`
+// `num_cpus = "1.0"`
+// `libc = "0"`
+// `pin-project-lite = "0.2"`
+// `tower = { version = "0.5", default-features = false}`
+
+#[cfg(not(target_arch = "wasm32"))]
+#[tokio::main]
+async fn main() -> Result<(), reqwest::Error> {
+    background_threadpool::init_background_runtime();
+    tokio::time::sleep(std::time::Duration::from_millis(10)).await;
+
+    let client = reqwest::Client::builder()
+        .connector_layer(background_threadpool::BackgroundProcessorLayer::new())
+        .build()
+        .expect("should be able to build reqwest client");
+
+    let url = if let Some(url) = std::env::args().nth(1) {
+        url
+    } else {
+        println!("No CLI URL provided, using default.");
+        "https://hyper.rs".into()
+    };
+
+    eprintln!("Fetching {url:?}...");
+
+    let res = client.get(url).send().await?;
+
+    eprintln!("Response: {:?} {}", res.version(), res.status());
+    eprintln!("Headers: {:#?}\n", res.headers());
+
+    let body = res.text().await?;
+
+    println!("{body}");
+
+    Ok(())
+}
+
+// separating out for convenience to avoid a million #[cfg(not(target_arch = "wasm32"))]
+#[cfg(not(target_arch = "wasm32"))]
+mod background_threadpool {
+    use std::{
+        future::Future,
+        pin::Pin,
+        sync::OnceLock,
+        task::{Context, Poll},
+    };
+
+    use futures_util::TryFutureExt;
+    use pin_project_lite::pin_project;
+    use tokio::{runtime::Handle, select, sync::mpsc::error::TrySendError};
+    use tower::{BoxError, Layer, Service};
+
+    static CPU_HEAVY_THREAD_POOL: OnceLock<
+        tokio::sync::mpsc::Sender<Pin<Box<dyn Future<Output = ()> + Send + 'static>>>,
+    > = OnceLock::new();
+
+    pub(crate) fn init_background_runtime() {
+        std::thread::Builder::new()
+            .name("cpu-heavy-background-threadpool".to_string())
+            .spawn(move || {
+                let rt = tokio::runtime::Builder::new_multi_thread()
+                    .thread_name("cpu-heavy-background-pool-thread")
+                    .worker_threads(num_cpus::get() as usize)
+                    // ref: https://github.com/tokio-rs/tokio/issues/4941
+                    // consider uncommenting if seeing heavy task contention
+                    // .disable_lifo_slot()
+                    .on_thread_start(move || {
+                        #[cfg(target_os = "linux")]
+                        unsafe {
+                            // Increase thread pool thread niceness, so they are lower priority
+                            // than the foreground executor and don't interfere with I/O tasks
+                            {
+                                *libc::__errno_location() = 0;
+                                if libc::nice(10) == -1 && *libc::__errno_location() != 0 {
+                                    let error = std::io::Error::last_os_error();
+                                    log::error!("failed to set threadpool niceness: {}", error);
+                                }
+                            }
+                        }
+                    })
+                    .enable_all()
+                    .build()
+                    .unwrap_or_else(|e| panic!("cpu heavy runtime failed_to_initialize: {}", e));
+                rt.block_on(async {
+                    log::debug!("starting background cpu-heavy work");
+                    process_cpu_work().await;
+                });
+            })
+            .unwrap_or_else(|e| panic!("cpu heavy thread failed_to_initialize: {}", e));
+    }
+
+    #[cfg(not(target_arch = "wasm32"))]
+    async fn process_cpu_work() {
+        // we only use this channel for routing work, it should move pretty quick, it can be small
+        let (tx, mut rx) = tokio::sync::mpsc::channel(10);
+        // share the handle to the background channel globally
+        CPU_HEAVY_THREAD_POOL.set(tx).unwrap();
+
+        while let Some(work) = rx.recv().await {
+            tokio::task::spawn(work);
+        }
+    }
+
+    // retrieve the sender to the background channel, and send the future over to it for execution
+    fn send_to_background_runtime(future: impl Future<Output = ()> + Send + 'static) {
+        let tx = CPU_HEAVY_THREAD_POOL.get().expect(
+            "start up the secondary tokio runtime before sending to `CPU_HEAVY_THREAD_POOL`",
+        );
+
+        match tx.try_send(Box::pin(future)) {
+            Ok(_) => (),
+            Err(TrySendError::Closed(_)) => {
+                panic!("background cpu heavy runtime channel is closed")
+            }
+            Err(TrySendError::Full(msg)) => {
+                log::warn!(
+                    "background cpu heavy runtime channel is full, task spawning loop delayed"
+                );
+                let tx = tx.clone();
+                Handle::current().spawn(async move {
+                    tx.send(msg)
+                        .await
+                        .expect("background cpu heavy runtime channel is closed")
+                });
+            }
+        }
+    }
+
+    // This tower layer injects futures with a oneshot channel, and then sends them to the background runtime for processing.
+    // We don't use the Buffer service because that is intended to process sequentially on a single task, whereas we want to
+    // spawn a new task per call.
+    #[derive(Copy, Clone)]
+    pub struct BackgroundProcessorLayer {}
+    impl BackgroundProcessorLayer {
+        pub fn new() -> Self {
+            Self {}
+        }
+    }
+    impl<S> Layer<S> for BackgroundProcessorLayer {
+        type Service = BackgroundProcessor<S>;
+        fn layer(&self, service: S) -> Self::Service {
+            BackgroundProcessor::new(service)
+        }
+    }
+
+    impl std::fmt::Debug for BackgroundProcessorLayer {
+        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+            f.debug_struct("BackgroundProcessorLayer").finish()
+        }
+    }
+
+    // This tower service injects futures with a oneshot channel, and then sends them to the background runtime for processing.
+    #[derive(Debug, Clone)]
+    pub struct BackgroundProcessor<S> {
+        inner: S,
+    }
+
+    impl<S> BackgroundProcessor<S> {
+        pub fn new(inner: S) -> Self {
+            BackgroundProcessor { inner }
+        }
+    }
+
+    impl<S, Request> Service<Request> for BackgroundProcessor<S>
+    where
+        S: Service<Request>,
+        S::Response: Send + 'static,
+        S::Error: Into<BoxError> + Send,
+        S::Future: Send + 'static,
+    {
+        type Response = S::Response;
+
+        type Error = BoxError;
+
+        type Future = BackgroundResponseFuture<S::Response>;
+
+        fn poll_ready(
+            &mut self,
+            cx: &mut std::task::Context<'_>,
+        ) -> std::task::Poll<Result<(), Self::Error>> {
+            match self.inner.poll_ready(cx) {
+                Poll::Pending => Poll::Pending,
+                Poll::Ready(r) => Poll::Ready(r.map_err(Into::into)),
+            }
+        }
+
+        fn call(&mut self, req: Request) -> Self::Future {
+            let response = self.inner.call(req);
+
+            // wrap our inner service's future with a future that writes to this oneshot channel
+            let (mut tx, rx) = tokio::sync::oneshot::channel();
+            let future = async move {
+                select!(
+                    _ = tx.closed() => {
+                        // receiver already dropped, don't need to do anything
+                    }
+                    result = response.map_err(|err| Into::<BoxError>::into(err)) => {
+                        // if this fails, the receiver already dropped, so we don't need to do anything
+                        let _ = tx.send(result);
+                    }
+                )
+            };
+            // send the wrapped future to the background
+            send_to_background_runtime(future);
+
+            BackgroundResponseFuture::new(rx)
+        }
+    }
+
+    // `BackgroundProcessor` response future
+    pin_project! {
+        #[derive(Debug)]
+        pub struct BackgroundResponseFuture<S> {
+            #[pin]
+            rx: tokio::sync::oneshot::Receiver<Result<S, BoxError>>,
+        }
+    }
+
+    impl<S> BackgroundResponseFuture<S> {
+        pub(crate) fn new(rx: tokio::sync::oneshot::Receiver<Result<S, BoxError>>) -> Self {
+            BackgroundResponseFuture { rx }
+        }
+    }
+
+    impl<S> Future for BackgroundResponseFuture<S>
+    where
+        S: Send + 'static,
+    {
+        type Output = Result<S, BoxError>;
+
+        fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+            let this = self.project();
+
+            // now poll on the receiver end of the oneshot to get the result
+            match this.rx.poll(cx) {
+                Poll::Ready(v) => match v {
+                    Ok(v) => Poll::Ready(v.map_err(Into::into)),
+                    Err(err) => Poll::Ready(Err(Box::new(err) as BoxError)),
+                },
+                Poll::Pending => Poll::Pending,
+            }
+        }
+    }
+}
+
+// The [cfg(not(target_arch = "wasm32"))] above prevent building the tokio::main function
+// for wasm32 target, because tokio isn't compatible with wasm32.
+// If you aren't building for wasm32, you don't need that line.
+// The two lines below avoid the "'main' function not found" error when building for wasm32 target.
+#[cfg(any(target_arch = "wasm32"))]
+fn main() {}
diff --git a/rust/reqwest/examples/form.rs b/rust/reqwest/examples/form.rs
new file mode 100644
index 0000000000..1595ee8895
--- /dev/null
+++ b/rust/reqwest/examples/form.rs
@@ -0,0 +1,23 @@
+// Short example of a POST request with form data.
+//
+// This is using the `tokio` runtime. You'll need the following dependency:
+//
+// `tokio = { version = "1", features = ["full"] }`
+#[cfg(not(target_arch = "wasm32"))]
+#[tokio::main]
+async fn main() {
+    let response = reqwest::Client::new()
+        .post("http://www.baidu.com")
+        .form(&[("one", "1")])
+        .send()
+        .await
+        .expect("send");
+    println!("Response status {}", response.status());
+}
+
+// The [cfg(not(target_arch = "wasm32"))] above prevent building the tokio::main function
+// for wasm32 target, because tokio isn't compatible with wasm32.
+// If you aren't building for wasm32, you don't need that line.
+// The two lines below avoid the "'main' function not found" error when building for wasm32 target.
+#[cfg(target_arch = "wasm32")]
+fn main() {}
diff --git a/rust/reqwest/examples/h3_simple.rs b/rust/reqwest/examples/h3_simple.rs
new file mode 100644
index 0000000000..53a2379e54
--- /dev/null
+++ b/rust/reqwest/examples/h3_simple.rs
@@ -0,0 +1,44 @@
+#![deny(warnings)]
+
+// This is using the `tokio` runtime. You'll need the following dependency:
+//
+// `tokio = { version = "1", features = ["full"] }`
+#[cfg(feature = "http3")]
+#[cfg(not(target_arch = "wasm32"))]
+#[tokio::main]
+async fn main() -> Result<(), reqwest::Error> {
+    let client = reqwest::Client::builder().http3_prior_knowledge().build()?;
+
+    // Some simple CLI args requirements...
+    let url = match std::env::args().nth(1) {
+        Some(url) => url,
+        None => {
+            println!("No CLI URL provided, using default.");
+            "https://hyper.rs".into()
+        }
+    };
+
+    eprintln!("Fetching {url:?}...");
+
+    let res = client
+        .get(url)
+        .version(http::Version::HTTP_3)
+        .send()
+        .await?;
+
+    eprintln!("Response: {:?} {}", res.version(), res.status());
+    eprintln!("Headers: {:#?}\n", res.headers());
+
+    let body = res.text().await?;
+
+    println!("{body}");
+
+    Ok(())
+}
+
+// The [cfg(not(target_arch = "wasm32"))] above prevent building the tokio::main function
+// for wasm32 target, because tokio isn't compatible with wasm32.
+// If you aren't building for wasm32, you don't need that line.
+// The two lines below avoid the "'main' function not found" error when building for wasm32 target.
+#[cfg(any(target_arch = "wasm32", not(feature = "http3")))]
+fn main() {}
diff --git a/rust/reqwest/examples/json_dynamic.rs b/rust/reqwest/examples/json_dynamic.rs
new file mode 100644
index 0000000000..a9e817aba5
--- /dev/null
+++ b/rust/reqwest/examples/json_dynamic.rs
@@ -0,0 +1,42 @@
+//! This example illustrates the way to send and receive arbitrary JSON.
+//!
+//! This is useful for some ad-hoc experiments and situations when you don't
+//! really care about the structure of the JSON and just need to display it or
+//! process it at runtime.
+
+// This is using the `tokio` runtime. You'll need the following dependency:
+//
+// `tokio = { version = "1", features = ["full"] }`
+#[tokio::main]
+async fn main() -> Result<(), reqwest::Error> {
+    let echo_json: serde_json::Value = reqwest::Client::new()
+        .post("https://jsonplaceholder.typicode.com/posts")
+        .json(&serde_json::json!({
+            "title": "Reqwest.rs",
+            "body": "https://docs.rs/reqwest",
+            "userId": 1
+        }))
+        .send()
+        .await?
+        .json()
+        .await?;
+
+    println!("{echo_json:#?}");
+    // Object(
+    //     {
+    //         "body": String(
+    //             "https://docs.rs/reqwest"
+    //         ),
+    //         "id": Number(
+    //             101
+    //         ),
+    //         "title": String(
+    //             "Reqwest.rs"
+    //         ),
+    //         "userId": Number(
+    //             1
+    //         )
+    //     }
+    // )
+    Ok(())
+}
diff --git a/rust/reqwest/examples/json_typed.rs b/rust/reqwest/examples/json_typed.rs
new file mode 100644
index 0000000000..49fe37052b
--- /dev/null
+++ b/rust/reqwest/examples/json_typed.rs
@@ -0,0 +1,48 @@
+//! This example illustrates the way to send and receive statically typed JSON.
+//!
+//! In contrast to the arbitrary JSON example, this brings up the full power of
+//! Rust's compile-time type system guarantees, though it requires a little bit
+//! more code.
+
+// These require the `serde` dependency.
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Post {
+    id: Option<i32>,
+    title: String,
+    body: String,
+    #[serde(rename = "userId")]
+    user_id: i32,
+}
+
+// This is using the `tokio` runtime.
You'll need the following dependency: +// +// `tokio = { version = "1", features = ["full"] }` +#[tokio::main] +async fn main() -> Result<(), reqwest::Error> { + let new_post = Post { + id: None, + title: "Reqwest.rs".into(), + body: "https://docs.rs/reqwest".into(), + user_id: 1, + }; + let new_post: Post = reqwest::Client::new() + .post("https://jsonplaceholder.typicode.com/posts") + .json(&new_post) + .send() + .await? + .json() + .await?; + + println!("{new_post:#?}"); + // Post { + // id: Some( + // 101 + // ), + // title: "Reqwest.rs", + // body: "https://docs.rs/reqwest", + // user_id: 1 + // } + Ok(()) +} diff --git a/rust/reqwest/examples/simple.rs b/rust/reqwest/examples/simple.rs new file mode 100644 index 0000000000..3920c5fcb5 --- /dev/null +++ b/rust/reqwest/examples/simple.rs @@ -0,0 +1,40 @@ +#![deny(warnings)] + +// This is using the `tokio` runtime. You'll need the following dependency: +// +// `tokio = { version = "1", features = ["full"] }` +#[cfg(not(target_arch = "wasm32"))] +#[tokio::main] +async fn main() -> Result<(), reqwest::Error> { + // Some simple CLI args requirements... + let url = if let Some(url) = std::env::args().nth(1) { + url + } else { + println!("No CLI URL provided, using default."); + "https://hyper.rs".into() + }; + + eprintln!("Fetching {url:?}..."); + + // reqwest::get() is a convenience function. + // + // In most cases, you should create/build a reqwest::Client and reuse + // it for all requests. + let res = reqwest::get(url).await?; + + eprintln!("Response: {:?} {}", res.version(), res.status()); + eprintln!("Headers: {:#?}\n", res.headers()); + + let body = res.text().await?; + + println!("{body}"); + + Ok(()) +} + +// The [cfg(not(target_arch = "wasm32"))] above prevent building the tokio::main function +// for wasm32 target, because tokio isn't compatible with wasm32. +// If you aren't building for wasm32, you don't need that line. +// The two lines below avoid the "'main' function not found" error when building for wasm32 target. +#[cfg(target_arch = "wasm32")] +fn main() {} diff --git a/rust/reqwest/examples/tor_socks.rs b/rust/reqwest/examples/tor_socks.rs new file mode 100644 index 0000000000..5196756fbf --- /dev/null +++ b/rust/reqwest/examples/tor_socks.rs @@ -0,0 +1,24 @@ +#![deny(warnings)] + +// This is using the `tokio` runtime. You'll need the following dependency: +// +// `tokio = { version = "1", features = ["full"] }` +#[tokio::main] +async fn main() -> Result<(), reqwest::Error> { + // Make sure you are running tor and this is your socks port + let proxy = reqwest::Proxy::all("socks5h://127.0.0.1:9050").expect("tor proxy should be there"); + let client = reqwest::Client::builder() + .proxy(proxy) + .build() + .expect("should be able to build reqwest client"); + + let res = client.get("https://check.torproject.org").send().await?; + println!("Status: {}", res.status()); + + let text = res.text().await?; + let is_tor = text.contains("Congratulations. 
This browser is configured to use Tor."); + println!("Is Tor: {is_tor}"); + assert!(is_tor); + + Ok(()) +} diff --git a/rust/reqwest/examples/wasm_github_fetch/.gitignore b/rust/reqwest/examples/wasm_github_fetch/.gitignore new file mode 100644 index 0000000000..db9e84f0cb --- /dev/null +++ b/rust/reqwest/examples/wasm_github_fetch/.gitignore @@ -0,0 +1,5 @@ +node_modules +pkg +target +Cargo.lock +*.swp diff --git a/rust/reqwest/examples/wasm_github_fetch/Cargo.toml b/rust/reqwest/examples/wasm_github_fetch/Cargo.toml new file mode 100644 index 0000000000..193370f22f --- /dev/null +++ b/rust/reqwest/examples/wasm_github_fetch/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "wasm" +version = "0.1.0" +authors = ["John Gallagher "] +edition = "2021" + +# Config mostly pulled from: https://github.com/rustwasm/wasm-bindgen/blob/master/examples/fetch/Cargo.toml + +[lib] +crate-type = ["cdylib"] + +[dependencies] +reqwest = {path = "../../"} +serde = { version = "1.0.101", features = ["derive"] } +serde_derive = "^1.0.59" +wasm-bindgen-futures = "0.4.1" +serde_json = "1.0.41" +wasm-bindgen = { version = "0.2.51", features = ["serde-serialize"] } diff --git a/rust/reqwest/examples/wasm_github_fetch/README.md b/rust/reqwest/examples/wasm_github_fetch/README.md new file mode 100644 index 0000000000..9a00393911 --- /dev/null +++ b/rust/reqwest/examples/wasm_github_fetch/README.md @@ -0,0 +1,15 @@ +## Example usage of Reqwest from WASM + +Install wasm-pack with + + npm install + +Then you can build the example locally with: + + + npm run serve + +and then visiting http://localhost:8080 in a browser should run the example! + + +This example is loosely based off of [this example](https://github.com/rustwasm/wasm-bindgen/blob/master/examples/fetch/src/lib.rs), an example usage of `fetch` from `wasm-bindgen`. 
\ No newline at end of file diff --git a/rust/reqwest/examples/wasm_github_fetch/index.js b/rust/reqwest/examples/wasm_github_fetch/index.js new file mode 100644 index 0000000000..27493dd306 --- /dev/null +++ b/rust/reqwest/examples/wasm_github_fetch/index.js @@ -0,0 +1,12 @@ +const rust = import('./pkg'); + +rust + .then(m => { + return m.run().then((data) => { + console.log(data); + + console.log("The latest commit to the wasm-bindgen %s branch is:", data.name); + console.log("%s, authored by %s <%s>", data.commit.sha, data.commit.commit.author.name, data.commit.commit.author.email); + }) + }) + .catch(console.error); \ No newline at end of file diff --git a/rust/reqwest/examples/wasm_github_fetch/osv-scanner.toml b/rust/reqwest/examples/wasm_github_fetch/osv-scanner.toml new file mode 100644 index 0000000000..ed8a263b2c --- /dev/null +++ b/rust/reqwest/examples/wasm_github_fetch/osv-scanner.toml @@ -0,0 +1,3 @@ +[[PackageOverrides]] +ecosystem = "npm" +ignore = true diff --git a/rust/reqwest/examples/wasm_github_fetch/package-lock.json b/rust/reqwest/examples/wasm_github_fetch/package-lock.json new file mode 100644 index 0000000000..2b43a2d40f --- /dev/null +++ b/rust/reqwest/examples/wasm_github_fetch/package-lock.json @@ -0,0 +1,6081 @@ +{ + "requires": true, + "lockfileVersion": 1, + "dependencies": { + "@types/events": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@types/events/-/events-3.0.0.tgz", + "integrity": "sha512-EaObqwIvayI5a8dCzhFrjKzVwKLxjoG9T6Ppd5CEo07LRKfQ8Yokw54r5+Wq7FaBQ+yXRvQAYPrHwya1/UFt9g==", + "dev": true + }, + "@types/glob": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/@types/glob/-/glob-7.1.1.tgz", + "integrity": "sha512-1Bh06cbWJUHMC97acuD6UMG29nMt0Aqz1vF3guLfG+kHHJhy3AyohZFFxYk2f7Q1SQIrNwvncxAE0N/9s70F2w==", + "dev": true, + "requires": { + "@types/events": "*", + "@types/minimatch": "*", + "@types/node": "*" + } + }, + "@types/minimatch": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/minimatch/-/minimatch-3.0.3.tgz", + "integrity": "sha512-tHq6qdbT9U1IRSGf14CL0pUlULksvY9OZ+5eEgl1N7t+OA3tGvNpxJCzuKQlsNgCVwbAs670L1vcVQi8j9HjnA==", + "dev": true + }, + "@types/node": { + "version": "12.11.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-12.11.5.tgz", + "integrity": "sha512-LC8ALj/24PhByn39nr5jnTvpE7MujK8y7LQmV74kHYF5iQ0odCPkMH4IZNZw+cobKfSXqaC8GgegcbIsQpffdA==", + "dev": true + }, + "@wasm-tool/wasm-pack-plugin": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@wasm-tool/wasm-pack-plugin/-/wasm-pack-plugin-1.0.1.tgz", + "integrity": "sha512-QPGoweQl6kgQMLMLboVMwUZiJ/OBoia0VaaWr6YoFGGx7tj7aG/+4HNK5Yu8RQGNFFdVnz2SWjRaGpALoZOhiQ==", + "dev": true, + "requires": { + "chalk": "^2.4.1", + "command-exists": "^1.2.7", + "watchpack": "^1.6.0" + } + }, + "@webassemblyjs/ast": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.8.5.tgz", + "integrity": "sha512-aJMfngIZ65+t71C3y2nBBg5FFG0Okt9m0XEgWZ7Ywgn1oMAT8cNwx00Uv1cQyHtidq0Xn94R4TAywO+LCQ+ZAQ==", + "dev": true, + "requires": { + "@webassemblyjs/helper-module-context": "1.8.5", + "@webassemblyjs/helper-wasm-bytecode": "1.8.5", + "@webassemblyjs/wast-parser": "1.8.5" + } + }, + "@webassemblyjs/floating-point-hex-parser": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.8.5.tgz", + "integrity": "sha512-9p+79WHru1oqBh9ewP9zW95E3XAo+90oth7S5Re3eQnECGq59ly1Ri5tsIipKGpiStHsUYmY3zMLqtk3gTcOtQ==", + 
"dev": true + }, + "@webassemblyjs/helper-api-error": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.8.5.tgz", + "integrity": "sha512-Za/tnzsvnqdaSPOUXHyKJ2XI7PDX64kWtURyGiJJZKVEdFOsdKUCPTNEVFZq3zJ2R0G5wc2PZ5gvdTRFgm81zA==", + "dev": true + }, + "@webassemblyjs/helper-buffer": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.8.5.tgz", + "integrity": "sha512-Ri2R8nOS0U6G49Q86goFIPNgjyl6+oE1abW1pS84BuhP1Qcr5JqMwRFT3Ah3ADDDYGEgGs1iyb1DGX+kAi/c/Q==", + "dev": true + }, + "@webassemblyjs/helper-code-frame": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-code-frame/-/helper-code-frame-1.8.5.tgz", + "integrity": "sha512-VQAadSubZIhNpH46IR3yWO4kZZjMxN1opDrzePLdVKAZ+DFjkGD/rf4v1jap744uPVU6yjL/smZbRIIJTOUnKQ==", + "dev": true, + "requires": { + "@webassemblyjs/wast-printer": "1.8.5" + } + }, + "@webassemblyjs/helper-fsm": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-fsm/-/helper-fsm-1.8.5.tgz", + "integrity": "sha512-kRuX/saORcg8se/ft6Q2UbRpZwP4y7YrWsLXPbbmtepKr22i8Z4O3V5QE9DbZK908dh5Xya4Un57SDIKwB9eow==", + "dev": true + }, + "@webassemblyjs/helper-module-context": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-module-context/-/helper-module-context-1.8.5.tgz", + "integrity": "sha512-/O1B236mN7UNEU4t9X7Pj38i4VoU8CcMHyy3l2cV/kIF4U5KoHXDVqcDuOs1ltkac90IM4vZdHc52t1x8Yfs3g==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.8.5", + "mamacro": "^0.0.3" + } + }, + "@webassemblyjs/helper-wasm-bytecode": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.8.5.tgz", + "integrity": "sha512-Cu4YMYG3Ddl72CbmpjU/wbP6SACcOPVbHN1dI4VJNJVgFwaKf1ppeFJrwydOG3NDHxVGuCfPlLZNyEdIYlQ6QQ==", + "dev": true + }, + "@webassemblyjs/helper-wasm-section": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.8.5.tgz", + "integrity": "sha512-VV083zwR+VTrIWWtgIUpqfvVdK4ff38loRmrdDBgBT8ADXYsEZ5mPQ4Nde90N3UYatHdYoDIFb7oHzMncI02tA==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.8.5", + "@webassemblyjs/helper-buffer": "1.8.5", + "@webassemblyjs/helper-wasm-bytecode": "1.8.5", + "@webassemblyjs/wasm-gen": "1.8.5" + } + }, + "@webassemblyjs/ieee754": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.8.5.tgz", + "integrity": "sha512-aaCvQYrvKbY/n6wKHb/ylAJr27GglahUO89CcGXMItrOBqRarUMxWLJgxm9PJNuKULwN5n1csT9bYoMeZOGF3g==", + "dev": true, + "requires": { + "@xtuc/ieee754": "^1.2.0" + } + }, + "@webassemblyjs/leb128": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.8.5.tgz", + "integrity": "sha512-plYUuUwleLIziknvlP8VpTgO4kqNaH57Y3JnNa6DLpu/sGcP6hbVdfdX5aHAV716pQBKrfuU26BJK29qY37J7A==", + "dev": true, + "requires": { + "@xtuc/long": "4.2.2" + } + }, + "@webassemblyjs/utf8": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.8.5.tgz", + "integrity": "sha512-U7zgftmQriw37tfD934UNInokz6yTmn29inT2cAetAsaU9YeVCveWEwhKL1Mg4yS7q//NGdzy79nlXh3bT8Kjw==", + "dev": true + }, + "@webassemblyjs/wasm-edit": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.8.5.tgz", + "integrity": 
"sha512-A41EMy8MWw5yvqj7MQzkDjU29K7UJq1VrX2vWLzfpRHt3ISftOXqrtojn7nlPsZ9Ijhp5NwuODuycSvfAO/26Q==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.8.5", + "@webassemblyjs/helper-buffer": "1.8.5", + "@webassemblyjs/helper-wasm-bytecode": "1.8.5", + "@webassemblyjs/helper-wasm-section": "1.8.5", + "@webassemblyjs/wasm-gen": "1.8.5", + "@webassemblyjs/wasm-opt": "1.8.5", + "@webassemblyjs/wasm-parser": "1.8.5", + "@webassemblyjs/wast-printer": "1.8.5" + } + }, + "@webassemblyjs/wasm-gen": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.8.5.tgz", + "integrity": "sha512-BCZBT0LURC0CXDzj5FXSc2FPTsxwp3nWcqXQdOZE4U7h7i8FqtFK5Egia6f9raQLpEKT1VL7zr4r3+QX6zArWg==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.8.5", + "@webassemblyjs/helper-wasm-bytecode": "1.8.5", + "@webassemblyjs/ieee754": "1.8.5", + "@webassemblyjs/leb128": "1.8.5", + "@webassemblyjs/utf8": "1.8.5" + } + }, + "@webassemblyjs/wasm-opt": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.8.5.tgz", + "integrity": "sha512-HKo2mO/Uh9A6ojzu7cjslGaHaUU14LdLbGEKqTR7PBKwT6LdPtLLh9fPY33rmr5wcOMrsWDbbdCHq4hQUdd37Q==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.8.5", + "@webassemblyjs/helper-buffer": "1.8.5", + "@webassemblyjs/wasm-gen": "1.8.5", + "@webassemblyjs/wasm-parser": "1.8.5" + } + }, + "@webassemblyjs/wasm-parser": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.8.5.tgz", + "integrity": "sha512-pi0SYE9T6tfcMkthwcgCpL0cM9nRYr6/6fjgDtL6q/ZqKHdMWvxitRi5JcZ7RI4SNJJYnYNaWy5UUrHQy998lw==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.8.5", + "@webassemblyjs/helper-api-error": "1.8.5", + "@webassemblyjs/helper-wasm-bytecode": "1.8.5", + "@webassemblyjs/ieee754": "1.8.5", + "@webassemblyjs/leb128": "1.8.5", + "@webassemblyjs/utf8": "1.8.5" + } + }, + "@webassemblyjs/wast-parser": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-parser/-/wast-parser-1.8.5.tgz", + "integrity": "sha512-daXC1FyKWHF1i11obK086QRlsMsY4+tIOKgBqI1lxAnkp9xe9YMcgOxm9kLe+ttjs5aWV2KKE1TWJCN57/Btsg==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.8.5", + "@webassemblyjs/floating-point-hex-parser": "1.8.5", + "@webassemblyjs/helper-api-error": "1.8.5", + "@webassemblyjs/helper-code-frame": "1.8.5", + "@webassemblyjs/helper-fsm": "1.8.5", + "@xtuc/long": "4.2.2" + } + }, + "@webassemblyjs/wast-printer": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.8.5.tgz", + "integrity": "sha512-w0U0pD4EhlnvRyeJzBqaVSJAo9w/ce7/WPogeXLzGkO6hzhr4GnQIZ4W4uUt5b9ooAaXPtnXlj0gzsXEOUNYMg==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.8.5", + "@webassemblyjs/wast-parser": "1.8.5", + "@xtuc/long": "4.2.2" + } + }, + "@xtuc/ieee754": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@xtuc/ieee754/-/ieee754-1.2.0.tgz", + "integrity": "sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==", + "dev": true + }, + "@xtuc/long": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/@xtuc/long/-/long-4.2.2.tgz", + "integrity": "sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==", + "dev": true + }, + "accepts": { + "version": "1.3.7", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz", + "integrity": 
"sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==", + "dev": true, + "requires": { + "mime-types": "~2.1.24", + "negotiator": "0.6.2" + } + }, + "acorn": { + "version": "6.4.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.4.1.tgz", + "integrity": "sha512-ZVA9k326Nwrj3Cj9jlh3wGFutC2ZornPNARZwsNYqQYgN0EsV2d53w5RN/co65Ohn4sUAUtb1rSUAOD6XN9idA==", + "dev": true + }, + "ajv": { + "version": "6.10.2", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.10.2.tgz", + "integrity": "sha512-TXtUUEYHuaTEbLZWIKUr5pmBuhDLy+8KYtPYdcV8qC+pOZL+NKqYwvWSRrVXHn+ZmRRAu8vJTAznH7Oag6RVRw==", + "dev": true, + "requires": { + "fast-deep-equal": "^2.0.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, + "ajv-errors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/ajv-errors/-/ajv-errors-1.0.1.tgz", + "integrity": "sha512-DCRfO/4nQ+89p/RK43i8Ezd41EqdGIU4ld7nGF8OQ14oc/we5rEntLCUa7+jrn3nn83BosfwZA0wb4pon2o8iQ==", + "dev": true + }, + "ajv-keywords": { + "version": "3.4.1", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.4.1.tgz", + "integrity": "sha512-RO1ibKvd27e6FEShVFfPALuHI3WjSVNeK5FIsmme/LYRNxjKuNj+Dt7bucLa6NdSv3JcVTyMlm9kGR84z1XpaQ==", + "dev": true + }, + "ansi-colors": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-3.2.4.tgz", + "integrity": "sha512-hHUXGagefjN2iRrID63xckIvotOXOojhQKWIPUZ4mNUZ9nLZW+7FMNoE1lOkEhNWYsx/7ysGIuJYCiMAA9FnrA==", + "dev": true + }, + "ansi-html": { + "version": "0.0.7", + "resolved": "https://registry.npmjs.org/ansi-html/-/ansi-html-0.0.7.tgz", + "integrity": "sha1-gTWEAhliqenm/QOflA0S9WynhZ4=", + "dev": true + }, + "ansi-regex": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-2.1.1.tgz", + "integrity": "sha1-w7M6te42DYbg5ijwRorn7yfWVN8=", + "dev": true + }, + "ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "requires": { + "color-convert": "^1.9.0" + } + }, + "anymatch": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-2.0.0.tgz", + "integrity": "sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==", + "dev": true, + "requires": { + "micromatch": "^3.1.4", + "normalize-path": "^2.1.1" + }, + "dependencies": { + "normalize-path": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-2.1.1.tgz", + "integrity": "sha1-GrKLVW4Zg2Oowab35vogE3/mrtk=", + "dev": true, + "requires": { + "remove-trailing-separator": "^1.0.1" + } + } + } + }, + "aproba": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-1.2.0.tgz", + "integrity": "sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw==", + "dev": true + }, + "arr-diff": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", + "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", + "dev": true + }, + "arr-flatten": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/arr-flatten/-/arr-flatten-1.1.0.tgz", + "integrity": "sha512-L3hKV5R/p5o81R7O02IGnwpDmkp6E982XhtbuwSe3O4qOtMMMtodicASA1Cny2U+aCXcNpml+m4dPsvsJ3jatg==", + "dev": true + }, + "arr-union": { + 
"version": "3.1.0", + "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz", + "integrity": "sha1-45sJrqne+Gao8gbiiK9jkZuuOcQ=", + "dev": true + }, + "array-flatten": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-2.1.2.tgz", + "integrity": "sha512-hNfzcOV8W4NdualtqBFPyVO+54DSJuZGY9qT4pRroB6S9e3iiido2ISIC5h9R2sPJ8H3FHCIiEnsv1lPXO3KtQ==", + "dev": true + }, + "array-union": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/array-union/-/array-union-1.0.2.tgz", + "integrity": "sha1-mjRBDk9OPaI96jdb5b5w8kd47Dk=", + "dev": true, + "requires": { + "array-uniq": "^1.0.1" + } + }, + "array-uniq": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/array-uniq/-/array-uniq-1.0.3.tgz", + "integrity": "sha1-r2rId6Jcx/dOBYiUdThY39sk/bY=", + "dev": true + }, + "array-unique": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", + "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", + "dev": true + }, + "asn1.js": { + "version": "4.10.1", + "resolved": "https://registry.npmjs.org/asn1.js/-/asn1.js-4.10.1.tgz", + "integrity": "sha512-p32cOF5q0Zqs9uBiONKYLm6BClCoBCM5O9JfeUSlnQLBTxYdTK+pW+nXflm8UkKd2UYlEbYz5qEi0JuZR9ckSw==", + "dev": true, + "requires": { + "bn.js": "^4.0.0", + "inherits": "^2.0.1", + "minimalistic-assert": "^1.0.0" + } + }, + "assert": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/assert/-/assert-1.5.0.tgz", + "integrity": "sha512-EDsgawzwoun2CZkCgtxJbv392v4nbk9XDD06zI+kQYoBM/3RBWLlEyJARDOmhAAosBjWACEkKL6S+lIZtcAubA==", + "dev": true, + "requires": { + "object-assign": "^4.1.1", + "util": "0.10.3" + }, + "dependencies": { + "inherits": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.1.tgz", + "integrity": "sha1-sX0I0ya0Qj5Wjv9xn5GwscvfafE=", + "dev": true + }, + "util": { + "version": "0.10.3", + "resolved": "https://registry.npmjs.org/util/-/util-0.10.3.tgz", + "integrity": "sha1-evsa/lCAUkZInj23/g7TeTNqwPk=", + "dev": true, + "requires": { + "inherits": "2.0.1" + } + } + } + }, + "assign-symbols": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/assign-symbols/-/assign-symbols-1.0.0.tgz", + "integrity": "sha1-WWZ/QfrdTyDMvCu5a41Pf3jsA2c=", + "dev": true + }, + "async": { + "version": "2.6.3", + "resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz", + "integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==", + "dev": true, + "requires": { + "lodash": "^4.17.14" + } + }, + "async-each": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/async-each/-/async-each-1.0.3.tgz", + "integrity": "sha512-z/WhQ5FPySLdvREByI2vZiTWwCnF0moMJ1hK9YQwDTHKh6I7/uSckMetoRGb5UBZPC1z0jlw+n/XCgjeH7y1AQ==", + "dev": true + }, + "async-limiter": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/async-limiter/-/async-limiter-1.0.1.tgz", + "integrity": "sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==", + "dev": true + }, + "atob": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/atob/-/atob-2.1.2.tgz", + "integrity": "sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg==", + "dev": true + }, + "balanced-match": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", + "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=", + "dev": true + }, + 
"base": { + "version": "0.11.2", + "resolved": "https://registry.npmjs.org/base/-/base-0.11.2.tgz", + "integrity": "sha512-5T6P4xPgpp0YDFvSWwEZ4NoE3aM4QBQXDzmVbraCkFj8zHM+mba8SyqB5DbZWyR7mYHo6Y7BdQo3MoA4m0TeQg==", + "dev": true, + "requires": { + "cache-base": "^1.0.1", + "class-utils": "^0.3.5", + "component-emitter": "^1.2.1", + "define-property": "^1.0.0", + "isobject": "^3.0.1", + "mixin-deep": "^1.2.0", + "pascalcase": "^0.1.1" + }, + "dependencies": { + "define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", + "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", + "dev": true, + "requires": { + "is-descriptor": "^1.0.0" + } + }, + "is-accessor-descriptor": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", + "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", + "dev": true, + "requires": { + "kind-of": "^6.0.0" + } + }, + "is-data-descriptor": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", + "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", + "dev": true, + "requires": { + "kind-of": "^6.0.0" + } + }, + "is-descriptor": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", + "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", + "dev": true, + "requires": { + "is-accessor-descriptor": "^1.0.0", + "is-data-descriptor": "^1.0.0", + "kind-of": "^6.0.2" + } + } + } + }, + "base64-js": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.3.1.tgz", + "integrity": "sha512-mLQ4i2QO1ytvGWFWmcngKO//JXAQueZvwEKtjgQFM4jIK0kU+ytMfplL8j+n5mspOfjHwoAg+9yhb7BwAHm36g==", + "dev": true + }, + "batch": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/batch/-/batch-0.6.1.tgz", + "integrity": "sha1-3DQxT05nkxgJP8dgJyUl+UvyXBY=", + "dev": true + }, + "big.js": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/big.js/-/big.js-3.2.0.tgz", + "integrity": "sha512-+hN/Zh2D08Mx65pZ/4g5bsmNiZUuChDiQfTUQ7qJr4/kuopCr88xZsAXv6mBoZEsUI4OuGHlX59qE94K2mMW8Q==", + "dev": true + }, + "binary-extensions": { + "version": "1.13.1", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-1.13.1.tgz", + "integrity": "sha512-Un7MIEDdUC5gNpcGDV97op1Ywk748MpHcFTHoYs6qnj1Z3j7I53VG3nwZhKzoBZmbdRNnb6WRdFlwl7tSDuZGw==", + "dev": true + }, + "bluebird": { + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.1.tgz", + "integrity": "sha512-DdmyoGCleJnkbp3nkbxTLJ18rjDsE4yCggEwKNXkeV123sPNfOCYeDoeuOY+F2FrSjO1YXcTU+dsy96KMy+gcg==", + "dev": true + }, + "bn.js": { + "version": "4.11.8", + "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.11.8.tgz", + "integrity": "sha512-ItfYfPLkWHUjckQCk8xC+LwxgK8NYcXywGigJgSwOP8Y2iyWT4f2vsZnoOXTTbo+o5yXmIUJ4gn5538SO5S3gA==", + "dev": true + }, + "body-parser": { + "version": "1.19.0", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz", + "integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==", + "dev": true, + "requires": { + "bytes": "3.1.0", + "content-type": "~1.0.4", + "debug": "2.6.9", + "depd": "~1.1.2", + "http-errors": "1.7.2", + 
"iconv-lite": "0.4.24", + "on-finished": "~2.3.0", + "qs": "6.7.0", + "raw-body": "2.4.0", + "type-is": "~1.6.17" + }, + "dependencies": { + "bytes": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz", + "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==", + "dev": true + } + } + }, + "bonjour": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/bonjour/-/bonjour-3.5.0.tgz", + "integrity": "sha1-jokKGD2O6aI5OzhExpGkK897yfU=", + "dev": true, + "requires": { + "array-flatten": "^2.1.0", + "deep-equal": "^1.0.1", + "dns-equal": "^1.0.0", + "dns-txt": "^2.0.2", + "multicast-dns": "^6.0.1", + "multicast-dns-service-types": "^1.1.0" + } + }, + "boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24=", + "dev": true + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "braces": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", + "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", + "dev": true, + "requires": { + "arr-flatten": "^1.1.0", + "array-unique": "^0.3.2", + "extend-shallow": "^2.0.1", + "fill-range": "^4.0.0", + "isobject": "^3.0.1", + "repeat-element": "^1.1.2", + "snapdragon": "^0.8.1", + "snapdragon-node": "^2.0.1", + "split-string": "^3.0.2", + "to-regex": "^3.0.1" + }, + "dependencies": { + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } + } + }, + "brorand": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/brorand/-/brorand-1.1.0.tgz", + "integrity": "sha1-EsJe/kCkXjwyPrhnWgoM5XsiNx8=", + "dev": true + }, + "browserify-aes": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/browserify-aes/-/browserify-aes-1.2.0.tgz", + "integrity": "sha512-+7CHXqGuspUn/Sl5aO7Ea0xWGAtETPXNSAjHo48JfLdPWcMng33Xe4znFvQweqc/uzk5zSOI3H52CYnjCfb5hA==", + "dev": true, + "requires": { + "buffer-xor": "^1.0.3", + "cipher-base": "^1.0.0", + "create-hash": "^1.1.0", + "evp_bytestokey": "^1.0.3", + "inherits": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, + "browserify-cipher": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/browserify-cipher/-/browserify-cipher-1.0.1.tgz", + "integrity": "sha512-sPhkz0ARKbf4rRQt2hTpAHqn47X3llLkUGn+xEJzLjwY8LRs2p0v7ljvI5EyoRO/mexrNunNECisZs+gw2zz1w==", + "dev": true, + "requires": { + "browserify-aes": "^1.0.4", + "browserify-des": "^1.0.0", + "evp_bytestokey": "^1.0.0" + } + }, + "browserify-des": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/browserify-des/-/browserify-des-1.0.2.tgz", + "integrity": "sha512-BioO1xf3hFwz4kc6iBhI3ieDFompMhrMlnDFC4/0/vd5MokpuAc3R+LYbwTA9A5Yc9pq9UYPqffKpW2ObuwX5A==", + "dev": true, + "requires": { + "cipher-base": "^1.0.1", + "des.js": "^1.0.0", + "inherits": "^2.0.1", + "safe-buffer": "^5.1.2" + } + }, + "browserify-rsa": { + "version": "4.0.1", + "resolved": 
"https://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.0.1.tgz", + "integrity": "sha1-IeCr+vbyApzy+vsTNWenAdQTVSQ=", + "dev": true, + "requires": { + "bn.js": "^4.1.0", + "randombytes": "^2.0.1" + } + }, + "browserify-sign": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/browserify-sign/-/browserify-sign-4.0.4.tgz", + "integrity": "sha1-qk62jl17ZYuqa/alfmMMvXqT0pg=", + "dev": true, + "requires": { + "bn.js": "^4.1.1", + "browserify-rsa": "^4.0.0", + "create-hash": "^1.1.0", + "create-hmac": "^1.1.2", + "elliptic": "^6.0.0", + "inherits": "^2.0.1", + "parse-asn1": "^5.0.0" + } + }, + "browserify-zlib": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/browserify-zlib/-/browserify-zlib-0.2.0.tgz", + "integrity": "sha512-Z942RysHXmJrhqk88FmKBVq/v5tqmSkDz7p54G/MGyjMnCFFnC79XWNbg+Vta8W6Wb2qtSZTSxIGkJrRpCFEiA==", + "dev": true, + "requires": { + "pako": "~1.0.5" + } + }, + "buffer": { + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz", + "integrity": "sha1-bRu2AbB6TvztlwlBMgkwJ8lbwpg=", + "dev": true, + "requires": { + "base64-js": "^1.0.2", + "ieee754": "^1.1.4", + "isarray": "^1.0.0" + } + }, + "buffer-from": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", + "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==", + "dev": true + }, + "buffer-indexof": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/buffer-indexof/-/buffer-indexof-1.1.1.tgz", + "integrity": "sha512-4/rOEg86jivtPTeOUUT61jJO1Ya1TrR/OkqCSZDyq84WJh3LuuiphBYJN+fm5xufIk4XAFcEwte/8WzC8If/1g==", + "dev": true + }, + "buffer-xor": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/buffer-xor/-/buffer-xor-1.0.3.tgz", + "integrity": "sha1-JuYe0UIvtw3ULm42cp7VHYVf6Nk=", + "dev": true + }, + "builtin-status-codes": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/builtin-status-codes/-/builtin-status-codes-3.0.0.tgz", + "integrity": "sha1-hZgoeOIbmOHGZCXgPQF0eI9Wnug=", + "dev": true + }, + "bytes": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz", + "integrity": "sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg=", + "dev": true + }, + "cacache": { + "version": "12.0.3", + "resolved": "https://registry.npmjs.org/cacache/-/cacache-12.0.3.tgz", + "integrity": "sha512-kqdmfXEGFepesTuROHMs3MpFLWrPkSSpRqOw80RCflZXy/khxaArvFrQ7uJxSUduzAufc6G0g1VUCOZXxWavPw==", + "dev": true, + "requires": { + "bluebird": "^3.5.5", + "chownr": "^1.1.1", + "figgy-pudding": "^3.5.1", + "glob": "^7.1.4", + "graceful-fs": "^4.1.15", + "infer-owner": "^1.0.3", + "lru-cache": "^5.1.1", + "mississippi": "^3.0.0", + "mkdirp": "^0.5.1", + "move-concurrently": "^1.0.1", + "promise-inflight": "^1.0.1", + "rimraf": "^2.6.3", + "ssri": "^6.0.1", + "unique-filename": "^1.1.1", + "y18n": "^4.0.0" + } + }, + "cache-base": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/cache-base/-/cache-base-1.0.1.tgz", + "integrity": "sha512-AKcdTnFSWATd5/GCPRxr2ChwIJ85CeyrEyjRHlKxQ56d4XJMGym0uAiKn0xbLOGOl3+yRpOTi484dVCEc5AUzQ==", + "dev": true, + "requires": { + "collection-visit": "^1.0.0", + "component-emitter": "^1.2.1", + "get-value": "^2.0.6", + "has-value": "^1.0.0", + "isobject": "^3.0.1", + "set-value": "^2.0.0", + "to-object-path": "^0.3.0", + "union-value": "^1.0.0", + "unset-value": "^1.0.0" + } + }, + "camel-case": { + "version": "3.0.0", + "resolved": 
"https://registry.npmjs.org/camel-case/-/camel-case-3.0.0.tgz", + "integrity": "sha1-yjw2iKTpzzpM2nd9xNy8cTJJz3M=", + "dev": true, + "requires": { + "no-case": "^2.2.0", + "upper-case": "^1.1.1" + } + }, + "camelcase": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", + "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==", + "dev": true + }, + "chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "requires": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + } + }, + "chokidar": { + "version": "2.1.8", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-2.1.8.tgz", + "integrity": "sha512-ZmZUazfOzf0Nve7duiCKD23PFSCs4JPoYyccjUFF3aQkQadqBhfzhjkwBH2mNOG9cTBwhamM37EIsIkZw3nRgg==", + "dev": true, + "requires": { + "anymatch": "^2.0.0", + "async-each": "^1.0.1", + "braces": "^2.3.2", + "fsevents": "^1.2.7", + "glob-parent": "^3.1.0", + "inherits": "^2.0.3", + "is-binary-path": "^1.0.0", + "is-glob": "^4.0.0", + "normalize-path": "^3.0.0", + "path-is-absolute": "^1.0.0", + "readdirp": "^2.2.1", + "upath": "^1.1.1" + } + }, + "chownr": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.3.tgz", + "integrity": "sha512-i70fVHhmV3DtTl6nqvZOnIjbY0Pe4kAUjwHj8z0zAdgBtYrJyYwLKCCuRBQ5ppkyL0AkN7HKRnETdmdp1zqNXw==", + "dev": true + }, + "chrome-trace-event": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/chrome-trace-event/-/chrome-trace-event-1.0.2.tgz", + "integrity": "sha512-9e/zx1jw7B4CO+c/RXoCsfg/x1AfUBioy4owYH0bJprEYAx5hRFLRhWBqHAG57D0ZM4H7vxbP7bPe0VwhQRYDQ==", + "dev": true, + "requires": { + "tslib": "^1.9.0" + } + }, + "cipher-base": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/cipher-base/-/cipher-base-1.0.4.tgz", + "integrity": "sha512-Kkht5ye6ZGmwv40uUDZztayT2ThLQGfnj/T71N/XzeZeo3nf8foyW7zGTsPYkEya3m5f3cAypH+qe7YOrM1U2Q==", + "dev": true, + "requires": { + "inherits": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, + "class-utils": { + "version": "0.3.6", + "resolved": "https://registry.npmjs.org/class-utils/-/class-utils-0.3.6.tgz", + "integrity": "sha512-qOhPa/Fj7s6TY8H8esGu5QNpMMQxz79h+urzrNYN6mn+9BnxlDGf5QZ+XeCDsxSjPqsSR56XOZOJmpeurnLMeg==", + "dev": true, + "requires": { + "arr-union": "^3.1.0", + "define-property": "^0.2.5", + "isobject": "^3.0.0", + "static-extend": "^0.1.1" + }, + "dependencies": { + "define-property": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", + "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", + "dev": true, + "requires": { + "is-descriptor": "^0.1.0" + } + } + } + }, + "clean-css": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/clean-css/-/clean-css-4.2.1.tgz", + "integrity": "sha512-4ZxI6dy4lrY6FHzfiy1aEOXgu4LIsW2MhwG0VBKdcoGoH/XLFgaHSdLTGr4O8Be6A8r3MOphEiI8Gc1n0ecf3g==", + "dev": true, + "requires": { + "source-map": "~0.6.0" + }, + "dependencies": { + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + } + } + }, + "cliui": { + "version": "5.0.0", + "resolved": 
"https://registry.npmjs.org/cliui/-/cliui-5.0.0.tgz", + "integrity": "sha512-PYeGSEmmHM6zvoef2w8TPzlrnNpXIjTipYK780YswmIP9vjxmd6Y2a3CB2Ks6/AU8NHjZugXvo8w3oWM2qnwXA==", + "dev": true, + "requires": { + "string-width": "^3.1.0", + "strip-ansi": "^5.2.0", + "wrap-ansi": "^5.1.0" + }, + "dependencies": { + "ansi-regex": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-4.1.0.tgz", + "integrity": "sha512-1apePfXM1UOSqw0o9IiFAovVz9M5S1Dg+4TrDwfMewQ6p/rmMueb7tWZjQ1rx4Loy1ArBggoqGpfqqdI4rondg==", + "dev": true + }, + "strip-ansi": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-5.2.0.tgz", + "integrity": "sha512-DuRs1gKbBqsMKIZlrffwlug8MHkcnpjs5VPmL1PAh+mA30U0DTotfDZ0d2UUsXpPmPmMMJ6W773MaA3J+lbiWA==", + "dev": true, + "requires": { + "ansi-regex": "^4.1.0" + } + } + } + }, + "code-point-at": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/code-point-at/-/code-point-at-1.1.0.tgz", + "integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c=", + "dev": true + }, + "collection-visit": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/collection-visit/-/collection-visit-1.0.0.tgz", + "integrity": "sha1-S8A3PBZLwykbTTaMgpzxqApZ3KA=", + "dev": true, + "requires": { + "map-visit": "^1.0.0", + "object-visit": "^1.0.0" + } + }, + "color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "requires": { + "color-name": "1.1.3" + } + }, + "color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", + "dev": true + }, + "command-exists": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/command-exists/-/command-exists-1.2.8.tgz", + "integrity": "sha512-PM54PkseWbiiD/mMsbvW351/u+dafwTJ0ye2qB60G1aGQP9j3xK2gmMDc+R34L3nDtx4qMCitXT75mkbkGJDLw==", + "dev": true + }, + "commander": { + "version": "2.17.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.17.1.tgz", + "integrity": "sha512-wPMUt6FnH2yzG95SA6mzjQOEKUU3aLaDEmzs1ti+1E9h+CsrZghRlqEM/EJ4KscsQVG8uNN4uVreUeT8+drlgg==", + "dev": true + }, + "commondir": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/commondir/-/commondir-1.0.1.tgz", + "integrity": "sha1-3dgA2gxmEnOTzKWVDqloo6rxJTs=", + "dev": true + }, + "component-emitter": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/component-emitter/-/component-emitter-1.3.0.tgz", + "integrity": "sha512-Rd3se6QB+sO1TwqZjscQrurpEPIfO0/yYnSin6Q/rD3mOutHvUrCAhJub3r90uNb+SESBuE0QYoB90YdfatsRg==", + "dev": true + }, + "compressible": { + "version": "2.0.17", + "resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.17.tgz", + "integrity": "sha512-BGHeLCK1GV7j1bSmQQAi26X+GgWcTjLr/0tzSvMCl3LH1w1IJ4PFSPoV5316b30cneTziC+B1a+3OjoSUcQYmw==", + "dev": true, + "requires": { + "mime-db": ">= 1.40.0 < 2" + } + }, + "compression": { + "version": "1.7.4", + "resolved": "https://registry.npmjs.org/compression/-/compression-1.7.4.tgz", + "integrity": "sha512-jaSIDzP9pZVS4ZfQ+TzvtiWhdpFhE2RDHz8QJkpX9SIpLq88VueF5jJw6t+6CUQcAoA6t+x89MLrWAqpfDE8iQ==", + "dev": true, + "requires": { + "accepts": "~1.3.5", + "bytes": "3.0.0", + "compressible": "~2.0.16", + "debug": "2.6.9", + "on-headers": "~1.0.2", + "safe-buffer": "5.1.2", + "vary": "~1.1.2" + } + }, + 
"concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", + "dev": true + }, + "concat-stream": { + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz", + "integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==", + "dev": true, + "requires": { + "buffer-from": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^2.2.2", + "typedarray": "^0.0.6" + } + }, + "connect-history-api-fallback": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/connect-history-api-fallback/-/connect-history-api-fallback-1.6.0.tgz", + "integrity": "sha512-e54B99q/OUoH64zYYRf3HBP5z24G38h5D3qXu23JGRoigpX5Ss4r9ZnDk3g0Z8uQC2x2lPaJ+UlWBc1ZWBWdLg==", + "dev": true + }, + "console-browserify": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/console-browserify/-/console-browserify-1.1.0.tgz", + "integrity": "sha1-8CQcRXMKn8YyOyBtvzjtx0HQuxA=", + "dev": true, + "requires": { + "date-now": "^0.1.4" + } + }, + "constants-browserify": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/constants-browserify/-/constants-browserify-1.0.0.tgz", + "integrity": "sha1-wguW2MYXdIqvHBYCF2DNJ/y4y3U=", + "dev": true + }, + "content-disposition": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz", + "integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==", + "dev": true, + "requires": { + "safe-buffer": "5.1.2" + } + }, + "content-type": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", + "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==", + "dev": true + }, + "cookie": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz", + "integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==", + "dev": true + }, + "cookie-signature": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", + "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw=", + "dev": true + }, + "copy-concurrently": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/copy-concurrently/-/copy-concurrently-1.0.5.tgz", + "integrity": "sha512-f2domd9fsVDFtaFcbaRZuYXwtdmnzqbADSwhSWYxYB/Q8zsdUUFMXVRwXGDMWmbEzAn1kdRrtI1T/KTFOL4X2A==", + "dev": true, + "requires": { + "aproba": "^1.1.1", + "fs-write-stream-atomic": "^1.0.8", + "iferr": "^0.1.5", + "mkdirp": "^0.5.1", + "rimraf": "^2.5.4", + "run-queue": "^1.0.0" + } + }, + "copy-descriptor": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/copy-descriptor/-/copy-descriptor-0.1.1.tgz", + "integrity": "sha1-Z29us8OZl8LuGsOpJP1hJHSPV40=", + "dev": true + }, + "core-util-is": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=", + "dev": true + }, + "create-ecdh": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/create-ecdh/-/create-ecdh-4.0.3.tgz", + "integrity": "sha512-GbEHQPMOswGpKXM9kCWVrremUcBmjteUaQ01T9rkKCPDXfUHX0IoP9LpHYo2NPFampa4e+/pFDc3jQdxrxQLaw==", + "dev": true, + "requires": { + "bn.js": "^4.1.0", + "elliptic": "^6.0.0" + } + }, + 
"create-hash": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/create-hash/-/create-hash-1.2.0.tgz", + "integrity": "sha512-z00bCGNHDG8mHAkP7CtT1qVu+bFQUPjYq/4Iv3C3kWjTFV10zIjfSoeqXo9Asws8gwSHDGj/hl2u4OGIjapeCg==", + "dev": true, + "requires": { + "cipher-base": "^1.0.1", + "inherits": "^2.0.1", + "md5.js": "^1.3.4", + "ripemd160": "^2.0.1", + "sha.js": "^2.4.0" + } + }, + "create-hmac": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/create-hmac/-/create-hmac-1.1.7.tgz", + "integrity": "sha512-MJG9liiZ+ogc4TzUwuvbER1JRdgvUFSB5+VR/g5h82fGaIRWMWddtKBHi7/sVhfjQZ6SehlyhvQYrcYkaUIpLg==", + "dev": true, + "requires": { + "cipher-base": "^1.0.3", + "create-hash": "^1.1.0", + "inherits": "^2.0.1", + "ripemd160": "^2.0.0", + "safe-buffer": "^5.0.1", + "sha.js": "^2.4.8" + } + }, + "cross-spawn": { + "version": "6.0.5", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz", + "integrity": "sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==", + "dev": true, + "requires": { + "nice-try": "^1.0.4", + "path-key": "^2.0.1", + "semver": "^5.5.0", + "shebang-command": "^1.2.0", + "which": "^1.2.9" + } + }, + "crypto-browserify": { + "version": "3.12.0", + "resolved": "https://registry.npmjs.org/crypto-browserify/-/crypto-browserify-3.12.0.tgz", + "integrity": "sha512-fz4spIh+znjO2VjL+IdhEpRJ3YN6sMzITSBijk6FK2UvTqruSQW+/cCZTSNsMiZNvUeq0CqurF+dAbyiGOY6Wg==", + "dev": true, + "requires": { + "browserify-cipher": "^1.0.0", + "browserify-sign": "^4.0.0", + "create-ecdh": "^4.0.0", + "create-hash": "^1.1.0", + "create-hmac": "^1.1.0", + "diffie-hellman": "^5.0.0", + "inherits": "^2.0.1", + "pbkdf2": "^3.0.3", + "public-encrypt": "^4.0.0", + "randombytes": "^2.0.0", + "randomfill": "^1.0.3" + } + }, + "css-select": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", + "integrity": "sha1-KzoRBTnFNV8c2NMUYj6HCxIeyFg=", + "dev": true, + "requires": { + "boolbase": "~1.0.0", + "css-what": "2.1", + "domutils": "1.5.1", + "nth-check": "~1.0.1" + } + }, + "css-what": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz", + "integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg==", + "dev": true + }, + "cyclist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/cyclist/-/cyclist-1.0.1.tgz", + "integrity": "sha1-WW6WmP0MgOEgOMK4LW6xs1tiJNk=", + "dev": true + }, + "date-now": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/date-now/-/date-now-0.1.4.tgz", + "integrity": "sha1-6vQ5/U1ISK105cx9vvIAZyueNFs=", + "dev": true + }, + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dev": true, + "requires": { + "ms": "2.0.0" + } + }, + "decamelize": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=", + "dev": true + }, + "decode-uri-component": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/decode-uri-component/-/decode-uri-component-0.2.0.tgz", + "integrity": "sha1-6zkTMzRYd1y4TNGh+uBiEGu4dUU=", + "dev": true + }, + "deep-equal": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/deep-equal/-/deep-equal-1.1.0.tgz", + "integrity": 
"sha512-ZbfWJq/wN1Z273o7mUSjILYqehAktR2NVoSrOukDkU9kg2v/Uv89yU4Cvz8seJeAmtN5oqiefKq8FPuXOboqLw==", + "dev": true, + "requires": { + "is-arguments": "^1.0.4", + "is-date-object": "^1.0.1", + "is-regex": "^1.0.4", + "object-is": "^1.0.1", + "object-keys": "^1.1.1", + "regexp.prototype.flags": "^1.2.0" + } + }, + "default-gateway": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/default-gateway/-/default-gateway-4.2.0.tgz", + "integrity": "sha512-h6sMrVB1VMWVrW13mSc6ia/DwYYw5MN6+exNu1OaJeFac5aSAvwM7lZ0NVfTABuSkQelr4h5oebg3KB1XPdjgA==", + "dev": true, + "requires": { + "execa": "^1.0.0", + "ip-regex": "^2.1.0" + } + }, + "define-properties": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz", + "integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==", + "dev": true, + "requires": { + "object-keys": "^1.0.12" + } + }, + "define-property": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-2.0.2.tgz", + "integrity": "sha512-jwK2UV4cnPpbcG7+VRARKTZPUWowwXA8bzH5NP6ud0oeAxyYPuGZUAC7hMugpCdz4BeSZl2Dl9k66CHJ/46ZYQ==", + "dev": true, + "requires": { + "is-descriptor": "^1.0.2", + "isobject": "^3.0.1" + }, + "dependencies": { + "is-accessor-descriptor": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", + "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", + "dev": true, + "requires": { + "kind-of": "^6.0.0" + } + }, + "is-data-descriptor": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", + "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", + "dev": true, + "requires": { + "kind-of": "^6.0.0" + } + }, + "is-descriptor": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", + "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", + "dev": true, + "requires": { + "is-accessor-descriptor": "^1.0.0", + "is-data-descriptor": "^1.0.0", + "kind-of": "^6.0.2" + } + } + } + }, + "del": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/del/-/del-4.1.1.tgz", + "integrity": "sha512-QwGuEUouP2kVwQenAsOof5Fv8K9t3D8Ca8NxcXKrIpEHjTXK5J2nXLdP+ALI1cgv8wj7KuwBhTwBkOZSJKM5XQ==", + "dev": true, + "requires": { + "@types/glob": "^7.1.1", + "globby": "^6.1.0", + "is-path-cwd": "^2.0.0", + "is-path-in-cwd": "^2.0.0", + "p-map": "^2.0.0", + "pify": "^4.0.1", + "rimraf": "^2.6.3" + } + }, + "depd": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", + "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=", + "dev": true + }, + "des.js": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/des.js/-/des.js-1.0.0.tgz", + "integrity": "sha1-wHTS4qpqipoH29YfmhXCzYPsjsw=", + "dev": true, + "requires": { + "inherits": "^2.0.1", + "minimalistic-assert": "^1.0.0" + } + }, + "destroy": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", + "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=", + "dev": true + }, + "detect-file": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/detect-file/-/detect-file-1.0.0.tgz", + "integrity": "sha1-8NZtA2cqglyxtzvbP+YjEMjlUrc=", + "dev": 
true + }, + "detect-node": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.0.4.tgz", + "integrity": "sha512-ZIzRpLJrOj7jjP2miAtgqIfmzbxa4ZOr5jJc601zklsfEx9oTzmmj2nVpIPRpNlRTIh8lc1kyViIY7BWSGNmKw==", + "dev": true + }, + "diffie-hellman": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz", + "integrity": "sha512-kqag/Nl+f3GwyK25fhUMYj81BUOrZ9IuJsjIcDE5icNM9FJHAVm3VcUDxdLPoQtTuUylWm6ZIknYJwwaPxsUzg==", + "dev": true, + "requires": { + "bn.js": "^4.1.0", + "miller-rabin": "^4.0.0", + "randombytes": "^2.0.0" + } + }, + "dns-equal": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/dns-equal/-/dns-equal-1.0.0.tgz", + "integrity": "sha1-s55/HabrCnW6nBcySzR1PEfgZU0=", + "dev": true + }, + "dns-packet": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-1.3.1.tgz", + "integrity": "sha512-0UxfQkMhYAUaZI+xrNZOz/as5KgDU0M/fQ9b6SpkyLbk3GEswDi6PADJVaYJradtRVsRIlF1zLyOodbcTCDzUg==", + "dev": true, + "requires": { + "ip": "^1.1.0", + "safe-buffer": "^5.0.1" + } + }, + "dns-txt": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/dns-txt/-/dns-txt-2.0.2.tgz", + "integrity": "sha1-uR2Ab10nGI5Ks+fRB9iBocxGQrY=", + "dev": true, + "requires": { + "buffer-indexof": "^1.0.0" + } + }, + "dom-converter": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/dom-converter/-/dom-converter-0.2.0.tgz", + "integrity": "sha512-gd3ypIPfOMr9h5jIKq8E3sHOTCjeirnl0WK5ZdS1AW0Odt0b1PaWaHdJ4Qk4klv+YB9aJBS7mESXjFoDQPu6DA==", + "dev": true, + "requires": { + "utila": "~0.4" + } + }, + "dom-serializer": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.2.1.tgz", + "integrity": "sha512-sK3ujri04WyjwQXVoK4PU3y8ula1stq10GJZpqHIUgoGZdsGzAGu65BnU3d08aTVSvO7mGPZUc0wTEDL+qGE0Q==", + "dev": true, + "requires": { + "domelementtype": "^2.0.1", + "entities": "^2.0.0" + }, + "dependencies": { + "domelementtype": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.0.1.tgz", + "integrity": "sha512-5HOHUDsYZWV8FGWN0Njbr/Rn7f/eWSQi1v7+HsUVwXgn8nWWlL64zKDkS0n8ZmQ3mlWOMuXOnR+7Nx/5tMO5AQ==", + "dev": true + } + } + }, + "domain-browser": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/domain-browser/-/domain-browser-1.2.0.tgz", + "integrity": "sha512-jnjyiM6eRyZl2H+W8Q/zLMA481hzi0eszAaBUzIVnmYVDBbnLxVNnfu1HgEBvCbL+71FrxMl3E6lpKH7Ge3OXA==", + "dev": true + }, + "domelementtype": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.1.tgz", + "integrity": "sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w==", + "dev": true + }, + "domhandler": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz", + "integrity": "sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==", + "dev": true, + "requires": { + "domelementtype": "1" + } + }, + "domutils": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz", + "integrity": "sha1-3NhIiib1Y9YQeeSMn3t+Mjc2gs8=", + "dev": true, + "requires": { + "dom-serializer": "0", + "domelementtype": "1" + } + }, + "duplexify": { + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/duplexify/-/duplexify-3.7.1.tgz", + "integrity": 
"sha512-07z8uv2wMyS51kKhD1KsdXJg5WQ6t93RneqRxUHnskXVtlYYkLqM0gqStQZ3pj073g687jPCHrqNfCzawLYh5g==", + "dev": true, + "requires": { + "end-of-stream": "^1.0.0", + "inherits": "^2.0.1", + "readable-stream": "^2.0.0", + "stream-shift": "^1.0.0" + } + }, + "ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=", + "dev": true + }, + "elliptic": { + "version": "6.5.4", + "resolved": "https://registry.npmjs.org/elliptic/-/elliptic-6.5.4.tgz", + "integrity": "sha512-iLhC6ULemrljPZb+QutR5TQGB+pdW6KGD5RSegS+8sorOZT+rdQFbsQFJgvN3eRqNALqJer4oQ16YvJHlU8hzQ==", + "dev": true, + "requires": { + "bn.js": "^4.11.9", + "brorand": "^1.1.0", + "hash.js": "^1.0.0", + "hmac-drbg": "^1.0.1", + "inherits": "^2.0.4", + "minimalistic-assert": "^1.0.1", + "minimalistic-crypto-utils": "^1.0.1" + }, + "dependencies": { + "bn.js": { + "version": "4.12.0", + "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.0.tgz", + "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==", + "dev": true + } + } + }, + "emoji-regex": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-7.0.3.tgz", + "integrity": "sha512-CwBLREIQ7LvYFB0WyRvwhq5N5qPhc6PMjD6bYggFlI5YyDgl+0vxq5VHbMOFqLg7hfWzmu8T5Z1QofhmTIhItA==", + "dev": true + }, + "emojis-list": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/emojis-list/-/emojis-list-2.1.0.tgz", + "integrity": "sha1-TapNnbAPmBmIDHn6RXrlsJof04k=", + "dev": true + }, + "encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=", + "dev": true + }, + "end-of-stream": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", + "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", + "dev": true, + "requires": { + "once": "^1.4.0" + } + }, + "enhanced-resolve": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-4.1.1.tgz", + "integrity": "sha512-98p2zE+rL7/g/DzMHMTF4zZlCgeVdJ7yr6xzEpJRYwFYrGi9ANdn5DnJURg6RpBkyk60XYDnWIv51VfIhfNGuA==", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "memory-fs": "^0.5.0", + "tapable": "^1.0.0" + }, + "dependencies": { + "memory-fs": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/memory-fs/-/memory-fs-0.5.0.tgz", + "integrity": "sha512-jA0rdU5KoQMC0e6ppoNRtpp6vjFq6+NY7r8hywnC7V+1Xj/MtHwGIbB1QaK/dunyjWteJzmkpd7ooeWg10T7GA==", + "dev": true, + "requires": { + "errno": "^0.1.3", + "readable-stream": "^2.0.1" + } + } + } + }, + "entities": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.0.0.tgz", + "integrity": "sha512-D9f7V0JSRwIxlRI2mjMqufDrRDnx8p+eEOz7aUM9SuvF8gsBzra0/6tbjl1m8eQHrZlYj6PxqE00hZ1SAIKPLw==", + "dev": true + }, + "errno": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/errno/-/errno-0.1.7.tgz", + "integrity": "sha512-MfrRBDWzIWifgq6tJj60gkAwtLNb6sQPlcFrSOflcP1aFmmruKQ2wRnze/8V6kgyz7H3FF8Npzv78mZ7XLLflg==", + "dev": true, + "requires": { + "prr": "~1.0.1" + } + }, + "es-abstract": { + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.16.0.tgz", + "integrity": "sha512-xdQnfykZ9JMEiasTAJZJdMWCQ1Vm00NBw79/AWi7ELfZuuPCSOMDZbT9mkOfSctVtfhb+sAAzrm+j//GjjLHLg==", + 
"dev": true, + "requires": { + "es-to-primitive": "^1.2.0", + "function-bind": "^1.1.1", + "has": "^1.0.3", + "has-symbols": "^1.0.0", + "is-callable": "^1.1.4", + "is-regex": "^1.0.4", + "object-inspect": "^1.6.0", + "object-keys": "^1.1.1", + "string.prototype.trimleft": "^2.1.0", + "string.prototype.trimright": "^2.1.0" + } + }, + "es-to-primitive": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.2.0.tgz", + "integrity": "sha512-qZryBOJjV//LaxLTV6UC//WewneB3LcXOL9NP++ozKVXsIIIpm/2c13UDiD9Jp2eThsecw9m3jPqDwTyobcdbg==", + "dev": true, + "requires": { + "is-callable": "^1.1.4", + "is-date-object": "^1.0.1", + "is-symbol": "^1.0.2" + } + }, + "escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=", + "dev": true + }, + "escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", + "dev": true + }, + "eslint-scope": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-4.0.3.tgz", + "integrity": "sha512-p7VutNr1O/QrxysMo3E45FjYDTeXBy0iTltPFNSqKAIfjDSXC+4dj+qfyuD8bfAXrW/y6lW3O76VaYNPKfpKrg==", + "dev": true, + "requires": { + "esrecurse": "^4.1.0", + "estraverse": "^4.1.1" + } + }, + "esrecurse": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.2.1.tgz", + "integrity": "sha512-64RBB++fIOAXPw3P9cy89qfMlvZEXZkqqJkjqqXIvzP5ezRZjW+lPWjw35UX/3EhUPFYbg5ER4JYgDw4007/DQ==", + "dev": true, + "requires": { + "estraverse": "^4.1.0" + } + }, + "estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "dev": true + }, + "etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=", + "dev": true + }, + "eventemitter3": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.0.tgz", + "integrity": "sha512-qerSRB0p+UDEssxTtm6EDKcE7W4OaoisfIMl4CngyEhjpYglocpNg6UEqCvemdGhosAsg4sO2dXJOdyBifPGCg==", + "dev": true + }, + "events": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.0.0.tgz", + "integrity": "sha512-Dc381HFWJzEOhQ+d8pkNon++bk9h6cdAoAj4iE6Q4y6xgTzySWXlKn05/TVNpjnfRqi/X0EpJEJohPjNI3zpVA==", + "dev": true + }, + "eventsource": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-1.0.7.tgz", + "integrity": "sha512-4Ln17+vVT0k8aWq+t/bF5arcS3EpT9gYtW66EPacdj/mAFevznsnyoHLPy2BA8gbIQeIHoPsvwmfBftfcG//BQ==", + "dev": true, + "requires": { + "original": "^1.0.0" + } + }, + "evp_bytestokey": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/evp_bytestokey/-/evp_bytestokey-1.0.3.tgz", + "integrity": "sha512-/f2Go4TognH/KvCISP7OUsHn85hT9nUkxxA9BEWxFn+Oj9o8ZNLm/40hdlgSLyuOimsrTKLUMEorQexp/aPQeA==", + "dev": true, + "requires": { + "md5.js": "^1.3.4", + "safe-buffer": "^5.1.1" + } + }, + "execa": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/execa/-/execa-1.0.0.tgz", + "integrity": "sha512-adbxcyWV46qiHyvSp50TKt05tB4tK3HcmF7/nxfAdhnox83seTDbwnaqKO4sXRy7roHAIFqJP/Rw/AuEbX61LA==", + "dev": true, + "requires": { + "cross-spawn": 
"^6.0.0", + "get-stream": "^4.0.0", + "is-stream": "^1.1.0", + "npm-run-path": "^2.0.0", + "p-finally": "^1.0.0", + "signal-exit": "^3.0.0", + "strip-eof": "^1.0.0" + } + }, + "expand-brackets": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", + "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", + "dev": true, + "requires": { + "debug": "^2.3.3", + "define-property": "^0.2.5", + "extend-shallow": "^2.0.1", + "posix-character-classes": "^0.1.0", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.1" + }, + "dependencies": { + "define-property": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", + "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", + "dev": true, + "requires": { + "is-descriptor": "^0.1.0" + } + }, + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } + } + }, + "expand-tilde": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/expand-tilde/-/expand-tilde-2.0.2.tgz", + "integrity": "sha1-l+gBqgUt8CRU3kawK/YhZCzchQI=", + "dev": true, + "requires": { + "homedir-polyfill": "^1.0.1" + } + }, + "express": { + "version": "4.17.1", + "resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz", + "integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==", + "dev": true, + "requires": { + "accepts": "~1.3.7", + "array-flatten": "1.1.1", + "body-parser": "1.19.0", + "content-disposition": "0.5.3", + "content-type": "~1.0.4", + "cookie": "0.4.0", + "cookie-signature": "1.0.6", + "debug": "2.6.9", + "depd": "~1.1.2", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "~1.1.2", + "fresh": "0.5.2", + "merge-descriptors": "1.0.1", + "methods": "~1.1.2", + "on-finished": "~2.3.0", + "parseurl": "~1.3.3", + "path-to-regexp": "0.1.7", + "proxy-addr": "~2.0.5", + "qs": "6.7.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.1.2", + "send": "0.17.1", + "serve-static": "1.14.1", + "setprototypeof": "1.1.1", + "statuses": "~1.5.0", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "dependencies": { + "array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=", + "dev": true + } + } + }, + "extend-shallow": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-3.0.2.tgz", + "integrity": "sha1-Jqcarwc7OfshJxcnRhMcJwQCjbg=", + "dev": true, + "requires": { + "assign-symbols": "^1.0.0", + "is-extendable": "^1.0.1" + }, + "dependencies": { + "is-extendable": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-1.0.1.tgz", + "integrity": "sha512-arnXMxT1hhoKo9k1LZdmlNyJdDDfy2v0fXjFlmok4+i8ul/6WlbVge9bhM74OpNPQPMGUToDtz+KXa1PneJxOA==", + "dev": true, + "requires": { + "is-plain-object": "^2.0.4" + } + } + } + }, + "extglob": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", + "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", + "dev": true, + "requires": { + "array-unique": "^0.3.2", + "define-property": "^1.0.0", + 
"expand-brackets": "^2.1.4", + "extend-shallow": "^2.0.1", + "fragment-cache": "^0.2.1", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.1" + }, + "dependencies": { + "define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", + "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", + "dev": true, + "requires": { + "is-descriptor": "^1.0.0" + } + }, + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + }, + "is-accessor-descriptor": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", + "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", + "dev": true, + "requires": { + "kind-of": "^6.0.0" + } + }, + "is-data-descriptor": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", + "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", + "dev": true, + "requires": { + "kind-of": "^6.0.0" + } + }, + "is-descriptor": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", + "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", + "dev": true, + "requires": { + "is-accessor-descriptor": "^1.0.0", + "is-data-descriptor": "^1.0.0", + "kind-of": "^6.0.2" + } + } + } + }, + "fast-deep-equal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", + "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", + "dev": true + }, + "fast-json-stable-stringify": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz", + "integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I=", + "dev": true + }, + "faye-websocket": { + "version": "0.10.0", + "resolved": "https://registry.npmjs.org/faye-websocket/-/faye-websocket-0.10.0.tgz", + "integrity": "sha1-TkkvjQTftviQA1B/btvy1QHnxvQ=", + "dev": true, + "requires": { + "websocket-driver": ">=0.5.1" + } + }, + "figgy-pudding": { + "version": "3.5.1", + "resolved": "https://registry.npmjs.org/figgy-pudding/-/figgy-pudding-3.5.1.tgz", + "integrity": "sha512-vNKxJHTEKNThjfrdJwHc7brvM6eVevuO5nTj6ez8ZQ1qbXTvGthucRF7S4vf2cr71QVnT70V34v0S1DyQsti0w==", + "dev": true + }, + "fill-range": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", + "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", + "dev": true, + "requires": { + "extend-shallow": "^2.0.1", + "is-number": "^3.0.0", + "repeat-string": "^1.6.1", + "to-regex-range": "^2.1.0" + }, + "dependencies": { + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } + } + }, + "finalhandler": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz", + "integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==", + "dev": true, + "requires": { + 
"debug": "2.6.9", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "on-finished": "~2.3.0", + "parseurl": "~1.3.3", + "statuses": "~1.5.0", + "unpipe": "~1.0.0" + } + }, + "find-cache-dir": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-cache-dir/-/find-cache-dir-2.1.0.tgz", + "integrity": "sha512-Tq6PixE0w/VMFfCgbONnkiQIVol/JJL7nRMi20fqzA4NRs9AfeqMGeRdPi3wIhYkxjeBaWh2rxwapn5Tu3IqOQ==", + "dev": true, + "requires": { + "commondir": "^1.0.1", + "make-dir": "^2.0.0", + "pkg-dir": "^3.0.0" + } + }, + "find-up": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-3.0.0.tgz", + "integrity": "sha512-1yD6RmLI1XBfxugvORwlck6f75tYL+iR0jqwsOrOxMZyGYqUuDhJ0l4AXdO1iX/FTs9cBAMEk1gWSEx1kSbylg==", + "dev": true, + "requires": { + "locate-path": "^3.0.0" + } + }, + "findup-sync": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/findup-sync/-/findup-sync-3.0.0.tgz", + "integrity": "sha512-YbffarhcicEhOrm4CtrwdKBdCuz576RLdhJDsIfvNtxUuhdRet1qZcsMjqbePtAseKdAnDyM/IyXbu7PRPRLYg==", + "dev": true, + "requires": { + "detect-file": "^1.0.0", + "is-glob": "^4.0.0", + "micromatch": "^3.0.4", + "resolve-dir": "^1.0.1" + } + }, + "flush-write-stream": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/flush-write-stream/-/flush-write-stream-1.1.1.tgz", + "integrity": "sha512-3Z4XhFZ3992uIq0XOqb9AreonueSYphE6oYbpt5+3u06JWklbsPkNv3ZKkP9Bz/r+1MWCaMoSQ28P85+1Yc77w==", + "dev": true, + "requires": { + "inherits": "^2.0.3", + "readable-stream": "^2.3.6" + } + }, + "follow-redirects": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.9.0.tgz", + "integrity": "sha512-CRcPzsSIbXyVDl0QI01muNDu69S8trU4jArW9LpOt2WtC6LyUJetcIrmfHsRBx7/Jb6GHJUiuqyYxPooFfNt6A==", + "dev": true, + "requires": { + "debug": "^3.0.0" + }, + "dependencies": { + "debug": { + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz", + "integrity": "sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==", + "dev": true, + "requires": { + "ms": "^2.1.1" + } + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + } + } + }, + "for-in": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz", + "integrity": "sha1-gQaNKVqBQuwKxybG4iAMMPttXoA=", + "dev": true + }, + "forwarded": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", + "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=", + "dev": true + }, + "fragment-cache": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/fragment-cache/-/fragment-cache-0.2.1.tgz", + "integrity": "sha1-QpD60n8T6Jvn8zeZxrxaCr//DRk=", + "dev": true, + "requires": { + "map-cache": "^0.2.2" + } + }, + "fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=", + "dev": true + }, + "from2": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/from2/-/from2-2.3.0.tgz", + "integrity": "sha1-i/tVAr3kpNNs/e6gB/zKIdfjgq8=", + "dev": true, + "requires": { + "inherits": "^2.0.1", + "readable-stream": "^2.0.0" + } + }, + "fs-write-stream-atomic": { + "version": "1.0.10", + "resolved": 
"https://registry.npmjs.org/fs-write-stream-atomic/-/fs-write-stream-atomic-1.0.10.tgz", + "integrity": "sha1-tH31NJPvkR33VzHnCp3tAYnbQMk=", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "iferr": "^0.1.5", + "imurmurhash": "^0.1.4", + "readable-stream": "1 || 2" + } + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", + "dev": true + }, + "fsevents": { + "version": "1.2.9", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.9.tgz", + "integrity": "sha512-oeyj2H3EjjonWcFjD5NvZNE9Rqe4UW+nQBU2HNeKw0koVLEFIhtyETyAakeAM3de7Z/SW5kcA+fZUait9EApnw==", + "dev": true, + "optional": true, + "requires": { + "nan": "^2.12.1", + "node-pre-gyp": "^0.12.0" + }, + "dependencies": { + "abbrev": { + "version": "1.1.1", + "bundled": true, + "dev": true, + "optional": true + }, + "ansi-regex": { + "version": "2.1.1", + "bundled": true, + "dev": true, + "optional": true + }, + "aproba": { + "version": "1.2.0", + "bundled": true, + "dev": true, + "optional": true + }, + "are-we-there-yet": { + "version": "1.1.5", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "delegates": "^1.0.0", + "readable-stream": "^2.0.6" + } + }, + "balanced-match": { + "version": "1.0.0", + "bundled": true, + "dev": true, + "optional": true + }, + "brace-expansion": { + "version": "1.1.11", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "chownr": { + "version": "1.1.1", + "bundled": true, + "dev": true, + "optional": true + }, + "code-point-at": { + "version": "1.1.0", + "bundled": true, + "dev": true, + "optional": true + }, + "concat-map": { + "version": "0.0.1", + "bundled": true, + "dev": true, + "optional": true + }, + "console-control-strings": { + "version": "1.1.0", + "bundled": true, + "dev": true, + "optional": true + }, + "core-util-is": { + "version": "1.0.2", + "bundled": true, + "dev": true, + "optional": true + }, + "debug": { + "version": "4.1.1", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "ms": "^2.1.1" + } + }, + "deep-extend": { + "version": "0.6.0", + "bundled": true, + "dev": true, + "optional": true + }, + "delegates": { + "version": "1.0.0", + "bundled": true, + "dev": true, + "optional": true + }, + "detect-libc": { + "version": "1.0.3", + "bundled": true, + "dev": true, + "optional": true + }, + "fs-minipass": { + "version": "1.2.5", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "minipass": "^2.2.1" + } + }, + "fs.realpath": { + "version": "1.0.0", + "bundled": true, + "dev": true, + "optional": true + }, + "gauge": { + "version": "2.7.4", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "aproba": "^1.0.3", + "console-control-strings": "^1.0.0", + "has-unicode": "^2.0.0", + "object-assign": "^4.1.0", + "signal-exit": "^3.0.0", + "string-width": "^1.0.1", + "strip-ansi": "^3.0.1", + "wide-align": "^1.1.0" + } + }, + "glob": { + "version": "7.1.3", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "has-unicode": { + "version": "2.0.1", + "bundled": true, + "dev": true, + "optional": true + }, + "iconv-lite": { + "version": "0.4.24", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + 
"safer-buffer": ">= 2.1.2 < 3" + } + }, + "ignore-walk": { + "version": "3.0.1", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "minimatch": "^3.0.4" + } + }, + "inflight": { + "version": "1.0.6", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "inherits": { + "version": "2.0.3", + "bundled": true, + "dev": true, + "optional": true + }, + "is-fullwidth-code-point": { + "version": "1.0.0", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "number-is-nan": "^1.0.0" + } + }, + "isarray": { + "version": "1.0.0", + "bundled": true, + "dev": true, + "optional": true + }, + "minimatch": { + "version": "3.0.4", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "minimist": { + "version": "0.0.8", + "bundled": true, + "dev": true, + "optional": true + }, + "minipass": { + "version": "2.3.5", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "safe-buffer": "^5.1.2", + "yallist": "^3.0.0" + } + }, + "minizlib": { + "version": "1.2.1", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "minipass": "^2.2.1" + } + }, + "mkdirp": { + "version": "0.5.1", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "minimist": "0.0.8" + } + }, + "ms": { + "version": "2.1.1", + "bundled": true, + "dev": true, + "optional": true + }, + "needle": { + "version": "2.3.0", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "debug": "^4.1.0", + "iconv-lite": "^0.4.4", + "sax": "^1.2.4" + } + }, + "node-pre-gyp": { + "version": "0.12.0", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "detect-libc": "^1.0.2", + "mkdirp": "^0.5.1", + "needle": "^2.2.1", + "nopt": "^4.0.1", + "npm-packlist": "^1.1.6", + "npmlog": "^4.0.2", + "rc": "^1.2.7", + "rimraf": "^2.6.1", + "semver": "^5.3.0", + "tar": "^4" + } + }, + "nopt": { + "version": "4.0.1", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "abbrev": "1", + "osenv": "^0.1.4" + } + }, + "npm-bundled": { + "version": "1.0.6", + "bundled": true, + "dev": true, + "optional": true + }, + "npm-packlist": { + "version": "1.4.1", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "ignore-walk": "^3.0.1", + "npm-bundled": "^1.0.1" + } + }, + "npmlog": { + "version": "4.1.2", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "are-we-there-yet": "~1.1.2", + "console-control-strings": "~1.1.0", + "gauge": "~2.7.3", + "set-blocking": "~2.0.0" + } + }, + "number-is-nan": { + "version": "1.0.1", + "bundled": true, + "dev": true, + "optional": true + }, + "object-assign": { + "version": "4.1.1", + "bundled": true, + "dev": true, + "optional": true + }, + "once": { + "version": "1.4.0", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "wrappy": "1" + } + }, + "os-homedir": { + "version": "1.0.2", + "bundled": true, + "dev": true, + "optional": true + }, + "os-tmpdir": { + "version": "1.0.2", + "bundled": true, + "dev": true, + "optional": true + }, + "osenv": { + "version": "0.1.5", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "os-homedir": "^1.0.0", + "os-tmpdir": "^1.0.0" + } + }, + "path-is-absolute": { + "version": "1.0.1", + "bundled": true, + "dev": true, + "optional": true + }, + "process-nextick-args": { + "version": "2.0.0", + "bundled": true, + "dev": true, + "optional": true + }, + "rc": { + 
"version": "1.2.8", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "dependencies": { + "minimist": { + "version": "1.2.0", + "bundled": true, + "dev": true, + "optional": true + } + } + }, + "readable-stream": { + "version": "2.3.6", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "rimraf": { + "version": "2.6.3", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "glob": "^7.1.3" + } + }, + "safe-buffer": { + "version": "5.1.2", + "bundled": true, + "dev": true, + "optional": true + }, + "safer-buffer": { + "version": "2.1.2", + "bundled": true, + "dev": true, + "optional": true + }, + "sax": { + "version": "1.2.4", + "bundled": true, + "dev": true, + "optional": true + }, + "semver": { + "version": "5.7.0", + "bundled": true, + "dev": true, + "optional": true + }, + "set-blocking": { + "version": "2.0.0", + "bundled": true, + "dev": true, + "optional": true + }, + "signal-exit": { + "version": "3.0.2", + "bundled": true, + "dev": true, + "optional": true + }, + "string-width": { + "version": "1.0.2", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "code-point-at": "^1.0.0", + "is-fullwidth-code-point": "^1.0.0", + "strip-ansi": "^3.0.0" + } + }, + "string_decoder": { + "version": "1.1.1", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "safe-buffer": "~5.1.0" + } + }, + "strip-ansi": { + "version": "3.0.1", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "ansi-regex": "^2.0.0" + } + }, + "strip-json-comments": { + "version": "2.0.1", + "bundled": true, + "dev": true, + "optional": true + }, + "tar": { + "version": "4.4.8", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "chownr": "^1.1.1", + "fs-minipass": "^1.2.5", + "minipass": "^2.3.4", + "minizlib": "^1.1.1", + "mkdirp": "^0.5.0", + "safe-buffer": "^5.1.2", + "yallist": "^3.0.2" + } + }, + "util-deprecate": { + "version": "1.0.2", + "bundled": true, + "dev": true, + "optional": true + }, + "wide-align": { + "version": "1.1.3", + "bundled": true, + "dev": true, + "optional": true, + "requires": { + "string-width": "^1.0.2 || 2" + } + }, + "wrappy": { + "version": "1.0.2", + "bundled": true, + "dev": true, + "optional": true + }, + "yallist": { + "version": "3.0.3", + "bundled": true, + "dev": true, + "optional": true + } + } + }, + "function-bind": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", + "dev": true + }, + "get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true + }, + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", + "dev": true, + "requires": { + "pump": "^3.0.0" + } + }, + "get-value": { + "version": 
"2.0.6", + "resolved": "https://registry.npmjs.org/get-value/-/get-value-2.0.6.tgz", + "integrity": "sha1-3BXKHGcjh8p2vTesCjlbogQqLCg=", + "dev": true + }, + "glob": { + "version": "7.1.5", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.5.tgz", + "integrity": "sha512-J9dlskqUXK1OeTOYBEn5s8aMukWMwWfs+rPTn/jn50Ux4MNXVhubL1wu/j2t+H4NVI+cXEcCaYellqaPVGXNqQ==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "glob-parent": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", + "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", + "dev": true, + "requires": { + "is-glob": "^3.1.0", + "path-dirname": "^1.0.0" + }, + "dependencies": { + "is-glob": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", + "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", + "dev": true, + "requires": { + "is-extglob": "^2.1.0" + } + } + } + }, + "global-modules": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/global-modules/-/global-modules-2.0.0.tgz", + "integrity": "sha512-NGbfmJBp9x8IxyJSd1P+otYK8vonoJactOogrVfFRIAEY1ukil8RSKDz2Yo7wh1oihl51l/r6W4epkeKJHqL8A==", + "dev": true, + "requires": { + "global-prefix": "^3.0.0" + }, + "dependencies": { + "global-prefix": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/global-prefix/-/global-prefix-3.0.0.tgz", + "integrity": "sha512-awConJSVCHVGND6x3tmMaKcQvwXLhjdkmomy2W+Goaui8YPgYgXJZewhg3fWC+DlfqqQuWg8AwqjGTD2nAPVWg==", + "dev": true, + "requires": { + "ini": "^1.3.5", + "kind-of": "^6.0.2", + "which": "^1.3.1" + } + } + } + }, + "global-prefix": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/global-prefix/-/global-prefix-1.0.2.tgz", + "integrity": "sha1-2/dDxsFJklk8ZVVoy2btMsASLr4=", + "dev": true, + "requires": { + "expand-tilde": "^2.0.2", + "homedir-polyfill": "^1.0.1", + "ini": "^1.3.4", + "is-windows": "^1.0.1", + "which": "^1.2.14" + } + }, + "globby": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-6.1.0.tgz", + "integrity": "sha1-9abXDoOV4hyFj7BInWTfAkJNUGw=", + "dev": true, + "requires": { + "array-union": "^1.0.1", + "glob": "^7.0.3", + "object-assign": "^4.0.1", + "pify": "^2.0.0", + "pinkie-promise": "^2.0.0" + }, + "dependencies": { + "pify": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", + "integrity": "sha1-7RQaasBDqEnqWISY59yosVMw6Qw=", + "dev": true + } + } + }, + "graceful-fs": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.2.tgz", + "integrity": "sha512-IItsdsea19BoLC7ELy13q1iJFNmd7ofZH5+X/pJr90/nRoPEX0DJo1dHDbgtYWOhJhcCgMDTOw84RZ72q6lB+Q==", + "dev": true + }, + "handle-thing": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/handle-thing/-/handle-thing-2.0.0.tgz", + "integrity": "sha512-d4sze1JNC454Wdo2fkuyzCr6aHcbL6PGGuFAz0Li/NcOm1tCHGnWDRmJP85dh9IhQErTc2svWFEX5xHIOo//kQ==", + "dev": true + }, + "has": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "dev": true, + "requires": { + "function-bind": "^1.1.1" + } + }, + "has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": 
"sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", + "dev": true + }, + "has-symbols": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.0.tgz", + "integrity": "sha1-uhqPGvKg/DllD1yFA2dwQSIGO0Q=", + "dev": true + }, + "has-value": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/has-value/-/has-value-1.0.0.tgz", + "integrity": "sha1-GLKB2lhbHFxR3vJMkw7SmgvmsXc=", + "dev": true, + "requires": { + "get-value": "^2.0.6", + "has-values": "^1.0.0", + "isobject": "^3.0.0" + } + }, + "has-values": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/has-values/-/has-values-1.0.0.tgz", + "integrity": "sha1-lbC2P+whRmGab+V/51Yo1aOe/k8=", + "dev": true, + "requires": { + "is-number": "^3.0.0", + "kind-of": "^4.0.0" + }, + "dependencies": { + "kind-of": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-4.0.0.tgz", + "integrity": "sha1-IIE989cSkosgc3hpGkUGb65y3Vc=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } + } + }, + "hash-base": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/hash-base/-/hash-base-3.0.4.tgz", + "integrity": "sha1-X8hoaEfs1zSZQDMZprCj8/auSRg=", + "dev": true, + "requires": { + "inherits": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, + "hash.js": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/hash.js/-/hash.js-1.1.7.tgz", + "integrity": "sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA==", + "dev": true, + "requires": { + "inherits": "^2.0.3", + "minimalistic-assert": "^1.0.1" + } + }, + "he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "dev": true + }, + "hmac-drbg": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/hmac-drbg/-/hmac-drbg-1.0.1.tgz", + "integrity": "sha1-0nRXAQJabHdabFRXk+1QL8DGSaE=", + "dev": true, + "requires": { + "hash.js": "^1.0.3", + "minimalistic-assert": "^1.0.0", + "minimalistic-crypto-utils": "^1.0.1" + } + }, + "homedir-polyfill": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/homedir-polyfill/-/homedir-polyfill-1.0.3.tgz", + "integrity": "sha512-eSmmWE5bZTK2Nou4g0AI3zZ9rswp7GRKoKXS1BLUkvPviOqs4YTN1djQIqrXy9k5gEtdLPy86JjRwsNM9tnDcA==", + "dev": true, + "requires": { + "parse-passwd": "^1.0.0" + } + }, + "hpack.js": { + "version": "2.1.6", + "resolved": "https://registry.npmjs.org/hpack.js/-/hpack.js-2.1.6.tgz", + "integrity": "sha1-h3dMCUnlE/QuhFdbPEVoH63ioLI=", + "dev": true, + "requires": { + "inherits": "^2.0.1", + "obuf": "^1.0.0", + "readable-stream": "^2.0.1", + "wbuf": "^1.1.0" + } + }, + "html-entities": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-1.2.1.tgz", + "integrity": "sha1-DfKTUfByEWNRXfueVUPl9u7VFi8=", + "dev": true + }, + "html-minifier": { + "version": "3.5.21", + "resolved": "https://registry.npmjs.org/html-minifier/-/html-minifier-3.5.21.tgz", + "integrity": "sha512-LKUKwuJDhxNa3uf/LPR/KVjm/l3rBqtYeCOAekvG8F1vItxMUpueGd94i/asDDr8/1u7InxzFA5EeGjhhG5mMA==", + "dev": true, + "requires": { + "camel-case": "3.0.x", + "clean-css": "4.2.x", + "commander": "2.17.x", + "he": "1.2.x", + "param-case": "2.1.x", + "relateurl": "0.2.x", + "uglify-js": "3.4.x" + } + }, + "html-webpack-plugin": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/html-webpack-plugin/-/html-webpack-plugin-3.2.0.tgz", + 
"integrity": "sha1-sBq71yOsqqeze2r0SS69oD2d03s=", + "dev": true, + "requires": { + "html-minifier": "^3.2.3", + "loader-utils": "^0.2.16", + "lodash": "^4.17.3", + "pretty-error": "^2.0.2", + "tapable": "^1.0.0", + "toposort": "^1.0.0", + "util.promisify": "1.0.0" + } + }, + "htmlparser2": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.1.tgz", + "integrity": "sha512-IgieNijUMbkDovyoKObU1DUhm1iwNYE/fuifEoEHfd1oZKZDaONBSkal7Y01shxsM49R4XaMdGez3WnF9UfiCQ==", + "dev": true, + "requires": { + "domelementtype": "^1.3.1", + "domhandler": "^2.3.0", + "domutils": "^1.5.1", + "entities": "^1.1.1", + "inherits": "^2.0.1", + "readable-stream": "^3.1.1" + }, + "dependencies": { + "entities": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz", + "integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==", + "dev": true + }, + "readable-stream": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.4.0.tgz", + "integrity": "sha512-jItXPLmrSR8jmTRmRWJXCnGJsfy85mB3Wd/uINMXA65yrnFo0cPClFIUWzo2najVNSl+mx7/4W8ttlLWJe99pQ==", + "dev": true, + "requires": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + } + } + } + }, + "http-deceiver": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/http-deceiver/-/http-deceiver-1.2.7.tgz", + "integrity": "sha1-+nFolEq5pRnTN8sL7HKE3D5yPYc=", + "dev": true + }, + "http-errors": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz", + "integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==", + "dev": true, + "requires": { + "depd": "~1.1.2", + "inherits": "2.0.3", + "setprototypeof": "1.1.1", + "statuses": ">= 1.5.0 < 2", + "toidentifier": "1.0.0" + }, + "dependencies": { + "inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=", + "dev": true + } + } + }, + "http-parser-js": { + "version": "0.4.10", + "resolved": "https://registry.npmjs.org/http-parser-js/-/http-parser-js-0.4.10.tgz", + "integrity": "sha1-ksnBN0w1CF912zWexWzCV8u5P6Q=", + "dev": true + }, + "http-proxy": { + "version": "1.18.1", + "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.18.1.tgz", + "integrity": "sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==", + "dev": true, + "requires": { + "eventemitter3": "^4.0.0", + "follow-redirects": "^1.0.0", + "requires-port": "^1.0.0" + } + }, + "http-proxy-middleware": { + "version": "0.19.1", + "resolved": "https://registry.npmjs.org/http-proxy-middleware/-/http-proxy-middleware-0.19.1.tgz", + "integrity": "sha512-yHYTgWMQO8VvwNS22eLLloAkvungsKdKTLO8AJlftYIKNfJr3GK3zK0ZCfzDDGUBttdGc8xFy1mCitvNKQtC3Q==", + "dev": true, + "requires": { + "http-proxy": "^1.17.0", + "is-glob": "^4.0.0", + "lodash": "^4.17.11", + "micromatch": "^3.1.10" + } + }, + "https-browserify": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/https-browserify/-/https-browserify-1.0.0.tgz", + "integrity": "sha1-7AbBDgo0wPL68Zn3/X/Hj//QPHM=", + "dev": true + }, + "iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": 
"sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "dev": true, + "requires": { + "safer-buffer": ">= 2.1.2 < 3" + } + }, + "ieee754": { + "version": "1.1.13", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.1.13.tgz", + "integrity": "sha512-4vf7I2LYV/HaWerSo3XmlMkp5eZ83i+/CDluXi/IGTs/O1sejBNhTtnxzmRZfvOUqj7lZjqHkeTvpgSFDlWZTg==", + "dev": true + }, + "iferr": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/iferr/-/iferr-0.1.5.tgz", + "integrity": "sha1-xg7taebY/bazEEofy8ocGS3FtQE=", + "dev": true + }, + "import-local": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/import-local/-/import-local-2.0.0.tgz", + "integrity": "sha512-b6s04m3O+s3CGSbqDIyP4R6aAwAeYlVq9+WUWep6iHa8ETRf9yei1U48C5MmfJmV9AiLYYBKPMq/W+/WRpQmCQ==", + "dev": true, + "requires": { + "pkg-dir": "^3.0.0", + "resolve-cwd": "^2.0.0" + } + }, + "imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha1-khi5srkoojixPcT7a21XbyMUU+o=", + "dev": true + }, + "infer-owner": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/infer-owner/-/infer-owner-1.0.4.tgz", + "integrity": "sha512-IClj+Xz94+d7irH5qRyfJonOdfTzuDaifE6ZPWfx0N0+/ATZCbuTPq2prFl526urkQd90WyUKIh1DfBQ2hMz9A==", + "dev": true + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "dev": true, + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true + }, + "ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "dev": true + }, + "internal-ip": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/internal-ip/-/internal-ip-4.3.0.tgz", + "integrity": "sha512-S1zBo1D6zcsyuC6PMmY5+55YMILQ9av8lotMx447Bq6SAgo/sDK6y6uUKmuYhW7eacnIhFfsPmCNYdDzsnnDCg==", + "dev": true, + "requires": { + "default-gateway": "^4.2.0", + "ipaddr.js": "^1.9.0" + } + }, + "interpret": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/interpret/-/interpret-1.2.0.tgz", + "integrity": "sha512-mT34yGKMNceBQUoVn7iCDKDntA7SC6gycMAWzGx1z/CMCTV7b2AAtXlo3nRyHZ1FelRkQbQjprHSYGwzLtkVbw==", + "dev": true + }, + "invert-kv": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/invert-kv/-/invert-kv-2.0.0.tgz", + "integrity": "sha512-wPVv/y/QQ/Uiirj/vh3oP+1Ww+AWehmi1g5fFWGPF6IpCBCDVrhgHRMvrLfdYcwDh3QJbGXDW4JAuzxElLSqKA==", + "dev": true + }, + "ip": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/ip/-/ip-1.1.5.tgz", + "integrity": "sha1-vd7XARQpCCjAoDnnLvJfWq7ENUo=", + "dev": true + }, + "ip-regex": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/ip-regex/-/ip-regex-2.1.0.tgz", + "integrity": "sha1-+ni/XS5pE8kRzp+BnuUUa7bYROk=", + "dev": true + }, + "ipaddr.js": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.0.tgz", + "integrity": "sha512-M4Sjn6N/+O6/IXSJseKqHoFc+5FdGJ22sXqnjTpdZweHK64MzEPAyQZyEU3R/KRv2GLoa7nNtg/C2Ev6m7z+eA==", + "dev": true + }, + "is-absolute-url": { + "version": "3.0.3", + 
"resolved": "https://registry.npmjs.org/is-absolute-url/-/is-absolute-url-3.0.3.tgz", + "integrity": "sha512-opmNIX7uFnS96NtPmhWQgQx6/NYFgsUXYMllcfzwWKUMwfo8kku1TvE6hkNcH+Q1ts5cMVrsY7j0bxXQDciu9Q==", + "dev": true + }, + "is-accessor-descriptor": { + "version": "0.1.6", + "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", + "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", + "dev": true, + "requires": { + "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } + } + }, + "is-arguments": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.0.4.tgz", + "integrity": "sha512-xPh0Rmt8NE65sNzvyUmWgI1tz3mKq74lGA0mL8LYZcoIzKOzDh6HmrYm3d18k60nHerC8A9Km8kYu87zfSFnLA==", + "dev": true + }, + "is-binary-path": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-1.0.1.tgz", + "integrity": "sha1-dfFmQrSA8YenEcgUFh/TpKdlWJg=", + "dev": true, + "requires": { + "binary-extensions": "^1.0.0" + } + }, + "is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==", + "dev": true + }, + "is-callable": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.1.4.tgz", + "integrity": "sha512-r5p9sxJjYnArLjObpjA4xu5EKI3CuKHkJXMhT7kwbpUyIFD1n5PMAsoPvWnvtZiNz7LjkYDRZhd7FlI0eMijEA==", + "dev": true + }, + "is-data-descriptor": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", + "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", + "dev": true, + "requires": { + "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } + } + }, + "is-date-object": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.1.tgz", + "integrity": "sha1-mqIOtq7rv/d/vTPnTKAbM1gdOhY=", + "dev": true + }, + "is-descriptor": { + "version": "0.1.6", + "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", + "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", + "dev": true, + "requires": { + "is-accessor-descriptor": "^0.1.6", + "is-data-descriptor": "^0.1.4", + "kind-of": "^5.0.0" + }, + "dependencies": { + "kind-of": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", + "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", + "dev": true + } + } + }, + "is-extendable": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz", + "integrity": "sha1-YrEQ4omkcUGOPsNqYX1HLjAd/Ik=", + "dev": true + }, + "is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + "dev": true + }, + "is-fullwidth-code-point": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", + "integrity": "sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=", + "dev": true + }, + "is-glob": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", + "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", + "dev": true, + "requires": { + "is-extglob": "^2.1.1" + } + }, + "is-number": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", + "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", + "dev": true, + "requires": { + "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } + } + }, + "is-path-cwd": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/is-path-cwd/-/is-path-cwd-2.2.0.tgz", + "integrity": "sha512-w942bTcih8fdJPJmQHFzkS76NEP8Kzzvmw92cXsazb8intwLqPibPPdXf4ANdKV3rYMuuQYGIWtvz9JilB3NFQ==", + "dev": true + }, + "is-path-in-cwd": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-path-in-cwd/-/is-path-in-cwd-2.1.0.tgz", + "integrity": "sha512-rNocXHgipO+rvnP6dk3zI20RpOtrAM/kzbB258Uw5BWr3TpXi861yzjo16Dn4hUox07iw5AyeMLHWsujkjzvRQ==", + "dev": true, + "requires": { + "is-path-inside": "^2.1.0" + } + }, + "is-path-inside": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-path-inside/-/is-path-inside-2.1.0.tgz", + "integrity": "sha512-wiyhTzfDWsvwAW53OBWF5zuvaOGlZ6PwYxAbPVDhpm+gM09xKQGjBq/8uYN12aDvMxnAnq3dxTyoSoRNmg5YFg==", + "dev": true, + "requires": { + "path-is-inside": "^1.0.2" + } + }, + "is-plain-object": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", + "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==", + "dev": true, + "requires": { + "isobject": "^3.0.1" + } + }, + "is-regex": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.0.4.tgz", + "integrity": "sha1-VRdIm1RwkbCTDglWVM7SXul+lJE=", + "dev": true, + "requires": { + "has": "^1.0.1" + } + }, + "is-stream": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-1.1.0.tgz", + "integrity": "sha1-EtSj3U5o4Lec6428hBc66A2RykQ=", + "dev": true + }, + "is-symbol": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.0.2.tgz", + "integrity": "sha512-HS8bZ9ox60yCJLH9snBpIwv9pYUAkcuLhSA1oero1UB5y9aiQpRA8y2ex945AOtCZL1lJDeIk3G5LthswI46Lw==", + "dev": true, + "requires": { + "has-symbols": "^1.0.0" + } + }, + "is-windows": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-windows/-/is-windows-1.0.2.tgz", + "integrity": "sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==", + "dev": true + }, + "is-wsl": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-1.1.0.tgz", + "integrity": "sha1-HxbkqiKwTRM2tmGIpmrzxgDDpm0=", + "dev": true + }, + "isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=", + "dev": true + }, + "isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": 
"sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", + "dev": true + }, + "isobject": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", + "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", + "dev": true + }, + "json-parse-better-errors": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz", + "integrity": "sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==", + "dev": true + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "json3": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/json3/-/json3-3.3.3.tgz", + "integrity": "sha512-c7/8mbUsKigAbLkD5B010BK4D9LZm7A1pNItkEwiUZRpIN66exu/e7YQWysGun+TRKaJp8MhemM+VkfWv42aCA==", + "dev": true + }, + "json5": { + "version": "0.5.1", + "resolved": "https://registry.npmjs.org/json5/-/json5-0.5.1.tgz", + "integrity": "sha1-Hq3nrMASA0rYTiOWdn6tn6VJWCE=", + "dev": true + }, + "killable": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/killable/-/killable-1.0.1.tgz", + "integrity": "sha512-LzqtLKlUwirEUyl/nicirVmNiPvYs7l5n8wOPP7fyJVpUPkvCnW/vuiXGpylGUlnPDnB7311rARzAt3Mhswpjg==", + "dev": true + }, + "kind-of": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", + "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", + "dev": true + }, + "lcid": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/lcid/-/lcid-2.0.0.tgz", + "integrity": "sha512-avPEb8P8EGnwXKClwsNUgryVjllcRqtMYa49NTsbQagYuT1DcXnl1915oxWjoyGrXR6zH/Y0Zc96xWsPcoDKeA==", + "dev": true, + "requires": { + "invert-kv": "^2.0.0" + } + }, + "loader-runner": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-2.4.0.tgz", + "integrity": "sha512-Jsmr89RcXGIwivFY21FcRrisYZfvLMTWx5kOLc+JTxtpBOG6xML0vzbc6SEQG2FO9/4Fc3wW4LVcB5DmGflaRw==", + "dev": true + }, + "loader-utils": { + "version": "0.2.17", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-0.2.17.tgz", + "integrity": "sha1-+G5jdNQyBabmxg6RlvF8Apm/s0g=", + "dev": true, + "requires": { + "big.js": "^3.1.3", + "emojis-list": "^2.0.0", + "json5": "^0.5.0", + "object-assign": "^4.0.1" + } + }, + "locate-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-3.0.0.tgz", + "integrity": "sha512-7AO748wWnIhNqAuaty2ZWHkQHRSNfPVIsPIfwEOWO22AmaoVrWavlOcMR5nzTLNYvp36X220/maaRsrec1G65A==", + "dev": true, + "requires": { + "p-locate": "^3.0.0", + "path-exists": "^3.0.0" + } + }, + "lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", + "dev": true + }, + "loglevel": { + "version": "1.6.4", + "resolved": "https://registry.npmjs.org/loglevel/-/loglevel-1.6.4.tgz", + "integrity": "sha512-p0b6mOGKcGa+7nnmKbpzR6qloPbrgLcnio++E+14Vo/XffOGwZtRpUhr8dTH/x2oCMmEoIU0Zwm3ZauhvYD17g==", + "dev": true + }, + "lower-case": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/lower-case/-/lower-case-1.1.4.tgz", + "integrity": "sha1-miyr0bno4K6ZOkv31YdcOcQujqw=", + 
"dev": true + }, + "lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "requires": { + "yallist": "^3.0.2" + } + }, + "make-dir": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-2.1.0.tgz", + "integrity": "sha512-LS9X+dc8KLxXCb8dni79fLIIUA5VyZoyjSMCwTluaXA0o27cCK0bhXkpgw+sTXVpPy/lSO57ilRixqk0vDmtRA==", + "dev": true, + "requires": { + "pify": "^4.0.1", + "semver": "^5.6.0" + } + }, + "mamacro": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/mamacro/-/mamacro-0.0.3.tgz", + "integrity": "sha512-qMEwh+UujcQ+kbz3T6V+wAmO2U8veoq2w+3wY8MquqwVA3jChfwY+Tk52GZKDfACEPjuZ7r2oJLejwpt8jtwTA==", + "dev": true + }, + "map-age-cleaner": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/map-age-cleaner/-/map-age-cleaner-0.1.3.tgz", + "integrity": "sha512-bJzx6nMoP6PDLPBFmg7+xRKeFZvFboMrGlxmNj9ClvX53KrmvM5bXFXEWjbz4cz1AFn+jWJ9z/DJSz7hrs0w3w==", + "dev": true, + "requires": { + "p-defer": "^1.0.0" + } + }, + "map-cache": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/map-cache/-/map-cache-0.2.2.tgz", + "integrity": "sha1-wyq9C9ZSXZsFFkW7TyasXcmKDb8=", + "dev": true + }, + "map-visit": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/map-visit/-/map-visit-1.0.0.tgz", + "integrity": "sha1-7Nyo8TFE5mDxtb1B8S80edmN+48=", + "dev": true, + "requires": { + "object-visit": "^1.0.0" + } + }, + "md5.js": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.5.tgz", + "integrity": "sha512-xitP+WxNPcTTOgnTJcrhM0xvdPepipPSf3I8EIpGKeFLjt3PlJLIDG3u8EX53ZIubkb+5U2+3rELYpEhHhzdkg==", + "dev": true, + "requires": { + "hash-base": "^3.0.0", + "inherits": "^2.0.1", + "safe-buffer": "^5.1.2" + } + }, + "media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=", + "dev": true + }, + "mem": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/mem/-/mem-4.3.0.tgz", + "integrity": "sha512-qX2bG48pTqYRVmDB37rn/6PT7LcR8T7oAX3bf99u1Tt1nzxYfxkgqDwUwolPlXweM0XzBOBFzSx4kfp7KP1s/w==", + "dev": true, + "requires": { + "map-age-cleaner": "^0.1.1", + "mimic-fn": "^2.0.0", + "p-is-promise": "^2.0.0" + } + }, + "memory-fs": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/memory-fs/-/memory-fs-0.4.1.tgz", + "integrity": "sha1-OpoguEYlI+RHz7x+i7gO1me/xVI=", + "dev": true, + "requires": { + "errno": "^0.1.3", + "readable-stream": "^2.0.1" + } + }, + "merge-descriptors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", + "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=", + "dev": true + }, + "methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=", + "dev": true + }, + "micromatch": { + "version": "3.1.10", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", + "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", + "dev": true, + "requires": { + "arr-diff": "^4.0.0", + "array-unique": "^0.3.2", + "braces": "^2.3.1", + "define-property": "^2.0.2", + "extend-shallow": "^3.0.2", + "extglob": "^2.0.4", + "fragment-cache": "^0.2.1", + 
"kind-of": "^6.0.2", + "nanomatch": "^1.2.9", + "object.pick": "^1.3.0", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.2" + } + }, + "miller-rabin": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/miller-rabin/-/miller-rabin-4.0.1.tgz", + "integrity": "sha512-115fLhvZVqWwHPbClyntxEVfVDfl9DLLTuJvq3g2O/Oxi8AiNouAHvDSzHS0viUJc+V5vm3eq91Xwqn9dp4jRA==", + "dev": true, + "requires": { + "bn.js": "^4.0.0", + "brorand": "^1.0.1" + } + }, + "mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "dev": true + }, + "mime-db": { + "version": "1.40.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.40.0.tgz", + "integrity": "sha512-jYdeOMPy9vnxEqFRRo6ZvTZ8d9oPb+k18PKoYNYUe2stVEBPPwsln/qWzdbmaIvnhZ9v2P+CuecK+fpUfsV2mA==", + "dev": true + }, + "mime-types": { + "version": "2.1.24", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.24.tgz", + "integrity": "sha512-WaFHS3MCl5fapm3oLxU4eYDw77IQM2ACcxQ9RIxfaC3ooc6PFuBMGZZsYpvoXS5D5QTWPieo1jjLdAm3TBP3cQ==", + "dev": true, + "requires": { + "mime-db": "1.40.0" + } + }, + "mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "dev": true + }, + "minimalistic-assert": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz", + "integrity": "sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A==", + "dev": true + }, + "minimalistic-crypto-utils": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/minimalistic-crypto-utils/-/minimalistic-crypto-utils-1.0.1.tgz", + "integrity": "sha1-9sAMHAsIIkblxNmd+4x8CDsrWCo=", + "dev": true + }, + "minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dev": true, + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "minimist": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", + "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", + "dev": true + }, + "mississippi": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mississippi/-/mississippi-3.0.0.tgz", + "integrity": "sha512-x471SsVjUtBRtcvd4BzKE9kFC+/2TeWgKCgw0bZcw1b9l2X3QX5vCWgF+KaZaYm87Ss//rHnWryupDrgLvmSkA==", + "dev": true, + "requires": { + "concat-stream": "^1.5.0", + "duplexify": "^3.4.2", + "end-of-stream": "^1.1.0", + "flush-write-stream": "^1.0.0", + "from2": "^2.1.0", + "parallel-transform": "^1.1.0", + "pump": "^3.0.0", + "pumpify": "^1.3.3", + "stream-each": "^1.1.0", + "through2": "^2.0.0" + } + }, + "mixin-deep": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/mixin-deep/-/mixin-deep-1.3.2.tgz", + "integrity": "sha512-WRoDn//mXBiJ1H40rqa3vH0toePwSsGb45iInWlTySa+Uu4k3tYUSxa2v1KqAiLtvlrSzaExqS1gtk96A9zvEA==", + "dev": true, + "requires": { + "for-in": "^1.0.2", + "is-extendable": "^1.0.1" + }, + "dependencies": { + "is-extendable": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-1.0.1.tgz", + "integrity": 
"sha512-arnXMxT1hhoKo9k1LZdmlNyJdDDfy2v0fXjFlmok4+i8ul/6WlbVge9bhM74OpNPQPMGUToDtz+KXa1PneJxOA==", + "dev": true, + "requires": { + "is-plain-object": "^2.0.4" + } + } + } + }, + "mkdirp": { + "version": "0.5.1", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", + "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", + "dev": true, + "requires": { + "minimist": "0.0.8" + }, + "dependencies": { + "minimist": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", + "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=", + "dev": true + } + } + }, + "move-concurrently": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/move-concurrently/-/move-concurrently-1.0.1.tgz", + "integrity": "sha1-viwAX9oy4LKa8fBdfEszIUxwH5I=", + "dev": true, + "requires": { + "aproba": "^1.1.1", + "copy-concurrently": "^1.0.0", + "fs-write-stream-atomic": "^1.0.8", + "mkdirp": "^0.5.1", + "rimraf": "^2.5.4", + "run-queue": "^1.0.3" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", + "dev": true + }, + "multicast-dns": { + "version": "6.2.3", + "resolved": "https://registry.npmjs.org/multicast-dns/-/multicast-dns-6.2.3.tgz", + "integrity": "sha512-ji6J5enbMyGRHIAkAOu3WdV8nggqviKCEKtXcOqfphZZtQrmHKycfynJ2V7eVPUA4NhJ6V7Wf4TmGbTwKE9B6g==", + "dev": true, + "requires": { + "dns-packet": "^1.3.1", + "thunky": "^1.0.2" + } + }, + "multicast-dns-service-types": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/multicast-dns-service-types/-/multicast-dns-service-types-1.1.0.tgz", + "integrity": "sha1-iZ8R2WhuXgXLkbNdXw5jt3PPyQE=", + "dev": true + }, + "nan": { + "version": "2.14.0", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.0.tgz", + "integrity": "sha512-INOFj37C7k3AfaNTtX8RhsTw7qRy7eLET14cROi9+5HAVbbHuIWUHEauBv5qT4Av2tWasiTY1Jw6puUNqRJXQg==", + "dev": true, + "optional": true + }, + "nanomatch": { + "version": "1.2.13", + "resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz", + "integrity": "sha512-fpoe2T0RbHwBTBUOftAfBPaDEi06ufaUai0mE6Yn1kacc3SnTErfb/h+X94VXzI64rKFHYImXSvdwGGCmwOqCA==", + "dev": true, + "requires": { + "arr-diff": "^4.0.0", + "array-unique": "^0.3.2", + "define-property": "^2.0.2", + "extend-shallow": "^3.0.2", + "fragment-cache": "^0.2.1", + "is-windows": "^1.0.2", + "kind-of": "^6.0.2", + "object.pick": "^1.3.0", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.1" + } + }, + "negotiator": { + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", + "integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==", + "dev": true + }, + "neo-async": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.1.tgz", + "integrity": "sha512-iyam8fBuCUpWeKPGpaNMetEocMt364qkCsfL9JuhjXX6dRnguRVOfk2GZaDpPjcOKiiXCPINZC1GczQ7iTq3Zw==", + "dev": true + }, + "nice-try": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz", + "integrity": "sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==", + "dev": true + }, + "no-case": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/no-case/-/no-case-2.3.2.tgz", + "integrity": "sha512-rmTZ9kz+f3rCvK2TD1Ue/oZlns7OGoIWP4fc3llxxRXlOkHKoWPPWJOfFYpITabSow43QJbRIoHQXtt10VldyQ==", + "dev": true, + "requires": { + 
"lower-case": "^1.1.1" + } + }, + "node-forge": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.9.0.tgz", + "integrity": "sha512-7ASaDa3pD+lJ3WvXFsxekJQelBKRpne+GOVbLbtHYdd7pFspyeuJHnWfLplGf3SwKGbfs/aYl5V/JCIaHVUKKQ==", + "dev": true + }, + "node-libs-browser": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/node-libs-browser/-/node-libs-browser-2.2.1.tgz", + "integrity": "sha512-h/zcD8H9kaDZ9ALUWwlBUDo6TKF8a7qBSCSEGfjTVIYeqsioSKaAX+BN7NgiMGp6iSIXZ3PxgCu8KS3b71YK5Q==", + "dev": true, + "requires": { + "assert": "^1.1.1", + "browserify-zlib": "^0.2.0", + "buffer": "^4.3.0", + "console-browserify": "^1.1.0", + "constants-browserify": "^1.0.0", + "crypto-browserify": "^3.11.0", + "domain-browser": "^1.1.1", + "events": "^3.0.0", + "https-browserify": "^1.0.0", + "os-browserify": "^0.3.0", + "path-browserify": "0.0.1", + "process": "^0.11.10", + "punycode": "^1.2.4", + "querystring-es3": "^0.2.0", + "readable-stream": "^2.3.3", + "stream-browserify": "^2.0.1", + "stream-http": "^2.7.2", + "string_decoder": "^1.0.0", + "timers-browserify": "^2.0.4", + "tty-browserify": "0.0.0", + "url": "^0.11.0", + "util": "^0.11.0", + "vm-browserify": "^1.0.1" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", + "dev": true + } + } + }, + "normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true + }, + "npm-run-path": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-2.0.2.tgz", + "integrity": "sha1-NakjLfo11wZ7TLLd8jV7GHFTbF8=", + "dev": true, + "requires": { + "path-key": "^2.0.0" + } + }, + "nth-check": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz", + "integrity": "sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg==", + "dev": true, + "requires": { + "boolbase": "~1.0.0" + } + }, + "number-is-nan": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/number-is-nan/-/number-is-nan-1.0.1.tgz", + "integrity": "sha1-CXtgK1NCKlIsGvuHkDGDNpQaAR0=", + "dev": true + }, + "object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", + "dev": true + }, + "object-copy": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/object-copy/-/object-copy-0.1.0.tgz", + "integrity": "sha1-fn2Fi3gb18mRpBupde04EnVOmYw=", + "dev": true, + "requires": { + "copy-descriptor": "^0.1.0", + "define-property": "^0.2.5", + "kind-of": "^3.0.3" + }, + "dependencies": { + "define-property": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", + "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", + "dev": true, + "requires": { + "is-descriptor": "^0.1.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } + } + }, + "object-inspect": { + "version": "1.6.0", + "resolved": 
"https://registry.npmjs.org/object-inspect/-/object-inspect-1.6.0.tgz", + "integrity": "sha512-GJzfBZ6DgDAmnuaM3104jR4s1Myxr3Y3zfIyN4z3UdqN69oSRacNK8UhnobDdC+7J2AHCjGwxQubNJfE70SXXQ==", + "dev": true + }, + "object-is": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/object-is/-/object-is-1.0.1.tgz", + "integrity": "sha1-CqYOyZiaCz7Xlc9NBvYs8a1lObY=", + "dev": true + }, + "object-keys": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", + "dev": true + }, + "object-visit": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/object-visit/-/object-visit-1.0.1.tgz", + "integrity": "sha1-95xEk68MU3e1n+OdOV5BBC3QRbs=", + "dev": true, + "requires": { + "isobject": "^3.0.0" + } + }, + "object.getownpropertydescriptors": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/object.getownpropertydescriptors/-/object.getownpropertydescriptors-2.0.3.tgz", + "integrity": "sha1-h1jIRvW0B62rDyNuCYbxSwUcqhY=", + "dev": true, + "requires": { + "define-properties": "^1.1.2", + "es-abstract": "^1.5.1" + } + }, + "object.pick": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/object.pick/-/object.pick-1.3.0.tgz", + "integrity": "sha1-h6EKxMFpS9Lhy/U1kaZhQftd10c=", + "dev": true, + "requires": { + "isobject": "^3.0.1" + } + }, + "obuf": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/obuf/-/obuf-1.1.2.tgz", + "integrity": "sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg==", + "dev": true + }, + "on-finished": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", + "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=", + "dev": true, + "requires": { + "ee-first": "1.1.1" + } + }, + "on-headers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.0.2.tgz", + "integrity": "sha512-pZAE+FJLoyITytdqK0U5s+FIpjN0JP3OzFi/u8Rx+EV5/W+JTWGXG8xFzevE7AjBfDqHv/8vL8qQsIhHnqRkrA==", + "dev": true + }, + "once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "dev": true, + "requires": { + "wrappy": "1" + } + }, + "opn": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/opn/-/opn-5.5.0.tgz", + "integrity": "sha512-PqHpggC9bLV0VeWcdKhkpxY+3JTzetLSqTCWL/z/tFIbI6G8JCjondXklT1JinczLz2Xib62sSp0T/gKT4KksA==", + "dev": true, + "requires": { + "is-wsl": "^1.1.0" + } + }, + "original": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/original/-/original-1.0.2.tgz", + "integrity": "sha512-hyBVl6iqqUOJ8FqRe+l/gS8H+kKYjrEndd5Pm1MfBtsEKA038HkkdbAl/72EAXGyonD/PFsvmVG+EvcIpliMBg==", + "dev": true, + "requires": { + "url-parse": "^1.4.3" + } + }, + "os-browserify": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/os-browserify/-/os-browserify-0.3.0.tgz", + "integrity": "sha1-hUNzx/XCMVkU/Jv8a9gjj92h7Cc=", + "dev": true + }, + "os-locale": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-3.1.0.tgz", + "integrity": "sha512-Z8l3R4wYWM40/52Z+S265okfFj8Kt2cC2MKY+xNi3kFs+XGI7WXu/I309QQQYbRW4ijiZ+yxs9pqEhJh0DqW3Q==", + "dev": true, + "requires": { + "execa": "^1.0.0", + "lcid": "^2.0.0", + "mem": "^4.0.0" + } + }, + "p-defer": { + "version": "1.0.0", + "resolved": 
"https://registry.npmjs.org/p-defer/-/p-defer-1.0.0.tgz", + "integrity": "sha1-n26xgvbJqozXQwBKfU+WsZaw+ww=", + "dev": true + }, + "p-finally": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", + "integrity": "sha1-P7z7FbiZpEEjs0ttzBi3JDNqLK4=", + "dev": true + }, + "p-is-promise": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-is-promise/-/p-is-promise-2.1.0.tgz", + "integrity": "sha512-Y3W0wlRPK8ZMRbNq97l4M5otioeA5lm1z7bkNkxCka8HSPjR0xRWmpCmc9utiaLP9Jb1eD8BgeIxTW4AIF45Pg==", + "dev": true + }, + "p-limit": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.2.1.tgz", + "integrity": "sha512-85Tk+90UCVWvbDavCLKPOLC9vvY8OwEX/RtKF+/1OADJMVlFfEHOiMTPVyxg7mk/dKa+ipdHm0OUkTvCpMTuwg==", + "dev": true, + "requires": { + "p-try": "^2.0.0" + } + }, + "p-locate": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-3.0.0.tgz", + "integrity": "sha512-x+12w/To+4GFfgJhBEpiDcLozRJGegY+Ei7/z0tSLkMmxGZNybVMSfWj9aJn8Z5Fc7dBUNJOOVgPv2H7IwulSQ==", + "dev": true, + "requires": { + "p-limit": "^2.0.0" + } + }, + "p-map": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-map/-/p-map-2.1.0.tgz", + "integrity": "sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw==", + "dev": true + }, + "p-retry": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-3.0.1.tgz", + "integrity": "sha512-XE6G4+YTTkT2a0UWb2kjZe8xNwf8bIbnqpc/IS/idOBVhyves0mK5OJgeocjx7q5pvX/6m23xuzVPYT1uGM73w==", + "dev": true, + "requires": { + "retry": "^0.12.0" + } + }, + "p-try": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", + "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", + "dev": true + }, + "pako": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.10.tgz", + "integrity": "sha512-0DTvPVU3ed8+HNXOu5Bs+o//Mbdj9VNQMUOe9oKCwh8l0GNwpTDMKCWbRjgtD291AWnkAgkqA/LOnQS8AmS1tw==", + "dev": true + }, + "parallel-transform": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/parallel-transform/-/parallel-transform-1.2.0.tgz", + "integrity": "sha512-P2vSmIu38uIlvdcU7fDkyrxj33gTUy/ABO5ZUbGowxNCopBq/OoD42bP4UmMrJoPyk4Uqf0mu3mtWBhHCZD8yg==", + "dev": true, + "requires": { + "cyclist": "^1.0.1", + "inherits": "^2.0.3", + "readable-stream": "^2.1.5" + } + }, + "param-case": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/param-case/-/param-case-2.1.1.tgz", + "integrity": "sha1-35T9jPZTHs915r75oIWPvHK+Ikc=", + "dev": true, + "requires": { + "no-case": "^2.2.0" + } + }, + "parse-asn1": { + "version": "5.1.5", + "resolved": "https://registry.npmjs.org/parse-asn1/-/parse-asn1-5.1.5.tgz", + "integrity": "sha512-jkMYn1dcJqF6d5CpU689bq7w/b5ALS9ROVSpQDPrZsqqesUJii9qutvoT5ltGedNXMO2e16YUWIghG9KxaViTQ==", + "dev": true, + "requires": { + "asn1.js": "^4.0.0", + "browserify-aes": "^1.0.0", + "create-hash": "^1.1.0", + "evp_bytestokey": "^1.0.0", + "pbkdf2": "^3.0.3", + "safe-buffer": "^5.1.1" + } + }, + "parse-passwd": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/parse-passwd/-/parse-passwd-1.0.0.tgz", + "integrity": "sha1-bVuTSkVpk7I9N/QKOC1vFmao5cY=", + "dev": true + }, + "parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": 
"sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "dev": true + }, + "pascalcase": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/pascalcase/-/pascalcase-0.1.1.tgz", + "integrity": "sha1-s2PlXoAGym/iF4TS2yK9FdeRfxQ=", + "dev": true + }, + "path-browserify": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-0.0.1.tgz", + "integrity": "sha512-BapA40NHICOS+USX9SN4tyhq+A2RrN/Ws5F0Z5aMHDp98Fl86lX8Oti8B7uN93L4Ifv4fHOEA+pQw87gmMO/lQ==", + "dev": true + }, + "path-dirname": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/path-dirname/-/path-dirname-1.0.2.tgz", + "integrity": "sha1-zDPSTVJeCZpTiMAzbG4yuRYGCeA=", + "dev": true + }, + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "dev": true + }, + "path-is-inside": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz", + "integrity": "sha1-NlQX3t5EQw0cEa9hAn+s8HS9/FM=", + "dev": true + }, + "path-key": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-2.0.1.tgz", + "integrity": "sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A=", + "dev": true + }, + "path-to-regexp": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", + "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=", + "dev": true + }, + "pbkdf2": { + "version": "3.0.17", + "resolved": "https://registry.npmjs.org/pbkdf2/-/pbkdf2-3.0.17.tgz", + "integrity": "sha512-U/il5MsrZp7mGg3mSQfn742na2T+1/vHDCG5/iTI3X9MKUuYUZVLQhyRsg06mCgDBTd57TxzgZt7P+fYfjRLtA==", + "dev": true, + "requires": { + "create-hash": "^1.1.2", + "create-hmac": "^1.1.4", + "ripemd160": "^2.0.1", + "safe-buffer": "^5.0.1", + "sha.js": "^2.4.8" + } + }, + "pify": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/pify/-/pify-4.0.1.tgz", + "integrity": "sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g==", + "dev": true + }, + "pinkie": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/pinkie/-/pinkie-2.0.4.tgz", + "integrity": "sha1-clVrgM+g1IqXToDnckjoDtT3+HA=", + "dev": true + }, + "pinkie-promise": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/pinkie-promise/-/pinkie-promise-2.0.1.tgz", + "integrity": "sha1-ITXW36ejWMBprJsXh3YogihFD/o=", + "dev": true, + "requires": { + "pinkie": "^2.0.0" + } + }, + "pkg-dir": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-3.0.0.tgz", + "integrity": "sha512-/E57AYkoeQ25qkxMj5PBOVgF8Kiu/h7cYS30Z5+R7WaiCCBfLq58ZI/dSeaEKb9WVJV5n/03QwrN3IeWIFllvw==", + "dev": true, + "requires": { + "find-up": "^3.0.0" + } + }, + "portfinder": { + "version": "1.0.25", + "resolved": "https://registry.npmjs.org/portfinder/-/portfinder-1.0.25.tgz", + "integrity": "sha512-6ElJnHBbxVA1XSLgBp7G1FiCkQdlqGzuF7DswL5tcea+E8UpuvPU7beVAjjRwCioTS9ZluNbu+ZyRvgTsmqEBg==", + "dev": true, + "requires": { + "async": "^2.6.2", + "debug": "^3.1.1", + "mkdirp": "^0.5.1" + }, + "dependencies": { + "debug": { + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz", + "integrity": 
"sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==", + "dev": true, + "requires": { + "ms": "^2.1.1" + } + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + } + } + }, + "posix-character-classes": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/posix-character-classes/-/posix-character-classes-0.1.1.tgz", + "integrity": "sha1-AerA/jta9xoqbAL+q7jB/vfgDqs=", + "dev": true + }, + "pretty-error": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/pretty-error/-/pretty-error-2.1.1.tgz", + "integrity": "sha1-X0+HyPkeWuPzuoerTPXgOxoX8aM=", + "dev": true, + "requires": { + "renderkid": "^2.0.1", + "utila": "~0.4" + } + }, + "process": { + "version": "0.11.10", + "resolved": "https://registry.npmjs.org/process/-/process-0.11.10.tgz", + "integrity": "sha1-czIwDoQBYb2j5podHZGn1LwW8YI=", + "dev": true + }, + "process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", + "dev": true + }, + "promise-inflight": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/promise-inflight/-/promise-inflight-1.0.1.tgz", + "integrity": "sha1-mEcocL8igTL8vdhoEputEsPAKeM=", + "dev": true + }, + "proxy-addr": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.5.tgz", + "integrity": "sha512-t/7RxHXPH6cJtP0pRG6smSr9QJidhB+3kXu0KgXnbGYMgzEnUxRQ4/LDdfOwZEMyIh3/xHb8PX3t+lfL9z+YVQ==", + "dev": true, + "requires": { + "forwarded": "~0.1.2", + "ipaddr.js": "1.9.0" + } + }, + "prr": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/prr/-/prr-1.0.1.tgz", + "integrity": "sha1-0/wRS6BplaRexok/SEzrHXj19HY=", + "dev": true + }, + "public-encrypt": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/public-encrypt/-/public-encrypt-4.0.3.tgz", + "integrity": "sha512-zVpa8oKZSz5bTMTFClc1fQOnyyEzpl5ozpi1B5YcvBrdohMjH2rfsBtyXcuNuwjsDIXmBYlF2N5FlJYhR29t8Q==", + "dev": true, + "requires": { + "bn.js": "^4.1.0", + "browserify-rsa": "^4.0.0", + "create-hash": "^1.1.0", + "parse-asn1": "^5.0.0", + "randombytes": "^2.0.1", + "safe-buffer": "^5.1.2" + } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "pumpify": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/pumpify/-/pumpify-1.5.1.tgz", + "integrity": "sha512-oClZI37HvuUJJxSKKrC17bZ9Cu0ZYhEAGPsPUy9KlMUmv9dKX2o77RUmq7f3XjIxbwyGwYzbzQ1L2Ks8sIradQ==", + "dev": true, + "requires": { + "duplexify": "^3.6.0", + "inherits": "^2.0.3", + "pump": "^2.0.0" + }, + "dependencies": { + "pump": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", + "integrity": "sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + } + } + }, + "punycode": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + 
"integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", + "dev": true + }, + "qs": { + "version": "6.7.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz", + "integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==", + "dev": true + }, + "querystring": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz", + "integrity": "sha1-sgmEkgO7Jd+CDadW50cAWHhSFiA=", + "dev": true + }, + "querystring-es3": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/querystring-es3/-/querystring-es3-0.2.1.tgz", + "integrity": "sha1-nsYfeQSYdXB9aUFFlv2Qek1xHnM=", + "dev": true + }, + "querystringify": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.1.1.tgz", + "integrity": "sha512-w7fLxIRCRT7U8Qu53jQnJyPkYZIaR4n5151KMfcJlO/A9397Wxb1amJvROTK6TOnp7PfoAmg/qXiNHI+08jRfA==", + "dev": true + }, + "randombytes": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", + "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", + "dev": true, + "requires": { + "safe-buffer": "^5.1.0" + } + }, + "randomfill": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/randomfill/-/randomfill-1.0.4.tgz", + "integrity": "sha512-87lcbR8+MhcWcUiQ+9e+Rwx8MyR2P7qnt15ynUlbm3TU/fjbgz4GsvfSUDTemtCCtVCqb4ZcEFlyPNTh9bBTLw==", + "dev": true, + "requires": { + "randombytes": "^2.0.5", + "safe-buffer": "^5.1.0" + } + }, + "range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "dev": true + }, + "raw-body": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz", + "integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==", + "dev": true, + "requires": { + "bytes": "3.1.0", + "http-errors": "1.7.2", + "iconv-lite": "0.4.24", + "unpipe": "1.0.0" + }, + "dependencies": { + "bytes": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz", + "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==", + "dev": true + } + } + }, + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "dev": true, + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "readdirp": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-2.2.1.tgz", + "integrity": "sha512-1JU/8q+VgFZyxwrJ+SVIOsh+KywWGpds3NTqikiKpDMZWScmAYyKIgqkO+ARvNWJfXeXR1zxz7aHF4u4CyH6vQ==", + "dev": true, + "requires": { + "graceful-fs": "^4.1.11", + "micromatch": "^3.1.10", + "readable-stream": "^2.0.2" + } + }, + "regex-not": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/regex-not/-/regex-not-1.0.2.tgz", + "integrity": 
"sha512-J6SDjUgDxQj5NusnOtdFxDwN/+HWykR8GELwctJ7mdqhcyy1xEc4SRFHUXvxTp661YaVKAjfRLZ9cCqS6tn32A==", + "dev": true, + "requires": { + "extend-shallow": "^3.0.2", + "safe-regex": "^1.1.0" + } + }, + "regexp.prototype.flags": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.2.0.tgz", + "integrity": "sha512-ztaw4M1VqgMwl9HlPpOuiYgItcHlunW0He2fE6eNfT6E/CF2FtYi9ofOYe4mKntstYk0Fyh/rDRBdS3AnxjlrA==", + "dev": true, + "requires": { + "define-properties": "^1.1.2" + } + }, + "relateurl": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/relateurl/-/relateurl-0.2.7.tgz", + "integrity": "sha1-VNvzd+UUQKypCkzSdGANP/LYiKk=", + "dev": true + }, + "remove-trailing-separator": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz", + "integrity": "sha1-wkvOKig62tW8P1jg1IJJuSN52O8=", + "dev": true + }, + "renderkid": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/renderkid/-/renderkid-2.0.3.tgz", + "integrity": "sha512-z8CLQp7EZBPCwCnncgf9C4XAi3WR0dv+uWu/PjIyhhAb5d6IJ/QZqlHFprHeKT+59//V6BNUsLbvN8+2LarxGA==", + "dev": true, + "requires": { + "css-select": "^1.1.0", + "dom-converter": "^0.2", + "htmlparser2": "^3.3.0", + "strip-ansi": "^3.0.0", + "utila": "^0.4.0" + } + }, + "repeat-element": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/repeat-element/-/repeat-element-1.1.3.tgz", + "integrity": "sha512-ahGq0ZnV5m5XtZLMb+vP76kcAM5nkLqk0lpqAuojSKGgQtn4eRi4ZZGm2olo2zKFH+sMsWaqOCW1dqAnOru72g==", + "dev": true + }, + "repeat-string": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz", + "integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=", + "dev": true + }, + "require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", + "dev": true + }, + "require-main-filename": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz", + "integrity": "sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==", + "dev": true + }, + "requires-port": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", + "integrity": "sha1-kl0mAdOaxIXgkc8NpcbmlNw9yv8=", + "dev": true + }, + "resolve-cwd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-2.0.0.tgz", + "integrity": "sha1-AKn3OHVW4nA46uIyyqNypqWbZlo=", + "dev": true, + "requires": { + "resolve-from": "^3.0.0" + } + }, + "resolve-dir": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/resolve-dir/-/resolve-dir-1.0.1.tgz", + "integrity": "sha1-eaQGRMNivoLybv/nOcm7U4IEb0M=", + "dev": true, + "requires": { + "expand-tilde": "^2.0.0", + "global-modules": "^1.0.0" + }, + "dependencies": { + "global-modules": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/global-modules/-/global-modules-1.0.0.tgz", + "integrity": "sha512-sKzpEkf11GpOFuw0Zzjzmt4B4UZwjOcG757PPvrfhxcLFbq0wpsgpOqxpxtxFiCG4DtG93M6XRVbF2oGdev7bg==", + "dev": true, + "requires": { + "global-prefix": "^1.0.1", + "is-windows": "^1.0.1", + "resolve-dir": "^1.0.0" + } + } + } + }, + "resolve-from": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-3.0.0.tgz", + "integrity": 
"sha1-six699nWiBvItuZTM17rywoYh0g=", + "dev": true + }, + "resolve-url": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/resolve-url/-/resolve-url-0.2.1.tgz", + "integrity": "sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo=", + "dev": true + }, + "ret": { + "version": "0.1.15", + "resolved": "https://registry.npmjs.org/ret/-/ret-0.1.15.tgz", + "integrity": "sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg==", + "dev": true + }, + "retry": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.12.0.tgz", + "integrity": "sha1-G0KmJmoh8HQh0bC1S33BZ7AcATs=", + "dev": true + }, + "rimraf": { + "version": "2.7.1", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.7.1.tgz", + "integrity": "sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==", + "dev": true, + "requires": { + "glob": "^7.1.3" + } + }, + "ripemd160": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/ripemd160/-/ripemd160-2.0.2.tgz", + "integrity": "sha512-ii4iagi25WusVoiC4B4lq7pbXfAp3D9v5CwfkY33vffw2+pkDjY1D8GaN7spsxvCSx8dkPqOZCEZyfxcmJG2IA==", + "dev": true, + "requires": { + "hash-base": "^3.0.0", + "inherits": "^2.0.1" + } + }, + "run-queue": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/run-queue/-/run-queue-1.0.3.tgz", + "integrity": "sha1-6Eg5bwV9Ij8kOGkkYY4laUFh7Ec=", + "dev": true, + "requires": { + "aproba": "^1.1.1" + } + }, + "safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "dev": true + }, + "safe-regex": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/safe-regex/-/safe-regex-1.1.0.tgz", + "integrity": "sha1-QKNmnzsHfR6UPURinhV91IAjvy4=", + "dev": true, + "requires": { + "ret": "~0.1.10" + } + }, + "safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "dev": true + }, + "schema-utils": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-1.0.0.tgz", + "integrity": "sha512-i27Mic4KovM/lnGsy8whRCHhc7VicJajAjTrYg11K9zfZXnYIt4k5F+kZkwjnrhKzLic/HLU4j11mjsz2G/75g==", + "dev": true, + "requires": { + "ajv": "^6.1.0", + "ajv-errors": "^1.0.0", + "ajv-keywords": "^3.1.0" + } + }, + "select-hose": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/select-hose/-/select-hose-2.0.0.tgz", + "integrity": "sha1-Yl2GWPhlr0Psliv8N2o3NZpJlMo=", + "dev": true + }, + "selfsigned": { + "version": "1.10.7", + "resolved": "https://registry.npmjs.org/selfsigned/-/selfsigned-1.10.7.tgz", + "integrity": "sha512-8M3wBCzeWIJnQfl43IKwOmC4H/RAp50S8DF60znzjW5GVqTcSe2vWclt7hmYVPkKPlHWOu5EaWOMZ2Y6W8ZXTA==", + "dev": true, + "requires": { + "node-forge": "0.9.0" + } + }, + "semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", + "dev": true + }, + "send": { + "version": "0.17.1", + "resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz", + "integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==", + "dev": true, + "requires": { + 
"debug": "2.6.9", + "depd": "~1.1.2", + "destroy": "~1.0.4", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "~1.7.2", + "mime": "1.6.0", + "ms": "2.1.1", + "on-finished": "~2.3.0", + "range-parser": "~1.2.1", + "statuses": "~1.5.0" + }, + "dependencies": { + "ms": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", + "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==", + "dev": true + } + } + }, + "serialize-javascript": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-1.9.1.tgz", + "integrity": "sha512-0Vb/54WJ6k5v8sSWN09S0ora+Hnr+cX40r9F170nT+mSkaxltoE/7R3OrIdBSUv1OoiobH1QoWQbCnAO+e8J1A==", + "dev": true + }, + "serve-index": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/serve-index/-/serve-index-1.9.1.tgz", + "integrity": "sha1-03aNabHn2C5c4FD/9bRTvqEqkjk=", + "dev": true, + "requires": { + "accepts": "~1.3.4", + "batch": "0.6.1", + "debug": "2.6.9", + "escape-html": "~1.0.3", + "http-errors": "~1.6.2", + "mime-types": "~2.1.17", + "parseurl": "~1.3.2" + }, + "dependencies": { + "http-errors": { + "version": "1.6.3", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.6.3.tgz", + "integrity": "sha1-i1VoC7S+KDoLW/TqLjhYC+HZMg0=", + "dev": true, + "requires": { + "depd": "~1.1.2", + "inherits": "2.0.3", + "setprototypeof": "1.1.0", + "statuses": ">= 1.4.0 < 2" + } + }, + "inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=", + "dev": true + }, + "setprototypeof": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.0.tgz", + "integrity": "sha512-BvE/TwpZX4FXExxOxZyRGQQv651MSwmWKZGqvmPcRIjDqWub67kTKuIMx43cZZrS/cBBzwBcNDWoFxt2XEFIpQ==", + "dev": true + } + } + }, + "serve-static": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz", + "integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==", + "dev": true, + "requires": { + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "0.17.1" + } + }, + "set-blocking": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", + "integrity": "sha1-BF+XgtARrppoA93TgrJDkrPYkPc=", + "dev": true + }, + "set-value": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/set-value/-/set-value-2.0.1.tgz", + "integrity": "sha512-JxHc1weCN68wRY0fhCoXpyK55m/XPHafOmK4UWD7m2CI14GMcFypt4w/0+NV5f/ZMby2F6S2wwA7fgynh9gWSw==", + "dev": true, + "requires": { + "extend-shallow": "^2.0.1", + "is-extendable": "^0.1.1", + "is-plain-object": "^2.0.3", + "split-string": "^3.0.1" + }, + "dependencies": { + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } + } + }, + "setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha1-KQy7Iy4waULX1+qbg3Mqt4VvgoU=", + "dev": true + }, + "setprototypeof": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz", 
+ "integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==", + "dev": true + }, + "sha.js": { + "version": "2.4.11", + "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", + "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", + "dev": true, + "requires": { + "inherits": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, + "shebang-command": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-1.2.0.tgz", + "integrity": "sha1-RKrGW2lbAzmJaMOfNj/uXer98eo=", + "dev": true, + "requires": { + "shebang-regex": "^1.0.0" + } + }, + "shebang-regex": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-1.0.0.tgz", + "integrity": "sha1-2kL0l0DAtC2yypcoVxyxkMmO/qM=", + "dev": true + }, + "signal-exit": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.2.tgz", + "integrity": "sha1-tf3AjxKH6hF4Yo5BXiUTK3NkbG0=", + "dev": true + }, + "snapdragon": { + "version": "0.8.2", + "resolved": "https://registry.npmjs.org/snapdragon/-/snapdragon-0.8.2.tgz", + "integrity": "sha512-FtyOnWN/wCHTVXOMwvSv26d+ko5vWlIDD6zoUJ7LW8vh+ZBC8QdljveRP+crNrtBwioEUWy/4dMtbBjA4ioNlg==", + "dev": true, + "requires": { + "base": "^0.11.1", + "debug": "^2.2.0", + "define-property": "^0.2.5", + "extend-shallow": "^2.0.1", + "map-cache": "^0.2.2", + "source-map": "^0.5.6", + "source-map-resolve": "^0.5.0", + "use": "^3.1.0" + }, + "dependencies": { + "define-property": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", + "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", + "dev": true, + "requires": { + "is-descriptor": "^0.1.0" + } + }, + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } + } + }, + "snapdragon-node": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/snapdragon-node/-/snapdragon-node-2.1.1.tgz", + "integrity": "sha512-O27l4xaMYt/RSQ5TR3vpWCAB5Kb/czIcqUFOM/C4fYcLnbZUc1PkjTAMjof2pBWaSTwOUd6qUHcFGVGj7aIwnw==", + "dev": true, + "requires": { + "define-property": "^1.0.0", + "isobject": "^3.0.0", + "snapdragon-util": "^3.0.1" + }, + "dependencies": { + "define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", + "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", + "dev": true, + "requires": { + "is-descriptor": "^1.0.0" + } + }, + "is-accessor-descriptor": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", + "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", + "dev": true, + "requires": { + "kind-of": "^6.0.0" + } + }, + "is-data-descriptor": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", + "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", + "dev": true, + "requires": { + "kind-of": "^6.0.0" + } + }, + "is-descriptor": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", + "integrity": 
"sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", + "dev": true, + "requires": { + "is-accessor-descriptor": "^1.0.0", + "is-data-descriptor": "^1.0.0", + "kind-of": "^6.0.2" + } + } + } + }, + "snapdragon-util": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/snapdragon-util/-/snapdragon-util-3.0.1.tgz", + "integrity": "sha512-mbKkMdQKsjX4BAL4bRYTj21edOf8cN7XHdYUJEe+Zn99hVEYcMvKPct1IqNe7+AZPirn8BCDOQBHQZknqmKlZQ==", + "dev": true, + "requires": { + "kind-of": "^3.2.0" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } + } + }, + "sockjs": { + "version": "0.3.19", + "resolved": "https://registry.npmjs.org/sockjs/-/sockjs-0.3.19.tgz", + "integrity": "sha512-V48klKZl8T6MzatbLlzzRNhMepEys9Y4oGFpypBFFn1gLI/QQ9HtLLyWJNbPlwGLelOVOEijUbTTJeLLI59jLw==", + "dev": true, + "requires": { + "faye-websocket": "^0.10.0", + "uuid": "^3.0.1" + } + }, + "sockjs-client": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/sockjs-client/-/sockjs-client-1.4.0.tgz", + "integrity": "sha512-5zaLyO8/nri5cua0VtOrFXBPK1jbL4+1cebT/mmKA1E1ZXOvJrII75bPu0l0k843G/+iAbhEqzyKr0w/eCCj7g==", + "dev": true, + "requires": { + "debug": "^3.2.5", + "eventsource": "^1.0.7", + "faye-websocket": "~0.11.1", + "inherits": "^2.0.3", + "json3": "^3.3.2", + "url-parse": "^1.4.3" + }, + "dependencies": { + "debug": { + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz", + "integrity": "sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==", + "dev": true, + "requires": { + "ms": "^2.1.1" + } + }, + "faye-websocket": { + "version": "0.11.3", + "resolved": "https://registry.npmjs.org/faye-websocket/-/faye-websocket-0.11.3.tgz", + "integrity": "sha512-D2y4bovYpzziGgbHYtGCMjlJM36vAl/y+xUyn1C+FVx8szd1E+86KwVw6XvYSzOP8iMpm1X0I4xJD+QtUb36OA==", + "dev": true, + "requires": { + "websocket-driver": ">=0.5.1" + } + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + } + } + }, + "source-list-map": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/source-list-map/-/source-list-map-2.0.1.tgz", + "integrity": "sha512-qnQ7gVMxGNxsiL4lEuJwe/To8UnK7fAnmbGEEH8RpLouuKbeEm0lhbQVFIrNSuB+G7tVrAlVsZgETT5nljf+Iw==", + "dev": true + }, + "source-map": { + "version": "0.5.7", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz", + "integrity": "sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=", + "dev": true + }, + "source-map-resolve": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/source-map-resolve/-/source-map-resolve-0.5.2.tgz", + "integrity": "sha512-MjqsvNwyz1s0k81Goz/9vRBe9SZdB09Bdw+/zYyO+3CuPk6fouTaxscHkgtE8jKvf01kVfl8riHzERQ/kefaSA==", + "dev": true, + "requires": { + "atob": "^2.1.1", + "decode-uri-component": "^0.2.0", + "resolve-url": "^0.2.1", + "source-map-url": "^0.4.0", + "urix": "^0.1.0" + } + }, + "source-map-support": { + "version": "0.5.13", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.13.tgz", + "integrity": "sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==", + "dev": true, + "requires": { 
+ "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + }, + "dependencies": { + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + } + } + }, + "source-map-url": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/source-map-url/-/source-map-url-0.4.0.tgz", + "integrity": "sha1-PpNdfd1zYxuXZZlW1VEo6HtQhKM=", + "dev": true + }, + "spdy": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/spdy/-/spdy-4.0.1.tgz", + "integrity": "sha512-HeZS3PBdMA+sZSu0qwpCxl3DeALD5ASx8pAX0jZdKXSpPWbQ6SYGnlg3BBmYLx5LtiZrmkAZfErCm2oECBcioA==", + "dev": true, + "requires": { + "debug": "^4.1.0", + "handle-thing": "^2.0.0", + "http-deceiver": "^1.2.7", + "select-hose": "^2.0.0", + "spdy-transport": "^3.0.0" + }, + "dependencies": { + "debug": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.1.1.tgz", + "integrity": "sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==", + "dev": true, + "requires": { + "ms": "^2.1.1" + } + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + } + } + }, + "spdy-transport": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/spdy-transport/-/spdy-transport-3.0.0.tgz", + "integrity": "sha512-hsLVFE5SjA6TCisWeJXFKniGGOpBgMLmerfO2aCyCU5s7nJ/rpAepqmFifv/GCbSbueEeAJJnmSQ2rKC/g8Fcw==", + "dev": true, + "requires": { + "debug": "^4.1.0", + "detect-node": "^2.0.4", + "hpack.js": "^2.1.6", + "obuf": "^1.1.2", + "readable-stream": "^3.0.6", + "wbuf": "^1.7.3" + }, + "dependencies": { + "debug": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.1.1.tgz", + "integrity": "sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==", + "dev": true, + "requires": { + "ms": "^2.1.1" + } + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "readable-stream": { + "version": "3.4.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.4.0.tgz", + "integrity": "sha512-jItXPLmrSR8jmTRmRWJXCnGJsfy85mB3Wd/uINMXA65yrnFo0cPClFIUWzo2najVNSl+mx7/4W8ttlLWJe99pQ==", + "dev": true, + "requires": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + } + } + } + }, + "split-string": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/split-string/-/split-string-3.1.0.tgz", + "integrity": "sha512-NzNVhJDYpwceVVii8/Hu6DKfD2G+NrQHlS/V/qgv763EYudVwEcMQNxd2lh+0VrUByXN/oJkl5grOhYWvQUYiw==", + "dev": true, + "requires": { + "extend-shallow": "^3.0.0" + } + }, + "ssri": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/ssri/-/ssri-6.0.2.tgz", + "integrity": "sha512-cepbSq/neFK7xB6A50KHN0xHDotYzq58wWCa5LeWqnPrHG8GzfEjO/4O8kpmcGW+oaxkvhEJCWgbgNk4/ZV93Q==", + "dev": true, + "requires": { + "figgy-pudding": "^3.5.1" + } + }, + "static-extend": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/static-extend/-/static-extend-0.1.2.tgz", + "integrity": "sha1-YICcOcv/VTNyJv1eC1IPNB8ftcY=", + "dev": true, 
+ "requires": { + "define-property": "^0.2.5", + "object-copy": "^0.1.0" + }, + "dependencies": { + "define-property": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", + "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", + "dev": true, + "requires": { + "is-descriptor": "^0.1.0" + } + } + } + }, + "statuses": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz", + "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=", + "dev": true + }, + "stream-browserify": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.2.tgz", + "integrity": "sha512-nX6hmklHs/gr2FuxYDltq8fJA1GDlxKQCz8O/IM4atRqBH8OORmBNgfvW5gG10GT/qQ9u0CzIvr2X5Pkt6ntqg==", + "dev": true, + "requires": { + "inherits": "~2.0.1", + "readable-stream": "^2.0.2" + } + }, + "stream-each": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/stream-each/-/stream-each-1.2.3.tgz", + "integrity": "sha512-vlMC2f8I2u/bZGqkdfLQW/13Zihpej/7PmSiMQsbYddxuTsJp8vRe2x2FvVExZg7FaOds43ROAuFJwPR4MTZLw==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "stream-shift": "^1.0.0" + } + }, + "stream-http": { + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/stream-http/-/stream-http-2.8.3.tgz", + "integrity": "sha512-+TSkfINHDo4J+ZobQLWiMouQYB+UVYFttRA94FpEzzJ7ZdqcL4uUUQ7WkdkI4DSozGmgBUE/a47L+38PenXhUw==", + "dev": true, + "requires": { + "builtin-status-codes": "^3.0.0", + "inherits": "^2.0.1", + "readable-stream": "^2.3.6", + "to-arraybuffer": "^1.0.0", + "xtend": "^4.0.0" + } + }, + "stream-shift": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.0.tgz", + "integrity": "sha1-1cdSgl5TZ+eG944Y5EXqIjoVWVI=", + "dev": true + }, + "string-width": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-3.1.0.tgz", + "integrity": "sha512-vafcv6KjVZKSgz06oM/H6GDBrAtz8vdhQakGjFIvNrHA6y3HCF1CInLy+QLq8dTJPQ1b+KDUqDFctkdRW44e1w==", + "dev": true, + "requires": { + "emoji-regex": "^7.0.1", + "is-fullwidth-code-point": "^2.0.0", + "strip-ansi": "^5.1.0" + }, + "dependencies": { + "ansi-regex": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-4.1.0.tgz", + "integrity": "sha512-1apePfXM1UOSqw0o9IiFAovVz9M5S1Dg+4TrDwfMewQ6p/rmMueb7tWZjQ1rx4Loy1ArBggoqGpfqqdI4rondg==", + "dev": true + }, + "strip-ansi": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-5.2.0.tgz", + "integrity": "sha512-DuRs1gKbBqsMKIZlrffwlug8MHkcnpjs5VPmL1PAh+mA30U0DTotfDZ0d2UUsXpPmPmMMJ6W773MaA3J+lbiWA==", + "dev": true, + "requires": { + "ansi-regex": "^4.1.0" + } + } + } + }, + "string.prototype.trimleft": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/string.prototype.trimleft/-/string.prototype.trimleft-2.1.0.tgz", + "integrity": "sha512-FJ6b7EgdKxxbDxc79cOlok6Afd++TTs5szo+zJTUyow3ycrRfJVE2pq3vcN53XexvKZu/DJMDfeI/qMiZTrjTw==", + "dev": true, + "requires": { + "define-properties": "^1.1.3", + "function-bind": "^1.1.1" + } + }, + "string.prototype.trimright": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/string.prototype.trimright/-/string.prototype.trimright-2.1.0.tgz", + "integrity": "sha512-fXZTSV55dNBwv16uw+hh5jkghxSnc5oHq+5K/gXgizHwAvMetdAJlHqqoFC1FSDVPYWLkAKl2cxpUT41sV7nSg==", + "dev": true, + "requires": { + "define-properties": "^1.1.3", + "function-bind": "^1.1.1" + } + }, + "string_decoder": { + 
"version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dev": true, + "requires": { + "safe-buffer": "~5.1.0" + } + }, + "strip-ansi": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", + "integrity": "sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=", + "dev": true, + "requires": { + "ansi-regex": "^2.0.0" + } + }, + "strip-eof": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz", + "integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=", + "dev": true + }, + "supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + }, + "tapable": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-1.1.3.tgz", + "integrity": "sha512-4WK/bYZmj8xLr+HUCODHGF1ZFzsYffasLUgEiMBY4fgtltdO6B4WJtlSbPaDTLpYTcGVwM2qLnFTICEcNxs3kA==", + "dev": true + }, + "terser": { + "version": "4.3.9", + "resolved": "https://registry.npmjs.org/terser/-/terser-4.3.9.tgz", + "integrity": "sha512-NFGMpHjlzmyOtPL+fDw3G7+6Ueh/sz4mkaUYa4lJCxOPTNzd0Uj0aZJOmsDYoSQyfuVoWDMSWTPU3huyOm2zdA==", + "dev": true, + "requires": { + "commander": "^2.20.0", + "source-map": "~0.6.1", + "source-map-support": "~0.5.12" + }, + "dependencies": { + "commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==", + "dev": true + }, + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + } + } + }, + "terser-webpack-plugin": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.4.1.tgz", + "integrity": "sha512-ZXmmfiwtCLfz8WKZyYUuuHf3dMYEjg8NrjHMb0JqHVHVOSkzp3cW2/XG1fP3tRhqEqSzMwzzRQGtAPbs4Cncxg==", + "dev": true, + "requires": { + "cacache": "^12.0.2", + "find-cache-dir": "^2.1.0", + "is-wsl": "^1.1.0", + "schema-utils": "^1.0.0", + "serialize-javascript": "^1.7.0", + "source-map": "^0.6.1", + "terser": "^4.1.2", + "webpack-sources": "^1.4.0", + "worker-farm": "^1.7.0" + }, + "dependencies": { + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + } + } + }, + "text-encoding": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/text-encoding/-/text-encoding-0.7.0.tgz", + "integrity": "sha512-oJQ3f1hrOnbRLOcwKz0Liq2IcrvDeZRHXhd9RgLrsT+DjWY/nty1Hi7v3dtkaEYbPYe0mUoOfzRrMwfXXwgPUA==", + "dev": true + }, + "through2": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.5.tgz", + "integrity": "sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==", + "dev": true, + "requires": { + "readable-stream": "~2.3.6", + "xtend": "~4.0.1" + } + }, + "thunky": { + 
"version": "1.1.0", + "resolved": "https://registry.npmjs.org/thunky/-/thunky-1.1.0.tgz", + "integrity": "sha512-eHY7nBftgThBqOyHGVN+l8gF0BucP09fMo0oO/Lb0w1OF80dJv+lDVpXG60WMQvkcxAkNybKsrEIE3ZtKGmPrA==", + "dev": true + }, + "timers-browserify": { + "version": "2.0.11", + "resolved": "https://registry.npmjs.org/timers-browserify/-/timers-browserify-2.0.11.tgz", + "integrity": "sha512-60aV6sgJ5YEbzUdn9c8kYGIqOubPoUdqQCul3SBAsRCZ40s6Y5cMcrW4dt3/k/EsbLVJNl9n6Vz3fTc+k2GeKQ==", + "dev": true, + "requires": { + "setimmediate": "^1.0.4" + } + }, + "to-arraybuffer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/to-arraybuffer/-/to-arraybuffer-1.0.1.tgz", + "integrity": "sha1-fSKbH8xjfkZsoIEYCDanqr/4P0M=", + "dev": true + }, + "to-object-path": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/to-object-path/-/to-object-path-0.3.0.tgz", + "integrity": "sha1-KXWIt7Dn4KwI4E5nL4XB9JmeF68=", + "dev": true, + "requires": { + "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } + } + }, + "to-regex": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/to-regex/-/to-regex-3.0.2.tgz", + "integrity": "sha512-FWtleNAtZ/Ki2qtqej2CXTOayOH9bHDQF+Q48VpWyDXjbYxA4Yz8iDB31zXOBUlOHHKidDbqGVrTUvQMPmBGBw==", + "dev": true, + "requires": { + "define-property": "^2.0.2", + "extend-shallow": "^3.0.2", + "regex-not": "^1.0.2", + "safe-regex": "^1.1.0" + } + }, + "to-regex-range": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-2.1.1.tgz", + "integrity": "sha1-fIDBe53+vlmeJzZ+DU3VWQFB2zg=", + "dev": true, + "requires": { + "is-number": "^3.0.0", + "repeat-string": "^1.6.1" + } + }, + "toidentifier": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz", + "integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==", + "dev": true + }, + "toposort": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/toposort/-/toposort-1.0.7.tgz", + "integrity": "sha1-LmhELZ9k7HILjMieZEOsbKqVACk=", + "dev": true + }, + "tslib": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.10.0.tgz", + "integrity": "sha512-qOebF53frne81cf0S9B41ByenJ3/IuH8yJKngAX35CmiZySA0khhkovshKK+jGCaMnVomla7gVlIcc3EvKPbTQ==", + "dev": true + }, + "tty-browserify": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/tty-browserify/-/tty-browserify-0.0.0.tgz", + "integrity": "sha1-oVe6QC2iTpv5V/mqadUk7tQpAaY=", + "dev": true + }, + "type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "dev": true, + "requires": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + } + }, + "typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=", + "dev": true + }, + "uglify-js": { + "version": "3.4.10", + "resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.4.10.tgz", + "integrity": "sha512-Y2VsbPVs0FIshJztycsO2SfPk7/KAF/T72qzv9u5EpQ4kB2hQoHlhNQTsNyy6ul7lQtqJN/AoWeS23OzEiEFxw==", + "dev": true, + "requires": { + "commander": 
"~2.19.0", + "source-map": "~0.6.1" + }, + "dependencies": { + "commander": { + "version": "2.19.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.19.0.tgz", + "integrity": "sha512-6tvAOO+D6OENvRAh524Dh9jcfKTYDQAqvqezbCW82xj5X0pSrcpxtvRKHLG0yBY6SD7PSDrJaj+0AiOcKVd1Xg==", + "dev": true + }, + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + } + } + }, + "union-value": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/union-value/-/union-value-1.0.1.tgz", + "integrity": "sha512-tJfXmxMeWYnczCVs7XAEvIV7ieppALdyepWMkHkwciRpZraG/xwT+s2JN8+pr1+8jCRf80FFzvr+MpQeeoF4Xg==", + "dev": true, + "requires": { + "arr-union": "^3.1.0", + "get-value": "^2.0.6", + "is-extendable": "^0.1.1", + "set-value": "^2.0.1" + } + }, + "unique-filename": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/unique-filename/-/unique-filename-1.1.1.tgz", + "integrity": "sha512-Vmp0jIp2ln35UTXuryvjzkjGdRyf9b2lTXuSYUiPmzRcl3FDtYqAwOnTJkAngD9SWhnoJzDbTKwaOrZ+STtxNQ==", + "dev": true, + "requires": { + "unique-slug": "^2.0.0" + } + }, + "unique-slug": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/unique-slug/-/unique-slug-2.0.2.tgz", + "integrity": "sha512-zoWr9ObaxALD3DOPfjPSqxt4fnZiWblxHIgeWqW8x7UqDzEtHEQLzji2cuJYQFCU6KmoJikOYAZlrTHHebjx2w==", + "dev": true, + "requires": { + "imurmurhash": "^0.1.4" + } + }, + "unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=", + "dev": true + }, + "unset-value": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unset-value/-/unset-value-1.0.0.tgz", + "integrity": "sha1-g3aHP30jNRef+x5vw6jtDfyKtVk=", + "dev": true, + "requires": { + "has-value": "^0.3.1", + "isobject": "^3.0.0" + }, + "dependencies": { + "has-value": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/has-value/-/has-value-0.3.1.tgz", + "integrity": "sha1-ex9YutpiyoJ+wKIHgCVlSEWZXh8=", + "dev": true, + "requires": { + "get-value": "^2.0.3", + "has-values": "^0.1.4", + "isobject": "^2.0.0" + }, + "dependencies": { + "isobject": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/isobject/-/isobject-2.1.0.tgz", + "integrity": "sha1-8GVWEJaj8dou9GJy+BXIQNh+DIk=", + "dev": true, + "requires": { + "isarray": "1.0.0" + } + } + } + }, + "has-values": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/has-values/-/has-values-0.1.4.tgz", + "integrity": "sha1-bWHeldkd/Km5oCCJrThL/49it3E=", + "dev": true + } + } + }, + "upath": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/upath/-/upath-1.2.0.tgz", + "integrity": "sha512-aZwGpamFO61g3OlfT7OQCHqhGnW43ieH9WZeP7QxN/G/jS4jfqUkZxoryvJgVPEcrl5NL/ggHsSmLMHuH64Lhg==", + "dev": true + }, + "upper-case": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/upper-case/-/upper-case-1.1.3.tgz", + "integrity": "sha1-9rRQHC7EzdJrp4vnIilh3ndiFZg=", + "dev": true + }, + "uri-js": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.2.2.tgz", + "integrity": "sha512-KY9Frmirql91X2Qgjry0Wd4Y+YTdrdZheS8TFwvkbLWf/G5KNJDCh6pKL5OZctEW4+0Baa5idK2ZQuELRwPznQ==", + "dev": true, + "requires": { + "punycode": "^2.1.0" + } + }, + "urix": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/urix/-/urix-0.1.0.tgz", + 
"integrity": "sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI=", + "dev": true + }, + "url": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/url/-/url-0.11.0.tgz", + "integrity": "sha1-ODjpfPxgUh63PFJajlW/3Z4uKPE=", + "dev": true, + "requires": { + "punycode": "1.3.2", + "querystring": "0.2.0" + }, + "dependencies": { + "punycode": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.3.2.tgz", + "integrity": "sha1-llOgNvt8HuQjQvIyXM7v6jkmxI0=", + "dev": true + } + } + }, + "url-parse": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.1.tgz", + "integrity": "sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q==", + "dev": true, + "requires": { + "querystringify": "^2.1.1", + "requires-port": "^1.0.0" + } + }, + "use": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/use/-/use-3.1.1.tgz", + "integrity": "sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==", + "dev": true + }, + "util": { + "version": "0.11.1", + "resolved": "https://registry.npmjs.org/util/-/util-0.11.1.tgz", + "integrity": "sha512-HShAsny+zS2TZfaXxD9tYj4HQGlBezXZMZuM/S5PKLLoZkShZiGk9o5CzukI1LVHZvjdvZ2Sj1aW/Ndn2NB/HQ==", + "dev": true, + "requires": { + "inherits": "2.0.3" + }, + "dependencies": { + "inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=", + "dev": true + } + } + }, + "util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=", + "dev": true + }, + "util.promisify": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/util.promisify/-/util.promisify-1.0.0.tgz", + "integrity": "sha512-i+6qA2MPhvoKLuxnJNpXAGhg7HphQOSUq2LKMZD0m15EiskXUkMvKdF4Uui0WYeCUGea+o2cw/ZuwehtfsrNkA==", + "dev": true, + "requires": { + "define-properties": "^1.1.2", + "object.getownpropertydescriptors": "^2.0.3" + } + }, + "utila": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/utila/-/utila-0.4.0.tgz", + "integrity": "sha1-ihagXURWV6Oupe7MWxKk+lN5dyw=", + "dev": true + }, + "utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=", + "dev": true + }, + "uuid": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.3.tgz", + "integrity": "sha512-pW0No1RGHgzlpHJO1nsVrHKpOEIxkGg1xB+v0ZmdNH5OAeAwzAVrCnI2/6Mtx+Uys6iaylxa+D3g4j63IKKjSQ==", + "dev": true + }, + "v8-compile-cache": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.0.3.tgz", + "integrity": "sha512-CNmdbwQMBjwr9Gsmohvm0pbL954tJrNzf6gWL3K+QMQf00PF7ERGrEiLgjuU3mKreLC2MeGhUsNV9ybTbLgd3w==", + "dev": true + }, + "vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=", + "dev": true + }, + "vm-browserify": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/vm-browserify/-/vm-browserify-1.1.0.tgz", + "integrity": "sha512-iq+S7vZJE60yejDYM0ek6zg308+UZsdtPExWP9VZoCFCz1zkJoXFnAX7aZfd/ZwrkidzdUZL0C/ryW+JwAiIGw==", + "dev": true + }, + "watchpack": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-1.6.0.tgz", + "integrity": 
"sha512-i6dHe3EyLjMmDlU1/bGQpEw25XSjkJULPuAVKCbNRefQVq48yXKUpwg538F7AZTf9kyr57zj++pQFltUa5H7yA==", + "dev": true, + "requires": { + "chokidar": "^2.0.2", + "graceful-fs": "^4.1.2", + "neo-async": "^2.5.0" + } + }, + "wbuf": { + "version": "1.7.3", + "resolved": "https://registry.npmjs.org/wbuf/-/wbuf-1.7.3.tgz", + "integrity": "sha512-O84QOnr0icsbFGLS0O3bI5FswxzRr8/gHwWkDlQFskhSPryQXvrTMxjxGP4+iWYoauLoBvfDpkrOauZ+0iZpDA==", + "dev": true, + "requires": { + "minimalistic-assert": "^1.0.0" + } + }, + "webpack": { + "version": "4.41.2", + "resolved": "https://registry.npmjs.org/webpack/-/webpack-4.41.2.tgz", + "integrity": "sha512-Zhw69edTGfbz9/8JJoyRQ/pq8FYUoY0diOXqW0T6yhgdhCv6wr0hra5DwwWexNRns2Z2+gsnrNcbe9hbGBgk/A==", + "dev": true, + "requires": { + "@webassemblyjs/ast": "1.8.5", + "@webassemblyjs/helper-module-context": "1.8.5", + "@webassemblyjs/wasm-edit": "1.8.5", + "@webassemblyjs/wasm-parser": "1.8.5", + "acorn": "^6.2.1", + "ajv": "^6.10.2", + "ajv-keywords": "^3.4.1", + "chrome-trace-event": "^1.0.2", + "enhanced-resolve": "^4.1.0", + "eslint-scope": "^4.0.3", + "json-parse-better-errors": "^1.0.2", + "loader-runner": "^2.4.0", + "loader-utils": "^1.2.3", + "memory-fs": "^0.4.1", + "micromatch": "^3.1.10", + "mkdirp": "^0.5.1", + "neo-async": "^2.6.1", + "node-libs-browser": "^2.2.1", + "schema-utils": "^1.0.0", + "tapable": "^1.1.3", + "terser-webpack-plugin": "^1.4.1", + "watchpack": "^1.6.0", + "webpack-sources": "^1.4.1" + }, + "dependencies": { + "big.js": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz", + "integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==", + "dev": true + }, + "json5": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", + "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "dev": true, + "requires": { + "minimist": "^1.2.0" + } + }, + "loader-utils": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.2.3.tgz", + "integrity": "sha512-fkpz8ejdnEMG3s37wGL07iSBDg99O9D5yflE9RGNH3hRdx9SOwYfnGYdZOUIZitN8E+E2vkq3MUMYMvPYl5ZZA==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": "^2.0.0", + "json5": "^1.0.1" + } + } + } + }, + "webpack-cli": { + "version": "3.3.9", + "resolved": "https://registry.npmjs.org/webpack-cli/-/webpack-cli-3.3.9.tgz", + "integrity": "sha512-xwnSxWl8nZtBl/AFJCOn9pG7s5CYUYdZxmmukv+fAHLcBIHM36dImfpQg3WfShZXeArkWlf6QRw24Klcsv8a5A==", + "dev": true, + "requires": { + "chalk": "2.4.2", + "cross-spawn": "6.0.5", + "enhanced-resolve": "4.1.0", + "findup-sync": "3.0.0", + "global-modules": "2.0.0", + "import-local": "2.0.0", + "interpret": "1.2.0", + "loader-utils": "1.2.3", + "supports-color": "6.1.0", + "v8-compile-cache": "2.0.3", + "yargs": "13.2.4" + }, + "dependencies": { + "big.js": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz", + "integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==", + "dev": true + }, + "enhanced-resolve": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-4.1.0.tgz", + "integrity": "sha512-F/7vkyTtyc/llOIn8oWclcB25KdRaiPBpZYDgJHgh/UHtpgT2p2eldQgtQnLtUvfMKPKxbRaQM/hHkvLHt1Vng==", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "memory-fs": "^0.4.0", + "tapable": "^1.0.0" + } + }, + "json5": { + 
"version": "1.0.1", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", + "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "dev": true, + "requires": { + "minimist": "^1.2.0" + } + }, + "loader-utils": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.2.3.tgz", + "integrity": "sha512-fkpz8ejdnEMG3s37wGL07iSBDg99O9D5yflE9RGNH3hRdx9SOwYfnGYdZOUIZitN8E+E2vkq3MUMYMvPYl5ZZA==", + "dev": true, + "requires": { + "big.js": "^5.2.2", + "emojis-list": "^2.0.0", + "json5": "^1.0.1" + } + }, + "supports-color": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-6.1.0.tgz", + "integrity": "sha512-qe1jfm1Mg7Nq/NSh6XE24gPXROEVsWHxC1LIx//XNlD9iw7YZQGjZNjYN7xGaEG6iKdA8EtNFW6R0gjnVXp+wQ==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + } + } + }, + "webpack-dev-middleware": { + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-3.7.2.tgz", + "integrity": "sha512-1xC42LxbYoqLNAhV6YzTYacicgMZQTqRd27Sim9wn5hJrX3I5nxYy1SxSd4+gjUFsz1dQFj+yEe6zEVmSkeJjw==", + "dev": true, + "requires": { + "memory-fs": "^0.4.1", + "mime": "^2.4.4", + "mkdirp": "^0.5.1", + "range-parser": "^1.2.1", + "webpack-log": "^2.0.0" + }, + "dependencies": { + "mime": { + "version": "2.4.4", + "resolved": "https://registry.npmjs.org/mime/-/mime-2.4.4.tgz", + "integrity": "sha512-LRxmNwziLPT828z+4YkNzloCFC2YM4wrB99k+AV5ZbEyfGNWfG8SO1FUXLmLDBSo89NrJZ4DIWeLjy1CHGhMGA==", + "dev": true + } + } + }, + "webpack-dev-server": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/webpack-dev-server/-/webpack-dev-server-3.9.0.tgz", + "integrity": "sha512-E6uQ4kRrTX9URN9s/lIbqTAztwEPdvzVrcmHE8EQ9YnuT9J8Es5Wrd8n9BKg1a0oZ5EgEke/EQFgUsp18dSTBw==", + "dev": true, + "requires": { + "ansi-html": "0.0.7", + "bonjour": "^3.5.0", + "chokidar": "^2.1.8", + "compression": "^1.7.4", + "connect-history-api-fallback": "^1.6.0", + "debug": "^4.1.1", + "del": "^4.1.1", + "express": "^4.17.1", + "html-entities": "^1.2.1", + "http-proxy-middleware": "0.19.1", + "import-local": "^2.0.0", + "internal-ip": "^4.3.0", + "ip": "^1.1.5", + "is-absolute-url": "^3.0.3", + "killable": "^1.0.1", + "loglevel": "^1.6.4", + "opn": "^5.5.0", + "p-retry": "^3.0.1", + "portfinder": "^1.0.25", + "schema-utils": "^1.0.0", + "selfsigned": "^1.10.7", + "semver": "^6.3.0", + "serve-index": "^1.9.1", + "sockjs": "0.3.19", + "sockjs-client": "1.4.0", + "spdy": "^4.0.1", + "strip-ansi": "^3.0.1", + "supports-color": "^6.1.0", + "url": "^0.11.0", + "webpack-dev-middleware": "^3.7.2", + "webpack-log": "^2.0.0", + "ws": "^6.2.1", + "yargs": "12.0.5" + }, + "dependencies": { + "ansi-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", + "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", + "dev": true + }, + "cliui": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", + "integrity": "sha512-4FG+RSG9DL7uEwRUZXZn3SS34DiDPfzP0VOiEwtUWlE+AR2EIg+hSyvrIgUUfhdgR/UkAeW2QHgeP+hWrXs7jQ==", + "dev": true, + "requires": { + "string-width": "^2.1.1", + "strip-ansi": "^4.0.0", + "wrap-ansi": "^2.0.0" + }, + "dependencies": { + "strip-ansi": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-4.0.0.tgz", + "integrity": "sha1-qEeQIusaw2iocTibY1JixQXuNo8=", + "dev": true, + "requires": { + "ansi-regex": "^3.0.0" + } + } + } + }, 
+ "debug": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.1.1.tgz", + "integrity": "sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==", + "dev": true, + "requires": { + "ms": "^2.1.1" + } + }, + "get-caller-file": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-1.0.3.tgz", + "integrity": "sha512-3t6rVToeoZfYSGd8YoLFR2DJkiQrIiUrGcjvFX2mDw3bn6k2OtwHN0TNCLbBO+w8qTvimhDkv+LSscbJY1vE6w==", + "dev": true + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "require-main-filename": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-1.0.1.tgz", + "integrity": "sha1-l/cXtp1IeE9fUmpsWqj/3aBVpNE=", + "dev": true + }, + "semver": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", + "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "dev": true + }, + "string-width": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", + "integrity": "sha512-nOqH59deCq9SRHlxq1Aw85Jnt4w6KvLKqWVik6oA9ZklXLNIOlqg4F2yrT1MVaTjAqvVwdfeZ7w7aCvJD7ugkw==", + "dev": true, + "requires": { + "is-fullwidth-code-point": "^2.0.0", + "strip-ansi": "^4.0.0" + }, + "dependencies": { + "strip-ansi": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-4.0.0.tgz", + "integrity": "sha1-qEeQIusaw2iocTibY1JixQXuNo8=", + "dev": true, + "requires": { + "ansi-regex": "^3.0.0" + } + } + } + }, + "supports-color": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-6.1.0.tgz", + "integrity": "sha512-qe1jfm1Mg7Nq/NSh6XE24gPXROEVsWHxC1LIx//XNlD9iw7YZQGjZNjYN7xGaEG6iKdA8EtNFW6R0gjnVXp+wQ==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + }, + "wrap-ansi": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", + "integrity": "sha1-2Pw9KE3QV5T+hJc8rs3Rz4JP3YU=", + "dev": true, + "requires": { + "string-width": "^1.0.1", + "strip-ansi": "^3.0.1" + }, + "dependencies": { + "is-fullwidth-code-point": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-1.0.0.tgz", + "integrity": "sha1-754xOG8DGn8NZDr4L95QxFfvAMs=", + "dev": true, + "requires": { + "number-is-nan": "^1.0.0" + } + }, + "string-width": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", + "integrity": "sha1-EYvfW4zcUaKn5w0hHgfisLmxB9M=", + "dev": true, + "requires": { + "code-point-at": "^1.0.0", + "is-fullwidth-code-point": "^1.0.0", + "strip-ansi": "^3.0.0" + } + } + } + }, + "yargs": { + "version": "12.0.5", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-12.0.5.tgz", + "integrity": "sha512-Lhz8TLaYnxq/2ObqHDql8dX8CJi97oHxrjUcYtzKbbykPtVW9WB+poxI+NM2UIzsMgNCZTIf0AQwsjK5yMAqZw==", + "dev": true, + "requires": { + "cliui": "^4.0.0", + "decamelize": "^1.2.0", + "find-up": "^3.0.0", + "get-caller-file": "^1.0.1", + "os-locale": "^3.0.0", + "require-directory": "^2.1.1", + "require-main-filename": "^1.0.1", + "set-blocking": "^2.0.0", + "string-width": "^2.0.0", + "which-module": "^2.0.0", + "y18n": "^3.2.1 
|| ^4.0.0", + "yargs-parser": "^11.1.1" + } + }, + "yargs-parser": { + "version": "11.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-11.1.1.tgz", + "integrity": "sha512-C6kB/WJDiaxONLJQnF8ccx9SEeoTTLek8RVbaOIsrAUS8VrBEXfmeSnCZxygc+XC2sNMBIwOOnfcxiynjHsVSQ==", + "dev": true, + "requires": { + "camelcase": "^5.0.0", + "decamelize": "^1.2.0" + } + } + } + }, + "webpack-log": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/webpack-log/-/webpack-log-2.0.0.tgz", + "integrity": "sha512-cX8G2vR/85UYG59FgkoMamwHUIkSSlV3bBMRsbxVXVUk2j6NleCKjQ/WE9eYg9WY4w25O9w8wKP4rzNZFmUcUg==", + "dev": true, + "requires": { + "ansi-colors": "^3.0.0", + "uuid": "^3.3.2" + } + }, + "webpack-sources": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/webpack-sources/-/webpack-sources-1.4.3.tgz", + "integrity": "sha512-lgTS3Xhv1lCOKo7SA5TjKXMjpSM4sBjNV5+q2bqesbSPs5FjGmU6jjtBSkX9b4qW87vDIsCIlUPOEhbZrMdjeQ==", + "dev": true, + "requires": { + "source-list-map": "^2.0.0", + "source-map": "~0.6.1" + }, + "dependencies": { + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + } + } + }, + "websocket-driver": { + "version": "0.7.3", + "resolved": "https://registry.npmjs.org/websocket-driver/-/websocket-driver-0.7.3.tgz", + "integrity": "sha512-bpxWlvbbB459Mlipc5GBzzZwhoZgGEZLuqPaR0INBGnPAY1vdBX6hPnoFXiw+3yWxDuHyQjO2oXTMyS8A5haFg==", + "dev": true, + "requires": { + "http-parser-js": ">=0.4.0 <0.4.11", + "safe-buffer": ">=5.1.0", + "websocket-extensions": ">=0.1.1" + } + }, + "websocket-extensions": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/websocket-extensions/-/websocket-extensions-0.1.4.tgz", + "integrity": "sha512-OqedPIGOfsDlo31UNwYbCFMSaO9m9G/0faIHj5/dZFDMFqPTcx6UwqyOy3COEaEOg/9VsGIpdqn62W5KhoKSpg==", + "dev": true + }, + "which": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", + "integrity": "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==", + "dev": true, + "requires": { + "isexe": "^2.0.0" + } + }, + "which-module": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", + "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", + "dev": true + }, + "worker-farm": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/worker-farm/-/worker-farm-1.7.0.tgz", + "integrity": "sha512-rvw3QTZc8lAxyVrqcSGVm5yP/IJ2UcB3U0graE3LCFoZ0Yn2x4EoVSqJKdB/T5M+FLcRPjz4TDacRf3OCfNUzw==", + "dev": true, + "requires": { + "errno": "~0.1.7" + } + }, + "wrap-ansi": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-5.1.0.tgz", + "integrity": "sha512-QC1/iN/2/RPVJ5jYK8BGttj5z83LmSKmvbvrXPNCLZSEb32KKVDJDl/MOt2N01qU2H/FkzEa9PKto1BqDjtd7Q==", + "dev": true, + "requires": { + "ansi-styles": "^3.2.0", + "string-width": "^3.0.0", + "strip-ansi": "^5.0.0" + }, + "dependencies": { + "ansi-regex": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-4.1.0.tgz", + "integrity": "sha512-1apePfXM1UOSqw0o9IiFAovVz9M5S1Dg+4TrDwfMewQ6p/rmMueb7tWZjQ1rx4Loy1ArBggoqGpfqqdI4rondg==", + "dev": true + }, + "strip-ansi": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-5.2.0.tgz", + "integrity": 
"sha512-DuRs1gKbBqsMKIZlrffwlug8MHkcnpjs5VPmL1PAh+mA30U0DTotfDZ0d2UUsXpPmPmMMJ6W773MaA3J+lbiWA==", + "dev": true, + "requires": { + "ansi-regex": "^4.1.0" + } + } + } + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", + "dev": true + }, + "ws": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/ws/-/ws-6.2.1.tgz", + "integrity": "sha512-GIyAXC2cB7LjvpgMt9EKS2ldqr0MTrORaleiOno6TweZ6r3TKtoFQWay/2PceJ3RuBasOHzXNn5Lrw1X0bEjqA==", + "dev": true, + "requires": { + "async-limiter": "~1.0.0" + } + }, + "xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "dev": true + }, + "y18n": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.1.tgz", + "integrity": "sha512-wNcy4NvjMYL8gogWWYAO7ZFWFfHcbdbE57tZO8e4cbpj8tfUcwrwqSl3ad8HxpYWCdXcJUCeKKZS62Av1affwQ==", + "dev": true + }, + "yallist": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "dev": true + }, + "yargs": { + "version": "13.2.4", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-13.2.4.tgz", + "integrity": "sha512-HG/DWAJa1PAnHT9JAhNa8AbAv3FPaiLzioSjCcmuXXhP8MlpHO5vwls4g4j6n30Z74GVQj8Xa62dWVx1QCGklg==", + "dev": true, + "requires": { + "cliui": "^5.0.0", + "find-up": "^3.0.0", + "get-caller-file": "^2.0.1", + "os-locale": "^3.1.0", + "require-directory": "^2.1.1", + "require-main-filename": "^2.0.0", + "set-blocking": "^2.0.0", + "string-width": "^3.0.0", + "which-module": "^2.0.0", + "y18n": "^4.0.0", + "yargs-parser": "^13.1.0" + } + }, + "yargs-parser": { + "version": "13.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-13.1.1.tgz", + "integrity": "sha512-oVAVsHz6uFrg3XQheFII8ESO2ssAf9luWuAd6Wexsu4F3OtIW0o8IribPXYrD4WC24LWtPrJlGy87y5udK+dxQ==", + "dev": true, + "requires": { + "camelcase": "^5.0.0", + "decamelize": "^1.2.0" + } + } + } +} diff --git a/rust/reqwest/examples/wasm_github_fetch/package.json b/rust/reqwest/examples/wasm_github_fetch/package.json new file mode 100644 index 0000000000..2a84b1f75b --- /dev/null +++ b/rust/reqwest/examples/wasm_github_fetch/package.json @@ -0,0 +1,14 @@ +{ + "scripts": { + "build": "webpack", + "serve": "webpack-dev-server" + }, + "devDependencies": { + "@wasm-tool/wasm-pack-plugin": "1.0.1", + "text-encoding": "^0.7.0", + "html-webpack-plugin": "^3.2.0", + "webpack": "^4.29.4", + "webpack-cli": "^3.1.1", + "webpack-dev-server": "^3.1.0" + } +} diff --git a/rust/reqwest/examples/wasm_github_fetch/src/lib.rs b/rust/reqwest/examples/wasm_github_fetch/src/lib.rs new file mode 100644 index 0000000000..b487344ed2 --- /dev/null +++ b/rust/reqwest/examples/wasm_github_fetch/src/lib.rs @@ -0,0 +1,47 @@ +use serde::{Deserialize, Serialize}; +use wasm_bindgen::prelude::*; + +// NOTE: This test is a clone of https://github.com/rustwasm/wasm-bindgen/blob/master/examples/fetch/src/lib.rs +// but uses Reqwest instead of the web_sys fetch api directly + +/// A struct to hold some data from the GitHub Branch API. 
+///
+/// Note how we don't have to define every member -- serde will ignore extra
+/// data when deserializing
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Branch {
+    pub name: String,
+    pub commit: Commit,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Commit {
+    pub sha: String,
+    pub commit: CommitDetails,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct CommitDetails {
+    pub author: Signature,
+    pub committer: Signature,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Signature {
+    pub name: String,
+    pub email: String,
+}
+
+#[wasm_bindgen]
+pub async fn run() -> Result<JsValue, JsValue> {
+    let res = reqwest::Client::new()
+        .get("https://api.github.com/repos/rustwasm/wasm-bindgen/branches/master")
+        .header("Accept", "application/vnd.github.v3+json")
+        .send()
+        .await?;
+
+    let text = res.text().await?;
+    let branch_info: Branch = serde_json::from_str(&text).unwrap();
+
+    Ok(JsValue::from_serde(&branch_info).unwrap())
+}
diff --git a/rust/reqwest/examples/wasm_github_fetch/webpack.config.js b/rust/reqwest/examples/wasm_github_fetch/webpack.config.js
new file mode 100644
index 0000000000..a6c7ba7028
--- /dev/null
+++ b/rust/reqwest/examples/wasm_github_fetch/webpack.config.js
@@ -0,0 +1,25 @@
+const path = require('path');
+const HtmlWebpackPlugin = require('html-webpack-plugin');
+const webpack = require('webpack');
+const WasmPackPlugin = require("@wasm-tool/wasm-pack-plugin");
+
+module.exports = {
+    entry: './index.js',
+    output: {
+        path: path.resolve(__dirname, 'dist'),
+        filename: 'index.js',
+    },
+    plugins: [
+        new HtmlWebpackPlugin(),
+        new WasmPackPlugin({
+            crateDirectory: path.resolve(__dirname, ".")
+        }),
+        // Have this example work in Edge which doesn't ship `TextEncoder` or
+        // `TextDecoder` at this time.
+        new webpack.ProvidePlugin({
+            TextDecoder: ['text-encoding', 'TextDecoder'],
+            TextEncoder: ['text-encoding', 'TextEncoder']
+        })
+    ],
+    mode: 'development'
+};
\ No newline at end of file
diff --git a/rust/reqwest/src/async_impl/body.rs b/rust/reqwest/src/async_impl/body.rs
new file mode 100644
index 0000000000..2a7d8f8216
--- /dev/null
+++ b/rust/reqwest/src/async_impl/body.rs
@@ -0,0 +1,520 @@
+use std::fmt;
+use std::future::Future;
+use std::pin::Pin;
+use std::task::{ready, Context, Poll};
+use std::time::Duration;
+
+use bytes::Bytes;
+use http_body::Body as HttpBody;
+use http_body_util::combinators::BoxBody;
+use pin_project_lite::pin_project;
+#[cfg(feature = "stream")]
+use tokio::fs::File;
+use tokio::time::Sleep;
+#[cfg(feature = "stream")]
+use tokio_util::io::ReaderStream;
+
+/// An asynchronous request body.
+pub struct Body {
+    inner: Inner,
+}
+
+enum Inner {
+    Reusable(Bytes),
+    Streaming(BoxBody<Bytes, Box<dyn std::error::Error + Send + Sync>>),
+}
+
+pin_project! {
+    /// A body with a total timeout.
+    ///
+    /// The timeout does not reset upon each chunk, but rather requires the whole
+    /// body be streamed before the deadline is reached.
+    pub(crate) struct TotalTimeoutBody<B> {
+        #[pin]
+        inner: B,
+        timeout: Pin<Box<Sleep>>,
+    }
+}
+
+pin_project! {
+    pub(crate) struct ReadTimeoutBody<B> {
+        #[pin]
+        inner: B,
+        #[pin]
+        sleep: Option<Sleep>,
+        timeout: Duration,
+    }
+}
+
+/// Converts any `impl Body` into an `impl Stream` of just its DATA frames.
+#[cfg(any(feature = "stream", feature = "multipart",))]
+pub(crate) struct DataStream<B>(pub(crate) B);
+
+impl Body {
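// A minimal sketch of how the `Reusable`/`Streaming` split above shows up
// through the public API; it assumes a downstream binary with reqwest's
// `stream` feature enabled and `futures-util` as a dependency.
fn main() {
    // Buffered inputs keep their bytes around, so they can be inspected
    // (and, internally, cheaply reused for retries).
    let buffered = reqwest::Body::from("hello");
    assert_eq!(buffered.as_bytes(), Some(&b"hello"[..]));

    // Streamed inputs can only be polled; there is nothing to hand back.
    let chunks = futures_util::stream::iter(vec![Ok::<_, std::io::Error>("chunk")]);
    let streamed = reqwest::Body::wrap_stream(chunks);
    assert_eq!(streamed.as_bytes(), None);
}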
+    /// Returns a reference to the internal data of the `Body`.
+    ///
+    /// `None` is returned if the underlying data is a stream.
+    pub fn as_bytes(&self) -> Option<&[u8]> {
+        match &self.inner {
+            Inner::Reusable(bytes) => Some(bytes.as_ref()),
+            Inner::Streaming(..) => None,
+        }
+    }
+
+    /// Wrap a futures `Stream` in a box inside `Body`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use reqwest::Body;
+    /// # use futures_util;
+    /// # fn main() {
+    /// let chunks: Vec<Result<_, ::std::io::Error>> = vec![
+    ///     Ok("hello"),
+    ///     Ok(" "),
+    ///     Ok("world"),
+    /// ];
+    ///
+    /// let stream = futures_util::stream::iter(chunks);
+    ///
+    /// let body = Body::wrap_stream(stream);
+    /// # }
+    /// ```
+    ///
+    /// # Optional
+    ///
+    /// This requires the `stream` feature to be enabled.
+    #[cfg(feature = "stream")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "stream")))]
+    pub fn wrap_stream<S>(stream: S) -> Body
+    where
+        S: futures_core::stream::TryStream + Send + 'static,
+        S::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
+        Bytes: From<S::Ok>,
+    {
+        Body::stream(stream)
+    }
+
+    #[cfg(any(feature = "stream", feature = "multipart", feature = "blocking"))]
+    pub(crate) fn stream<S>(stream: S) -> Body
+    where
+        S: futures_core::stream::TryStream + Send + 'static,
+        S::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
+        Bytes: From<S::Ok>,
+    {
+        use futures_util::TryStreamExt;
+        use http_body::Frame;
+        use http_body_util::StreamBody;
+
+        let body = http_body_util::BodyExt::boxed(StreamBody::new(sync_wrapper::SyncStream::new(
+            stream
+                .map_ok(|d| Frame::data(Bytes::from(d)))
+                .map_err(Into::into),
+        )));
+        Body {
+            inner: Inner::Streaming(body),
+        }
+    }
+
+    pub(crate) fn empty() -> Body {
+        Body::reusable(Bytes::new())
+    }
+
+    pub(crate) fn reusable(chunk: Bytes) -> Body {
+        Body {
+            inner: Inner::Reusable(chunk),
+        }
+    }
+
+    /// Wrap a [`HttpBody`] in a box inside `Body`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use reqwest::Body;
+    /// # use futures_util;
+    /// # fn main() {
+    /// let content = "hello,world!".to_string();
+    ///
+    /// let body = Body::wrap(content);
+    /// # }
+    /// ```
+    pub fn wrap<B>(inner: B) -> Body
+    where
+        B: HttpBody + Send + Sync + 'static,
+        B::Data: Into<Bytes>,
+        B::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
+    {
+        use http_body_util::BodyExt;
+
+        let boxed = IntoBytesBody { inner }.map_err(Into::into).boxed();
+
+        Body {
+            inner: Inner::Streaming(boxed),
+        }
+    }
+
+    pub(crate) fn try_clone(&self) -> Option<Body> {
+        match self.inner {
+            Inner::Reusable(ref chunk) => Some(Body::reusable(chunk.clone())),
+            Inner::Streaming { .. } => None,
+        }
+    }
+
+    #[cfg(feature = "multipart")]
+    pub(crate) fn into_stream(self) -> DataStream<Body> {
+        DataStream(self)
+    }
+
+    #[cfg(feature = "multipart")]
+    pub(crate) fn content_length(&self) -> Option<u64> {
+        match self.inner {
+            Inner::Reusable(ref bytes) => Some(bytes.len() as u64),
+            Inner::Streaming(ref body) => body.size_hint().exact(),
+        }
+    }
+}
+
+impl Default for Body {
+    #[inline]
+    fn default() -> Body {
+        Body::empty()
+    }
+}
+
+/*
+impl From<hyper::Body> for Body {
+    #[inline]
+    fn from(body: hyper::Body) -> Body {
+        Self {
+            inner: Inner::Streaming {
+                body: Box::pin(WrapHyper(body)),
+            },
+        }
+    }
+}
+*/
+
+impl From<Bytes> for Body {
+    #[inline]
+    fn from(bytes: Bytes) -> Body {
+        Body::reusable(bytes)
+    }
+}
+
+impl From<Vec<u8>> for Body {
+    #[inline]
+    fn from(vec: Vec<u8>) -> Body {
+        Body::reusable(vec.into())
+    }
+}
+
+impl From<&'static [u8]> for Body {
+    #[inline]
+    fn from(s: &'static [u8]) -> Body {
+        Body::reusable(Bytes::from_static(s))
+    }
+}
+
+impl From<String> for Body {
+    #[inline]
+    fn from(s: String) -> Body {
+        Body::reusable(s.into())
+    }
+}
+
+impl From<&'static str> for Body {
+    #[inline]
+    fn from(s: &'static str) -> Body {
+        s.as_bytes().into()
+    }
+}
+
+#[cfg(feature = "stream")]
+#[cfg_attr(docsrs, doc(cfg(feature = "stream")))]
+impl From<File> for Body {
+    #[inline]
+    fn from(file: File) -> Body {
+        Body::wrap_stream(ReaderStream::new(file))
+    }
+}
+
+impl fmt::Debug for Body {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("Body").finish()
+    }
+}
+
+impl HttpBody for Body {
+    type Data = Bytes;
+    type Error = crate::Error;
+
+    fn poll_frame(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context,
+    ) -> Poll<Option<Result<hyper::body::Frame<Self::Data>, Self::Error>>> {
+        match self.inner {
+            Inner::Reusable(ref mut bytes) => {
+                let out = bytes.split_off(0);
+                if out.is_empty() {
+                    Poll::Ready(None)
+                } else {
+                    Poll::Ready(Some(Ok(hyper::body::Frame::data(out))))
+                }
+            }
+            Inner::Streaming(ref mut body) => Poll::Ready(
+                ready!(Pin::new(body).poll_frame(cx))
+                    .map(|opt_chunk| opt_chunk.map_err(crate::error::body)),
+            ),
+        }
+    }
+
+    fn size_hint(&self) -> http_body::SizeHint {
+        match self.inner {
+            Inner::Reusable(ref bytes) => http_body::SizeHint::with_exact(bytes.len() as u64),
+            Inner::Streaming(ref body) => body.size_hint(),
+        }
+    }
+
+    fn is_end_stream(&self) -> bool {
+        match self.inner {
+            Inner::Reusable(ref bytes) => bytes.is_empty(),
+            Inner::Streaming(ref body) => body.is_end_stream(),
+        }
+    }
+}
+
+// ===== impl TotalTimeoutBody =====
+
+pub(crate) fn total_timeout<B>(body: B, timeout: Pin<Box<Sleep>>) -> TotalTimeoutBody<B> {
+    TotalTimeoutBody {
+        inner: body,
+        timeout,
+    }
+}
+
+pub(crate) fn with_read_timeout<B>(body: B, timeout: Duration) -> ReadTimeoutBody<B> {
+    ReadTimeoutBody {
+        inner: body,
+        sleep: None,
+        timeout,
+    }
+}
+
+impl<B> hyper::body::Body for TotalTimeoutBody<B>
+where
+    B: hyper::body::Body,
+    B::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
+{
+    type Data = B::Data;
+    type Error = crate::Error;
+
+    fn poll_frame(
+        self: Pin<&mut Self>,
+        cx: &mut Context,
+    ) -> Poll<Option<Result<hyper::body::Frame<Self::Data>, Self::Error>>> {
+        let this = self.project();
+        if let Poll::Ready(()) = this.timeout.as_mut().poll(cx) {
+            return Poll::Ready(Some(Err(crate::error::body(crate::error::TimedOut))));
+        }
+        Poll::Ready(
+            ready!(this.inner.poll_frame(cx))
+                .map(|opt_chunk| opt_chunk.map_err(crate::error::body)),
+        )
+    }
+
+    #[inline]
+    fn size_hint(&self) -> http_body::SizeHint {
+        self.inner.size_hint()
+    }
+
+    #[inline]
+    fn is_end_stream(&self) -> bool {
+        self.inner.is_end_stream()
+    }
+}
+
+impl<B> hyper::body::Body for ReadTimeoutBody<B>
+where
+    B: hyper::body::Body,
+    B::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
+{
+    type Data = B::Data;
+    type Error = crate::Error;
+
+    fn poll_frame(
+        self: Pin<&mut Self>,
+        cx: &mut Context,
+    ) -> Poll<Option<Result<hyper::body::Frame<Self::Data>, Self::Error>>> {
+        let mut this = self.project();
+
+        // Start the `Sleep` if not active.
+        let sleep_pinned = if let Some(some) = this.sleep.as_mut().as_pin_mut() {
+            some
+        } else {
+            this.sleep.set(Some(tokio::time::sleep(*this.timeout)));
+            this.sleep.as_mut().as_pin_mut().unwrap()
+        };
+
+        // Error if the timeout has expired.
+        if let Poll::Ready(()) = sleep_pinned.poll(cx) {
+            return Poll::Ready(Some(Err(crate::error::body(crate::error::TimedOut))));
+        }
+
+        let item = ready!(this.inner.poll_frame(cx))
+            .map(|opt_chunk| opt_chunk.map_err(crate::error::body));
+        // a ready frame means timeout is reset
+        this.sleep.set(None);
+        Poll::Ready(item)
+    }
+
+    #[inline]
+    fn size_hint(&self) -> http_body::SizeHint {
+        self.inner.size_hint()
+    }
+
+    #[inline]
+    fn is_end_stream(&self) -> bool {
+        self.inner.is_end_stream()
+    }
+}
+
+pub(crate) type ResponseBody =
+    http_body_util::combinators::BoxBody<Bytes, Box<dyn std::error::Error + Send + Sync>>;
+
+pub(crate) fn boxed<B>(body: B) -> ResponseBody
+where
+    B: hyper::body::Body<Data = Bytes> + Send + Sync + 'static,
+    B::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
+{
+    use http_body_util::BodyExt;
+
+    body.map_err(box_err).boxed()
+}
+
+pub(crate) fn response<B>(
+    body: B,
+    deadline: Option<Pin<Box<Sleep>>>,
+    read_timeout: Option<Duration>,
+) -> ResponseBody
+where
+    B: hyper::body::Body<Data = Bytes> + Send + Sync + 'static,
+    B::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
+{
+    use http_body_util::BodyExt;
+
+    match (deadline, read_timeout) {
+        (Some(total), Some(read)) => {
+            let body = with_read_timeout(body, read).map_err(box_err);
+            total_timeout(body, total).map_err(box_err).boxed()
+        }
+        (Some(total), None) => total_timeout(body, total).map_err(box_err).boxed(),
+        (None, Some(read)) => with_read_timeout(body, read).map_err(box_err).boxed(),
+        (None, None) => body.map_err(box_err).boxed(),
+    }
+}
+
+fn box_err<E>(err: E) -> Box<dyn std::error::Error + Send + Sync>
+where
+    E: Into<Box<dyn std::error::Error + Send + Sync>>,
+{
+    err.into()
+}
+
+// ===== impl DataStream =====
+
+#[cfg(any(feature = "stream", feature = "multipart",))]
+impl<B> futures_core::Stream for DataStream<B>
+where
+    B: HttpBody<Data = Bytes> + Unpin,
+{
+    type Item = Result<Bytes, B::Error>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
+        loop {
+            return match ready!(Pin::new(&mut self.0).poll_frame(cx)) {
+                Some(Ok(frame)) => {
+                    // skip non-data frames
+                    if let Ok(buf) = frame.into_data() {
+                        Poll::Ready(Some(Ok(buf)))
+                    } else {
+                        continue;
+                    }
+                }
+                Some(Err(err)) => Poll::Ready(Some(Err(err))),
+                None => Poll::Ready(None),
+            };
+        }
+    }
+}
+
+// ===== impl IntoBytesBody =====
+
+pin_project! {
+    struct IntoBytesBody<B> {
+        #[pin]
+        inner: B,
+    }
+}
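// The two timeout wrappers above differ in when their timer re-arms. A sketch
// of the same semantics with plain tokio timers (assumes the `tokio` crate
// with its `rt`, `time`, and `macros` features; durations are illustrative):
use std::time::Duration;
use tokio::time::{sleep, timeout};

// Stand-in for one frame of a streaming body arriving after a short gap.
async fn next_frame() {
    sleep(Duration::from_millis(200)).await;
}

#[tokio::main]
async fn main() {
    // Read timeout: like `ReadTimeoutBody`, the deadline is re-armed for every
    // frame, so a slow overall transfer passes as long as each gap stays short.
    for _ in 0..10 {
        timeout(Duration::from_millis(500), next_frame())
            .await
            .expect("inter-frame gap exceeded the read timeout");
    }

    // Total timeout: like `TotalTimeoutBody`, a single deadline spans all
    // frames, so the same ten 200ms frames blow a 1s budget for the whole body.
    let whole_body = async {
        for _ in 0..10 {
            next_frame().await;
        }
    };
    assert!(timeout(Duration::from_secs(1), whole_body).await.is_err());
}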
+
+// We can't use `map_frame()` because that loses the hint data (for good reason).
+// But we aren't transforming the data.
+impl<B> hyper::body::Body for IntoBytesBody<B>
+where
+    B: hyper::body::Body,
+    B::Data: Into<Bytes>,
+{
+    type Data = Bytes;
+    type Error = B::Error;
+
+    fn poll_frame(
+        self: Pin<&mut Self>,
+        cx: &mut Context,
+    ) -> Poll<Option<Result<hyper::body::Frame<Self::Data>, Self::Error>>> {
+        match ready!(self.project().inner.poll_frame(cx)) {
+            Some(Ok(f)) => Poll::Ready(Some(Ok(f.map_data(Into::into)))),
+            Some(Err(e)) => Poll::Ready(Some(Err(e))),
+            None => Poll::Ready(None),
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> http_body::SizeHint {
+        self.inner.size_hint()
+    }
+
+    #[inline]
+    fn is_end_stream(&self) -> bool {
+        self.inner.is_end_stream()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use http_body::Body as _;
+
+    use super::Body;
+
+    #[test]
+    fn test_as_bytes() {
+        let test_data = b"Test body";
+        let body = Body::from(&test_data[..]);
+        assert_eq!(body.as_bytes(), Some(&test_data[..]));
+    }
+
+    #[test]
+    fn body_exact_length() {
+        let empty_body = Body::empty();
+        assert!(empty_body.is_end_stream());
+        assert_eq!(empty_body.size_hint().exact(), Some(0));
+
+        let bytes_body = Body::reusable("abc".into());
+        assert!(!bytes_body.is_end_stream());
+        assert_eq!(bytes_body.size_hint().exact(), Some(3));
+
+        // can delegate even when wrapped
+        let stream_body = Body::wrap(empty_body);
+        assert!(stream_body.is_end_stream());
+        assert_eq!(stream_body.size_hint().exact(), Some(0));
+    }
+}
diff --git a/rust/reqwest/src/async_impl/client.rs b/rust/reqwest/src/async_impl/client.rs
new file mode 100644
index 0000000000..1c30d21d37
--- /dev/null
+++ b/rust/reqwest/src/async_impl/client.rs
@@ -0,0 +1,3031 @@
+#[cfg(any(feature = "native-tls", feature = "__rustls",))]
+use std::any::Any;
+use std::future::Future;
+use std::net::IpAddr;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{ready, Context, Poll};
+use std::time::Duration;
+use std::{collections::HashMap, convert::TryInto, net::SocketAddr};
+use std::{fmt, str};
+
+use super::decoder::Accepts;
+use super::request::{Request, RequestBuilder};
+use super::response::Response;
+use super::Body;
+#[cfg(feature = "http3")]
+use crate::async_impl::h3_client::connect::{H3ClientConfig, H3Connector};
+#[cfg(feature = "http3")]
+use crate::async_impl::h3_client::H3Client;
+use crate::config::{RequestConfig, TotalTimeout};
+#[cfg(unix)]
+use crate::connect::uds::UnixSocketProvider;
+use crate::connect::{
+    sealed::{Conn, Unnameable},
+    BoxedConnectorLayer, BoxedConnectorService, Connector, ConnectorBuilder,
+};
+#[cfg(feature = "cookies")]
+use crate::cookie;
+#[cfg(feature = "cookies")]
+use crate::cookie::service::CookieService;
+#[cfg(feature = "hickory-dns")]
+use crate::dns::hickory::HickoryDnsResolver;
+use crate::dns::{gai::GaiResolver, DnsResolverWithOverrides, DynResolver, Resolve};
+use crate::error::{self, BoxError};
+use crate::into_url::try_uri;
+use crate::proxy::Matcher as ProxyMatcher;
+use crate::redirect::{self, TowerRedirectPolicy};
+#[cfg(feature = "__rustls")]
+use crate::tls::CertificateRevocationList;
+#[cfg(feature = "__tls")]
+use crate::tls::{self, TlsBackend};
+#[cfg(feature = "__tls")]
+use crate::Certificate;
+#[cfg(any(feature = "native-tls", feature = "__rustls"))]
+use crate::Identity;
+use crate::{IntoUrl, Method, Proxy, Url};
+
+use http::header::{
+    Entry, HeaderMap, HeaderValue, ACCEPT, ACCEPT_ENCODING, PROXY_AUTHORIZATION, RANGE, USER_AGENT,
+};
+use http::uri::Scheme;
+use http::Uri;
+use hyper_util::client::legacy::connect::HttpConnector;
+#[cfg(feature = "default-tls")]
+use native_tls_crate::TlsConnector;
+use pin_project_lite::pin_project;
+#[cfg(feature =
"http3")] +use quinn::TransportConfig; +#[cfg(feature = "http3")] +use quinn::VarInt; +use tokio::time::Sleep; +use tower::util::BoxCloneSyncServiceLayer; +use tower::{Layer, Service}; +use tower_http::follow_redirect::FollowRedirect; + +/// An asynchronous `Client` to make Requests with. +/// +/// The Client has various configuration values to tweak, but the defaults +/// are set to what is usually the most commonly desired value. To configure a +/// `Client`, use `Client::builder()`. +/// +/// The `Client` holds a connection pool internally to improve performance +/// by reusing connections and avoiding setup overhead, so it is advised that +/// you create one and **reuse** it. +/// +/// You do **not** have to wrap the `Client` in an [`Rc`] or [`Arc`] to **reuse** it, +/// because it already uses an [`Arc`] internally. +/// +/// # Connection Pooling +/// +/// The connection pool can be configured using [`ClientBuilder`] methods +/// with the `pool_` prefix, such as [`ClientBuilder::pool_idle_timeout`] +/// and [`ClientBuilder::pool_max_idle_per_host`]. +/// +/// [`Rc`]: std::rc::Rc +#[derive(Clone)] +pub struct Client { + inner: Arc, +} + +/// A `ClientBuilder` can be used to create a `Client` with custom configuration. +#[must_use] +pub struct ClientBuilder { + config: Config, +} + +enum HttpVersionPref { + Http1, + #[cfg(feature = "http2")] + Http2, + #[cfg(feature = "http3")] + Http3, + All, +} + +#[derive(Clone)] +struct HyperService { + hyper: HyperClient, +} + +impl Service> for HyperService { + type Error = crate::Error; + type Response = http::Response; + type Future = Pin> + Send + Sync>>; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.hyper.poll_ready(cx).map_err(crate::error::request) + } + + fn call(&mut self, req: hyper::Request) -> Self::Future { + let clone = self.hyper.clone(); + let mut inner = std::mem::replace(&mut self.hyper, clone); + Box::pin(async move { inner.call(req).await.map_err(crate::error::request) }) + } +} + +struct Config { + // NOTE: When adding a new field, update `fmt::Debug for ClientBuilder` + accepts: Accepts, + headers: HeaderMap, + #[cfg(feature = "__tls")] + hostname_verification: bool, + #[cfg(feature = "__tls")] + certs_verification: bool, + #[cfg(feature = "__tls")] + tls_sni: bool, + connect_timeout: Option, + connection_verbose: bool, + pool_idle_timeout: Option, + pool_max_idle_per_host: usize, + tcp_keepalive: Option, + tcp_keepalive_interval: Option, + tcp_keepalive_retries: Option, + #[cfg(any(target_os = "android", target_os = "fuchsia", target_os = "linux"))] + tcp_user_timeout: Option, + #[cfg(any(feature = "native-tls", feature = "__rustls"))] + identity: Option, + proxies: Vec, + auto_sys_proxy: bool, + redirect_policy: redirect::Policy, + retry_policy: crate::retry::Builder, + referer: bool, + read_timeout: Option, + timeout: Option, + #[cfg(feature = "__tls")] + root_certs: Vec, + #[cfg(feature = "__tls")] + tls_built_in_root_certs: bool, + #[cfg(feature = "rustls-tls-webpki-roots-no-provider")] + tls_built_in_certs_webpki: bool, + #[cfg(feature = "rustls-tls-native-roots-no-provider")] + tls_built_in_certs_native: bool, + #[cfg(feature = "__rustls")] + crls: Vec, + #[cfg(feature = "__tls")] + min_tls_version: Option, + #[cfg(feature = "__tls")] + max_tls_version: Option, + #[cfg(feature = "__tls")] + tls_info: bool, + #[cfg(feature = "__tls")] + tls: TlsBackend, + connector_layers: Vec, + http_version_pref: HttpVersionPref, + http09_responses: bool, + http1_title_case_headers: bool, + 
http1_allow_obsolete_multiline_headers_in_responses: bool, + http1_ignore_invalid_headers_in_responses: bool, + http1_allow_spaces_after_header_name_in_responses: bool, + #[cfg(feature = "http2")] + http2_initial_stream_window_size: Option, + #[cfg(feature = "http2")] + http2_initial_connection_window_size: Option, + #[cfg(feature = "http2")] + http2_adaptive_window: bool, + #[cfg(feature = "http2")] + http2_max_frame_size: Option, + #[cfg(feature = "http2")] + http2_max_header_list_size: Option, + #[cfg(feature = "http2")] + http2_keep_alive_interval: Option, + #[cfg(feature = "http2")] + http2_keep_alive_timeout: Option, + #[cfg(feature = "http2")] + http2_keep_alive_while_idle: bool, + local_address: Option, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + interface: Option, + nodelay: bool, + #[cfg(feature = "cookies")] + cookie_store: Option>, + hickory_dns: bool, + error: Option, + https_only: bool, + #[cfg(feature = "http3")] + tls_enable_early_data: bool, + #[cfg(feature = "http3")] + quic_max_idle_timeout: Option, + #[cfg(feature = "http3")] + quic_stream_receive_window: Option, + #[cfg(feature = "http3")] + quic_receive_window: Option, + #[cfg(feature = "http3")] + quic_send_window: Option, + #[cfg(feature = "http3")] + quic_congestion_bbr: bool, + #[cfg(feature = "http3")] + h3_max_field_section_size: Option, + #[cfg(feature = "http3")] + h3_send_grease: Option, + dns_overrides: HashMap>, + dns_resolver: Option>, + + #[cfg(unix)] + unix_socket: Option>, +} + +impl Default for ClientBuilder { + fn default() -> Self { + Self::new() + } +} + +impl ClientBuilder { + /// Constructs a new `ClientBuilder`. + /// + /// This is the same as `Client::builder()`. 
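// The `Config` fields above capture every builder option, and `new()` below
// seeds their defaults (90s pool idle timeout, 15s TCP keepalive, nodelay on).
// A sketch of overriding a few of them through the public builder methods
// (the values here are illustrative only):
fn tuned_client() -> reqwest::Result<reqwest::Client> {
    reqwest::Client::builder()
        .pool_idle_timeout(std::time::Duration::from_secs(30))
        .pool_max_idle_per_host(8)
        .tcp_keepalive(std::time::Duration::from_secs(60))
        .tcp_nodelay(true)
        .build()
}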
+ pub fn new() -> Self { + let mut headers: HeaderMap = HeaderMap::with_capacity(2); + headers.insert(ACCEPT, HeaderValue::from_static("*/*")); + + ClientBuilder { + config: Config { + error: None, + accepts: Accepts::default(), + headers, + #[cfg(feature = "__tls")] + hostname_verification: true, + #[cfg(feature = "__tls")] + certs_verification: true, + #[cfg(feature = "__tls")] + tls_sni: true, + connect_timeout: None, + connection_verbose: false, + pool_idle_timeout: Some(Duration::from_secs(90)), + pool_max_idle_per_host: usize::MAX, + tcp_keepalive: Some(Duration::from_secs(15)), + tcp_keepalive_interval: Some(Duration::from_secs(15)), + tcp_keepalive_retries: Some(3), + #[cfg(any(target_os = "android", target_os = "fuchsia", target_os = "linux"))] + tcp_user_timeout: Some(Duration::from_secs(30)), + proxies: Vec::new(), + auto_sys_proxy: true, + redirect_policy: redirect::Policy::default(), + retry_policy: crate::retry::Builder::default(), + referer: true, + read_timeout: None, + timeout: None, + #[cfg(feature = "__tls")] + root_certs: Vec::new(), + #[cfg(feature = "__tls")] + tls_built_in_root_certs: true, + #[cfg(feature = "rustls-tls-webpki-roots-no-provider")] + tls_built_in_certs_webpki: true, + #[cfg(feature = "rustls-tls-native-roots-no-provider")] + tls_built_in_certs_native: true, + #[cfg(any(feature = "native-tls", feature = "__rustls"))] + identity: None, + #[cfg(feature = "__rustls")] + crls: vec![], + #[cfg(feature = "__tls")] + min_tls_version: None, + #[cfg(feature = "__tls")] + max_tls_version: None, + #[cfg(feature = "__tls")] + tls_info: false, + #[cfg(feature = "__tls")] + tls: TlsBackend::default(), + connector_layers: Vec::new(), + http_version_pref: HttpVersionPref::All, + http09_responses: false, + http1_title_case_headers: false, + http1_allow_obsolete_multiline_headers_in_responses: false, + http1_ignore_invalid_headers_in_responses: false, + http1_allow_spaces_after_header_name_in_responses: false, + #[cfg(feature = "http2")] + http2_initial_stream_window_size: None, + #[cfg(feature = "http2")] + http2_initial_connection_window_size: None, + #[cfg(feature = "http2")] + http2_adaptive_window: false, + #[cfg(feature = "http2")] + http2_max_frame_size: None, + #[cfg(feature = "http2")] + http2_max_header_list_size: None, + #[cfg(feature = "http2")] + http2_keep_alive_interval: None, + #[cfg(feature = "http2")] + http2_keep_alive_timeout: None, + #[cfg(feature = "http2")] + http2_keep_alive_while_idle: false, + local_address: None, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + interface: None, + nodelay: true, + hickory_dns: cfg!(feature = "hickory-dns"), + #[cfg(feature = "cookies")] + cookie_store: None, + https_only: false, + dns_overrides: HashMap::new(), + #[cfg(feature = "http3")] + tls_enable_early_data: false, + #[cfg(feature = "http3")] + quic_max_idle_timeout: None, + #[cfg(feature = "http3")] + quic_stream_receive_window: None, + #[cfg(feature = "http3")] + quic_receive_window: None, + #[cfg(feature = "http3")] + quic_send_window: None, + #[cfg(feature = "http3")] + quic_congestion_bbr: false, + #[cfg(feature = "http3")] + h3_max_field_section_size: None, + #[cfg(feature = "http3")] + h3_send_grease: None, + dns_resolver: None, + #[cfg(unix)] + unix_socket: None, + }, + } + } +} + +impl ClientBuilder { + /// Returns a `Client` that uses 
this `ClientBuilder` configuration. + /// + /// # Errors + /// + /// This method fails if a TLS backend cannot be initialized, or the resolver + /// cannot load the system configuration. + pub fn build(self) -> crate::Result { + let config = self.config; + + if let Some(err) = config.error { + return Err(err); + } + + let mut proxies = config.proxies; + if config.auto_sys_proxy { + proxies.push(ProxyMatcher::system()); + } + let proxies = Arc::new(proxies); + + #[allow(unused)] + #[cfg(feature = "http3")] + let mut h3_connector = None; + + let resolver = { + let mut resolver: Arc = match config.hickory_dns { + false => Arc::new(GaiResolver::new()), + #[cfg(feature = "hickory-dns")] + true => Arc::new(HickoryDnsResolver::default()), + #[cfg(not(feature = "hickory-dns"))] + true => unreachable!("hickory-dns shouldn't be enabled unless the feature is"), + }; + if let Some(dns_resolver) = config.dns_resolver { + resolver = dns_resolver; + } + if !config.dns_overrides.is_empty() { + resolver = Arc::new(DnsResolverWithOverrides::new( + resolver, + config.dns_overrides, + )); + } + DynResolver::new(resolver) + }; + + let mut connector_builder = { + #[cfg(feature = "__tls")] + fn user_agent(headers: &HeaderMap) -> Option { + headers.get(USER_AGENT).cloned() + } + + let mut http = HttpConnector::new_with_resolver(resolver.clone()); + http.set_connect_timeout(config.connect_timeout); + + #[cfg(all(feature = "http3", feature = "__rustls"))] + let build_h3_connector = + |resolver, + tls, + quic_max_idle_timeout: Option, + quic_stream_receive_window, + quic_receive_window, + quic_send_window, + quic_congestion_bbr, + h3_max_field_section_size, + h3_send_grease, + local_address, + http_version_pref: &HttpVersionPref| { + let mut transport_config = TransportConfig::default(); + + if let Some(max_idle_timeout) = quic_max_idle_timeout { + transport_config.max_idle_timeout(Some( + max_idle_timeout.try_into().map_err(error::builder)?, + )); + } + + if let Some(stream_receive_window) = quic_stream_receive_window { + transport_config.stream_receive_window(stream_receive_window); + } + + if let Some(receive_window) = quic_receive_window { + transport_config.receive_window(receive_window); + } + + if let Some(send_window) = quic_send_window { + transport_config.send_window(send_window); + } + + if quic_congestion_bbr { + let factory = Arc::new(quinn::congestion::BbrConfig::default()); + transport_config.congestion_controller_factory(factory); + } + + let mut h3_client_config = H3ClientConfig::default(); + + if let Some(max_field_section_size) = h3_max_field_section_size { + h3_client_config.max_field_section_size = Some(max_field_section_size); + } + + if let Some(send_grease) = h3_send_grease { + h3_client_config.send_grease = Some(send_grease); + } + + let res = H3Connector::new( + resolver, + tls, + local_address, + transport_config, + h3_client_config, + ); + + match res { + Ok(connector) => Ok(Some(connector)), + Err(err) => { + if let HttpVersionPref::Http3 = http_version_pref { + Err(error::builder(err)) + } else { + Ok(None) + } + } + } + }; + + #[cfg(feature = "__tls")] + match config.tls { + #[cfg(feature = "default-tls")] + TlsBackend::Default => { + let mut tls = TlsConnector::builder(); + + #[cfg(all(feature = "native-tls-alpn", not(feature = "http3")))] + { + match config.http_version_pref { + HttpVersionPref::Http1 => { + tls.request_alpns(&["http/1.1"]); + } + #[cfg(feature = "http2")] + HttpVersionPref::Http2 => { + tls.request_alpns(&["h2"]); + } + HttpVersionPref::All => { + 
tls.request_alpns(&["h2", "http/1.1"]); + } + } + } + + tls.danger_accept_invalid_hostnames(!config.hostname_verification); + + tls.danger_accept_invalid_certs(!config.certs_verification); + + tls.use_sni(config.tls_sni); + + tls.disable_built_in_roots(!config.tls_built_in_root_certs); + + for cert in config.root_certs { + cert.add_to_native_tls(&mut tls); + } + + #[cfg(feature = "native-tls")] + { + if let Some(id) = config.identity { + id.add_to_native_tls(&mut tls)?; + } + } + #[cfg(all(feature = "__rustls", not(feature = "native-tls")))] + { + // Default backend + rustls Identity doesn't work. + if let Some(_id) = config.identity { + return Err(crate::error::builder("incompatible TLS identity type")); + } + } + + if let Some(min_tls_version) = config.min_tls_version { + let protocol = min_tls_version.to_native_tls().ok_or_else(|| { + // TLS v1.3. This would be entirely reasonable, + // native-tls just doesn't support it. + // https://github.com/sfackler/rust-native-tls/issues/140 + crate::error::builder("invalid minimum TLS version for backend") + })?; + tls.min_protocol_version(Some(protocol)); + } + + if let Some(max_tls_version) = config.max_tls_version { + let protocol = max_tls_version.to_native_tls().ok_or_else(|| { + // TLS v1.3. + // We could arguably do max_protocol_version(None), given + // that 1.4 does not exist yet, but that'd get messy in the + // future. + crate::error::builder("invalid maximum TLS version for backend") + })?; + tls.max_protocol_version(Some(protocol)); + } + + ConnectorBuilder::new_default_tls( + http, + tls, + proxies.clone(), + user_agent(&config.headers), + config.local_address, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + config.interface.as_deref(), + config.nodelay, + config.tls_info, + )? 
+ } + #[cfg(feature = "native-tls")] + TlsBackend::BuiltNativeTls(conn) => ConnectorBuilder::from_built_default_tls( + http, + conn, + proxies.clone(), + user_agent(&config.headers), + config.local_address, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + config.interface.as_deref(), + config.nodelay, + config.tls_info, + ), + #[cfg(feature = "__rustls")] + TlsBackend::BuiltRustls(conn) => { + #[cfg(feature = "http3")] + { + h3_connector = build_h3_connector( + resolver.clone(), + conn.clone(), + config.quic_max_idle_timeout, + config.quic_stream_receive_window, + config.quic_receive_window, + config.quic_send_window, + config.quic_congestion_bbr, + config.h3_max_field_section_size, + config.h3_send_grease, + config.local_address, + &config.http_version_pref, + )?; + } + + ConnectorBuilder::new_rustls_tls( + http, + conn, + proxies.clone(), + user_agent(&config.headers), + config.local_address, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + config.interface.as_deref(), + config.nodelay, + config.tls_info, + ) + } + #[cfg(feature = "__rustls")] + TlsBackend::Rustls => { + use crate::tls::{IgnoreHostname, NoVerifier}; + + // Set root certificates. + let mut root_cert_store = rustls::RootCertStore::empty(); + for cert in config.root_certs { + cert.add_to_rustls(&mut root_cert_store)?; + } + + #[cfg(feature = "rustls-tls-webpki-roots-no-provider")] + if config.tls_built_in_certs_webpki { + root_cert_store.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned()); + } + + #[cfg(feature = "rustls-tls-native-roots-no-provider")] + if config.tls_built_in_certs_native { + let mut valid_count = 0; + let mut invalid_count = 0; + + let load_results = rustls_native_certs::load_native_certs(); + for cert in load_results.certs { + // Continue on parsing errors, as native stores often include ancient or syntactically + // invalid certificates, like root certificates without any X509 extensions. + // Inspiration: https://github.com/rustls/rustls/blob/633bf4ba9d9521a95f68766d04c22e2b01e68318/rustls/src/anchors.rs#L105-L112 + match root_cert_store.add(cert.into()) { + Ok(_) => valid_count += 1, + Err(err) => { + invalid_count += 1; + log::debug!("rustls failed to parse DER certificate: {err:?}"); + } + } + } + if valid_count == 0 && invalid_count > 0 { + let err = if load_results.errors.is_empty() { + crate::error::builder( + "zero valid certificates found in native root store", + ) + } else { + use std::fmt::Write as _; + let mut acc = String::new(); + for err in load_results.errors { + let _ = writeln!(&mut acc, "{err}"); + } + + crate::error::builder(acc) + }; + + return Err(err); + } + } + + // Set TLS versions. 
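// The root-store population above is the path that `add_root_certificate`
// feeds into. A sketch of supplying an extra trust anchor (the PEM bytes are
// a placeholder, and `use_rustls_tls` assumes a rustls TLS feature):
fn client_with_extra_root(pem: &[u8]) -> reqwest::Result<reqwest::Client> {
    let cert = reqwest::Certificate::from_pem(pem)?;
    reqwest::Client::builder()
        .use_rustls_tls()
        .add_root_certificate(cert)
        .build()
}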
+ let mut versions = rustls::ALL_VERSIONS.to_vec(); + + if let Some(min_tls_version) = config.min_tls_version { + versions.retain(|&supported_version| { + match tls::Version::from_rustls(supported_version.version) { + Some(version) => version >= min_tls_version, + // Assume it's so new we don't know about it, allow it + // (as of writing this is unreachable) + None => true, + } + }); + } + + if let Some(max_tls_version) = config.max_tls_version { + versions.retain(|&supported_version| { + match tls::Version::from_rustls(supported_version.version) { + Some(version) => version <= max_tls_version, + None => false, + } + }); + } + + if versions.is_empty() { + return Err(crate::error::builder("empty supported tls versions")); + } + + // Allow user to have installed a runtime default. + // If not, we use ring. + let provider = rustls::crypto::CryptoProvider::get_default() + .map(|arc| arc.clone()) + .unwrap_or_else(|| { + #[cfg(not(feature = "__rustls-ring"))] + panic!("No provider set"); + + #[cfg(feature = "__rustls-ring")] + Arc::new(rustls::crypto::ring::default_provider()) + }); + + // Build TLS config + let signature_algorithms = provider.signature_verification_algorithms; + let config_builder = + rustls::ClientConfig::builder_with_provider(provider.clone()) + .with_protocol_versions(&versions) + .map_err(|_| crate::error::builder("invalid TLS versions"))?; + + let config_builder = if !config.certs_verification { + config_builder + .dangerous() + .with_custom_certificate_verifier(Arc::new(NoVerifier)) + } else if !config.hostname_verification { + config_builder + .dangerous() + .with_custom_certificate_verifier(Arc::new(IgnoreHostname::new( + root_cert_store, + signature_algorithms, + ))) + } else { + if config.crls.is_empty() { + config_builder.with_root_certificates(root_cert_store) + } else { + let crls = config + .crls + .iter() + .map(|e| e.as_rustls_crl()) + .collect::>(); + let verifier = + rustls::client::WebPkiServerVerifier::builder_with_provider( + Arc::new(root_cert_store), + provider, + ) + .with_crls(crls) + .build() + .map_err(|_| { + crate::error::builder("invalid TLS verification settings") + })?; + config_builder.with_webpki_verifier(verifier) + } + }; + + // Finalize TLS config + let mut tls = if let Some(id) = config.identity { + id.add_to_rustls(config_builder)? 
+ } else { + config_builder.with_no_client_auth() + }; + + tls.enable_sni = config.tls_sni; + + // ALPN protocol + match config.http_version_pref { + HttpVersionPref::Http1 => { + tls.alpn_protocols = vec!["http/1.1".into()]; + } + #[cfg(feature = "http2")] + HttpVersionPref::Http2 => { + tls.alpn_protocols = vec!["h2".into()]; + } + #[cfg(feature = "http3")] + HttpVersionPref::Http3 => { + tls.alpn_protocols = vec!["h3".into()]; + } + HttpVersionPref::All => { + tls.alpn_protocols = vec![ + #[cfg(feature = "http2")] + "h2".into(), + "http/1.1".into(), + ]; + } + } + + #[cfg(feature = "http3")] + { + tls.enable_early_data = config.tls_enable_early_data; + + h3_connector = build_h3_connector( + resolver.clone(), + tls.clone(), + config.quic_max_idle_timeout, + config.quic_stream_receive_window, + config.quic_receive_window, + config.quic_send_window, + config.quic_congestion_bbr, + config.h3_max_field_section_size, + config.h3_send_grease, + config.local_address, + &config.http_version_pref, + )?; + } + + ConnectorBuilder::new_rustls_tls( + http, + tls, + proxies.clone(), + user_agent(&config.headers), + config.local_address, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + config.interface.as_deref(), + config.nodelay, + config.tls_info, + ) + } + #[cfg(any(feature = "native-tls", feature = "__rustls",))] + TlsBackend::UnknownPreconfigured => { + return Err(crate::error::builder( + "Unknown TLS backend passed to `use_preconfigured_tls`", + )); + } + } + + #[cfg(not(feature = "__tls"))] + ConnectorBuilder::new( + http, + proxies.clone(), + config.local_address, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + config.interface.as_deref(), + config.nodelay, + ) + }; + + connector_builder.set_timeout(config.connect_timeout); + connector_builder.set_verbose(config.connection_verbose); + connector_builder.set_keepalive(config.tcp_keepalive); + connector_builder.set_keepalive_interval(config.tcp_keepalive_interval); + connector_builder.set_keepalive_retries(config.tcp_keepalive_retries); + #[cfg(any(target_os = "android", target_os = "fuchsia", target_os = "linux"))] + connector_builder.set_tcp_user_timeout(config.tcp_user_timeout); + + #[cfg(feature = "socks")] + connector_builder.set_socks_resolver(resolver); + + // TODO: It'd be best to refactor this so the HttpConnector is never + // constructed at all. But there's a lot of code for all the different + // ways TLS can be configured... 
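// The ALPN lists chosen above follow the builder's HTTP version preference.
// A sketch of pinning a client to HTTP/2 only (assumes the `http2` feature):
fn h2_only_client() -> reqwest::Result<reqwest::Client> {
    reqwest::Client::builder()
        .http2_prior_knowledge() // maps to `HttpVersionPref::Http2`, ALPN "h2"
        .https_only(true)
        .build()
}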
+ #[cfg(unix)] + connector_builder.set_unix_socket(config.unix_socket); + + let mut builder = + hyper_util::client::legacy::Client::builder(hyper_util::rt::TokioExecutor::new()); + #[cfg(feature = "http2")] + { + if matches!(config.http_version_pref, HttpVersionPref::Http2) { + builder.http2_only(true); + } + + if let Some(http2_initial_stream_window_size) = config.http2_initial_stream_window_size + { + builder.http2_initial_stream_window_size(http2_initial_stream_window_size); + } + if let Some(http2_initial_connection_window_size) = + config.http2_initial_connection_window_size + { + builder.http2_initial_connection_window_size(http2_initial_connection_window_size); + } + if config.http2_adaptive_window { + builder.http2_adaptive_window(true); + } + if let Some(http2_max_frame_size) = config.http2_max_frame_size { + builder.http2_max_frame_size(http2_max_frame_size); + } + if let Some(http2_max_header_list_size) = config.http2_max_header_list_size { + builder.http2_max_header_list_size(http2_max_header_list_size); + } + if let Some(http2_keep_alive_interval) = config.http2_keep_alive_interval { + builder.http2_keep_alive_interval(http2_keep_alive_interval); + } + if let Some(http2_keep_alive_timeout) = config.http2_keep_alive_timeout { + builder.http2_keep_alive_timeout(http2_keep_alive_timeout); + } + if config.http2_keep_alive_while_idle { + builder.http2_keep_alive_while_idle(true); + } + } + + builder.timer(hyper_util::rt::TokioTimer::new()); + builder.pool_timer(hyper_util::rt::TokioTimer::new()); + builder.pool_idle_timeout(config.pool_idle_timeout); + builder.pool_max_idle_per_host(config.pool_max_idle_per_host); + + if config.http09_responses { + builder.http09_responses(true); + } + + if config.http1_title_case_headers { + builder.http1_title_case_headers(true); + } + + if config.http1_allow_obsolete_multiline_headers_in_responses { + builder.http1_allow_obsolete_multiline_headers_in_responses(true); + } + + if config.http1_ignore_invalid_headers_in_responses { + builder.http1_ignore_invalid_headers_in_responses(true); + } + + if config.http1_allow_spaces_after_header_name_in_responses { + builder.http1_allow_spaces_after_header_name_in_responses(true); + } + + let proxies_maybe_http_auth = proxies.iter().any(|p| p.maybe_has_http_auth()); + let proxies_maybe_http_custom_headers = + proxies.iter().any(|p| p.maybe_has_http_custom_headers()); + + let redirect_policy_desc = if config.redirect_policy.is_default() { + None + } else { + Some(format!("{:?}", &config.redirect_policy)) + }; + + let hyper_client = builder.build(connector_builder.build(config.connector_layers)); + let hyper_service = HyperService { + hyper: hyper_client, + }; + + let redirect_policy = { + let mut p = TowerRedirectPolicy::new(config.redirect_policy); + p.with_referer(config.referer) + .with_https_only(config.https_only); + p + }; + + let retry_policy = config.retry_policy.into_policy(); + + let svc = tower::retry::Retry::new(retry_policy.clone(), hyper_service); + + #[cfg(feature = "cookies")] + let svc = CookieService::new(svc, config.cookie_store.clone()); + let hyper = FollowRedirect::with_policy(svc, redirect_policy.clone()); + + Ok(Client { + inner: Arc::new(ClientRef { + accepts: config.accepts, + #[cfg(feature = "cookies")] + cookie_store: config.cookie_store.clone(), + // Use match instead of map since config is partially moved, + // and it cannot be used in closure + #[cfg(feature = "http3")] + h3_client: match h3_connector { + Some(h3_connector) => { + let h3_service = H3Client::new(h3_connector, 
config.pool_idle_timeout); + let svc = tower::retry::Retry::new(retry_policy, h3_service); + #[cfg(feature = "cookies")] + let svc = CookieService::new(svc, config.cookie_store); + Some(FollowRedirect::with_policy(svc, redirect_policy)) + } + None => None, + }, + headers: config.headers, + referer: config.referer, + read_timeout: config.read_timeout, + total_timeout: RequestConfig::new(config.timeout), + hyper, + proxies, + proxies_maybe_http_auth, + proxies_maybe_http_custom_headers, + https_only: config.https_only, + redirect_policy_desc, + }), + }) + } + + // Higher-level options + + /// Sets the `User-Agent` header to be used by this client. + /// + /// # Example + /// + /// ```rust + /// # async fn doc() -> Result<(), reqwest::Error> { + /// // Name your user agent after your app? + /// static APP_USER_AGENT: &str = concat!( + /// env!("CARGO_PKG_NAME"), + /// "/", + /// env!("CARGO_PKG_VERSION"), + /// ); + /// + /// let client = reqwest::Client::builder() + /// .user_agent(APP_USER_AGENT) + /// .build()?; + /// let res = client.get("https://www.rust-lang.org").send().await?; + /// # Ok(()) + /// # } + /// ``` + pub fn user_agent(mut self, value: V) -> ClientBuilder + where + V: TryInto, + V::Error: Into, + { + match value.try_into() { + Ok(value) => { + self.config.headers.insert(USER_AGENT, value); + } + Err(e) => { + self.config.error = Some(crate::error::builder(e.into())); + } + }; + self + } + /// Sets the default headers for every request. + /// + /// # Example + /// + /// ```rust + /// use reqwest::header; + /// # async fn doc() -> Result<(), reqwest::Error> { + /// let mut headers = header::HeaderMap::new(); + /// headers.insert("X-MY-HEADER", header::HeaderValue::from_static("value")); + /// + /// // Consider marking security-sensitive headers with `set_sensitive`. + /// let mut auth_value = header::HeaderValue::from_static("secret"); + /// auth_value.set_sensitive(true); + /// headers.insert(header::AUTHORIZATION, auth_value); + /// + /// // get a client builder + /// let client = reqwest::Client::builder() + /// .default_headers(headers) + /// .build()?; + /// let res = client.get("https://www.rust-lang.org").send().await?; + /// # Ok(()) + /// # } + /// ``` + pub fn default_headers(mut self, headers: HeaderMap) -> ClientBuilder { + for (key, value) in headers.iter() { + self.config.headers.insert(key, value.clone()); + } + self + } + + /// Enable a persistent cookie store for the client. + /// + /// Cookies received in responses will be preserved and included in + /// additional requests. + /// + /// By default, no cookie store is used. Enabling the cookie store + /// with `cookie_store(true)` will set the store to a default implementation. + /// It is **not** necessary to call [cookie_store(true)](crate::ClientBuilder::cookie_store) if [cookie_provider(my_cookie_store)](crate::ClientBuilder::cookie_provider) + /// is used; calling [cookie_store(true)](crate::ClientBuilder::cookie_store) _after_ [cookie_provider(my_cookie_store)](crate::ClientBuilder::cookie_provider) will result + /// in the provided `my_cookie_store` being **overridden** with a default implementation. + /// + /// # Optional + /// + /// This requires the optional `cookies` feature to be enabled. 
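// A sketch of the `cookie_provider` wiring described above (requires the
// `cookies` feature; keeping a handle on the `Arc` lets the jar be inspected
// or seeded after the client is built):
use std::sync::Arc;

fn cookie_client() -> reqwest::Result<(reqwest::Client, Arc<reqwest::cookie::Jar>)> {
    let jar = Arc::new(reqwest::cookie::Jar::default());
    let client = reqwest::Client::builder()
        .cookie_provider(Arc::clone(&jar))
        .build()?;
    Ok((client, jar))
}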
+ #[cfg(feature = "cookies")] + #[cfg_attr(docsrs, doc(cfg(feature = "cookies")))] + pub fn cookie_store(mut self, enable: bool) -> ClientBuilder { + if enable { + self.cookie_provider(Arc::new(cookie::Jar::default())) + } else { + self.config.cookie_store = None; + self + } + } + + /// Set the persistent cookie store for the client. + /// + /// Cookies received in responses will be passed to this store, and + /// additional requests will query this store for cookies. + /// + /// By default, no cookie store is used. It is **not** necessary to also call + /// [cookie_store(true)](crate::ClientBuilder::cookie_store) if [cookie_provider(my_cookie_store)](crate::ClientBuilder::cookie_provider) is used; calling + /// [cookie_store(true)](crate::ClientBuilder::cookie_store) _after_ [cookie_provider(my_cookie_store)](crate::ClientBuilder::cookie_provider) will result + /// in the provided `my_cookie_store` being **overridden** with a default implementation. + /// + /// # Optional + /// + /// This requires the optional `cookies` feature to be enabled. + #[cfg(feature = "cookies")] + #[cfg_attr(docsrs, doc(cfg(feature = "cookies")))] + pub fn cookie_provider( + mut self, + cookie_store: Arc, + ) -> ClientBuilder { + self.config.cookie_store = Some(cookie_store as _); + self + } + + /// Enable auto gzip decompression by checking the `Content-Encoding` response header. + /// + /// If auto gzip decompression is turned on: + /// + /// - When sending a request and if the request's headers do not already contain + /// an `Accept-Encoding` **and** `Range` values, the `Accept-Encoding` header is set to `gzip`. + /// The request body is **not** automatically compressed. + /// - When receiving a response, if its headers contain a `Content-Encoding` value of + /// `gzip`, both `Content-Encoding` and `Content-Length` are removed from the + /// headers' set. The response body is automatically decompressed. + /// + /// If the `gzip` feature is turned on, the default option is enabled. + /// + /// # Optional + /// + /// This requires the optional `gzip` feature to be enabled + #[cfg(feature = "gzip")] + #[cfg_attr(docsrs, doc(cfg(feature = "gzip")))] + pub fn gzip(mut self, enable: bool) -> ClientBuilder { + self.config.accepts.gzip = enable; + self + } + + /// Enable auto brotli decompression by checking the `Content-Encoding` response header. + /// + /// If auto brotli decompression is turned on: + /// + /// - When sending a request and if the request's headers do not already contain + /// an `Accept-Encoding` **and** `Range` values, the `Accept-Encoding` header is set to `br`. + /// The request body is **not** automatically compressed. + /// - When receiving a response, if its headers contain a `Content-Encoding` value of + /// `br`, both `Content-Encoding` and `Content-Length` are removed from the + /// headers' set. The response body is automatically decompressed. + /// + /// If the `brotli` feature is turned on, the default option is enabled. + /// + /// # Optional + /// + /// This requires the optional `brotli` feature to be enabled + #[cfg(feature = "brotli")] + #[cfg_attr(docsrs, doc(cfg(feature = "brotli")))] + pub fn brotli(mut self, enable: bool) -> ClientBuilder { + self.config.accepts.brotli = enable; + self + } + + /// Enable auto zstd decompression by checking the `Content-Encoding` response header. 
+    ///
+    /// If auto zstd decompression is turned on:
+    ///
+    /// - When sending a request and if the request's headers do not already contain
+    ///   an `Accept-Encoding` **and** `Range` values, the `Accept-Encoding` header is set to `zstd`.
+    ///   The request body is **not** automatically compressed.
+    /// - When receiving a response, if its headers contain a `Content-Encoding` value of
+    ///   `zstd`, both `Content-Encoding` and `Content-Length` are removed from the
+    ///   headers' set. The response body is automatically decompressed.
+    ///
+    /// If the `zstd` feature is turned on, the default option is enabled.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `zstd` feature to be enabled
+    #[cfg(feature = "zstd")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "zstd")))]
+    pub fn zstd(mut self, enable: bool) -> ClientBuilder {
+        self.config.accepts.zstd = enable;
+        self
+    }
+
+    /// Enable auto deflate decompression by checking the `Content-Encoding` response header.
+    ///
+    /// If auto deflate decompression is turned on:
+    ///
+    /// - When sending a request and if the request's headers do not already contain
+    ///   an `Accept-Encoding` **and** `Range` values, the `Accept-Encoding` header is set to `deflate`.
+    ///   The request body is **not** automatically compressed.
+    /// - When receiving a response, if its headers contain a `Content-Encoding` value of
+    ///   `deflate`, both `Content-Encoding` and `Content-Length` are removed from the
+    ///   headers' set. The response body is automatically decompressed.
+    ///
+    /// If the `deflate` feature is turned on, the default option is enabled.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `deflate` feature to be enabled
+    #[cfg(feature = "deflate")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "deflate")))]
+    pub fn deflate(mut self, enable: bool) -> ClientBuilder {
+        self.config.accepts.deflate = enable;
+        self
+    }
+
+    /// Disable auto response body gzip decompression.
+    ///
+    /// This method exists even if the optional `gzip` feature is not enabled.
+    /// This can be used to ensure a `Client` doesn't use gzip decompression
+    /// even if another dependency were to enable the optional `gzip` feature.
+    pub fn no_gzip(self) -> ClientBuilder {
+        #[cfg(feature = "gzip")]
+        {
+            self.gzip(false)
+        }
+
+        #[cfg(not(feature = "gzip"))]
+        {
+            self
+        }
+    }
+
+    /// Disable auto response body brotli decompression.
+    ///
+    /// This method exists even if the optional `brotli` feature is not enabled.
+    /// This can be used to ensure a `Client` doesn't use brotli decompression
+    /// even if another dependency were to enable the optional `brotli` feature.
+    pub fn no_brotli(self) -> ClientBuilder {
+        #[cfg(feature = "brotli")]
+        {
+            self.brotli(false)
+        }
+
+        #[cfg(not(feature = "brotli"))]
+        {
+            self
+        }
+    }
+
+    /// Disable auto response body zstd decompression.
+    ///
+    /// This method exists even if the optional `zstd` feature is not enabled.
+    /// This can be used to ensure a `Client` doesn't use zstd decompression
+    /// even if another dependency were to enable the optional `zstd` feature.
+    pub fn no_zstd(self) -> ClientBuilder {
+        #[cfg(feature = "zstd")]
+        {
+            self.zstd(false)
+        }
+
+        #[cfg(not(feature = "zstd"))]
+        {
+            self
+        }
+    }
+
+    /// Disable auto response body deflate decompression.
+    ///
+    /// This method exists even if the optional `deflate` feature is not enabled.
+    /// This can be used to ensure a `Client` doesn't use deflate decompression
+    /// even if another dependency were to enable the optional `deflate` feature.
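+    ///
+    /// # Example
+    ///
+    /// A short sketch: opt out of deflate decompression regardless of which
+    /// features other dependencies enable.
+    ///
+    /// ```rust
+    /// # fn doc() -> Result<(), reqwest::Error> {
+    /// let client = reqwest::Client::builder()
+    ///     .no_deflate()
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// ```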
+ pub fn no_deflate(self) -> ClientBuilder { + #[cfg(feature = "deflate")] + { + self.deflate(false) + } + + #[cfg(not(feature = "deflate"))] + { + self + } + } + + // Redirect options + + /// Set a `RedirectPolicy` for this client. + /// + /// Default will follow redirects up to a maximum of 10. + pub fn redirect(mut self, policy: redirect::Policy) -> ClientBuilder { + self.config.redirect_policy = policy; + self + } + + /// Enable or disable automatic setting of the `Referer` header. + /// + /// Default is `true`. + pub fn referer(mut self, enable: bool) -> ClientBuilder { + self.config.referer = enable; + self + } + + // Retry options + + /// Set a request retry policy. + /// + /// Default behavior is to retry protocol NACKs. + // XXX: accept an `impl retry::IntoPolicy` instead? + pub fn retry(mut self, policy: crate::retry::Builder) -> ClientBuilder { + self.config.retry_policy = policy; + self + } + + // Proxy options + + /// Add a `Proxy` to the list of proxies the `Client` will use. + /// + /// # Note + /// + /// Adding a proxy will disable the automatic usage of the "system" proxy. + pub fn proxy(mut self, proxy: Proxy) -> ClientBuilder { + self.config.proxies.push(proxy.into_matcher()); + self.config.auto_sys_proxy = false; + self + } + + /// Clear all `Proxies`, so `Client` will use no proxy anymore. + /// + /// # Note + /// To add a proxy exclusion list, use [crate::proxy::Proxy::no_proxy()] + /// on all desired proxies instead. + /// + /// This also disables the automatic usage of the "system" proxy. + pub fn no_proxy(mut self) -> ClientBuilder { + self.config.proxies.clear(); + self.config.auto_sys_proxy = false; + self + } + + // Timeout options + + /// Enables a total request timeout. + /// + /// The timeout is applied from when the request starts connecting until the + /// response body has finished. Also considered a total deadline. + /// + /// Default is no timeout. + pub fn timeout(mut self, timeout: Duration) -> ClientBuilder { + self.config.timeout = Some(timeout); + self + } + + /// Enables a read timeout. + /// + /// The timeout applies to each read operation, and resets after a + /// successful read. This is more appropriate for detecting stalled + /// connections when the size isn't known beforehand. + /// + /// Default is no timeout. + pub fn read_timeout(mut self, timeout: Duration) -> ClientBuilder { + self.config.read_timeout = Some(timeout); + self + } + + /// Set a timeout for only the connect phase of a `Client`. + /// + /// Default is `None`. + /// + /// # Note + /// + /// This **requires** the futures be executed in a tokio runtime with + /// a tokio timer enabled. + pub fn connect_timeout(mut self, timeout: Duration) -> ClientBuilder { + self.config.connect_timeout = Some(timeout); + self + } + + /// Set whether connections should emit verbose logs. + /// + /// Enabling this option will emit [log][] messages at the `TRACE` level + /// for read and write operations on connections. + /// + /// [log]: https://crates.io/crates/log + pub fn connection_verbose(mut self, verbose: bool) -> ClientBuilder { + self.config.connection_verbose = verbose; + self + } + + // HTTP options + + /// Set an optional timeout for idle sockets being kept-alive. + /// + /// Pass `None` to disable timeout. + /// + /// Default is 90 seconds. + pub fn pool_idle_timeout(mut self, val: D) -> ClientBuilder + where + D: Into>, + { + self.config.pool_idle_timeout = val.into(); + self + } + + /// Sets the maximum idle connection per host allowed in the pool. 
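+    ///
+    /// # Example
+    ///
+    /// A sketch; `4` is an arbitrary illustrative limit:
+    ///
+    /// ```rust
+    /// # fn doc() -> Result<(), reqwest::Error> {
+    /// let client = reqwest::Client::builder()
+    ///     .pool_max_idle_per_host(4)
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// ```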
+ /// + /// Default is `usize::MAX` (no limit). + pub fn pool_max_idle_per_host(mut self, max: usize) -> ClientBuilder { + self.config.pool_max_idle_per_host = max; + self + } + + /// Send headers as title case instead of lowercase. + pub fn http1_title_case_headers(mut self) -> ClientBuilder { + self.config.http1_title_case_headers = true; + self + } + + /// Set whether HTTP/1 connections will accept obsolete line folding for + /// header values. + /// + /// Newline codepoints (`\r` and `\n`) will be transformed to spaces when + /// parsing. + pub fn http1_allow_obsolete_multiline_headers_in_responses( + mut self, + value: bool, + ) -> ClientBuilder { + self.config + .http1_allow_obsolete_multiline_headers_in_responses = value; + self + } + + /// Sets whether invalid header lines should be silently ignored in HTTP/1 responses. + pub fn http1_ignore_invalid_headers_in_responses(mut self, value: bool) -> ClientBuilder { + self.config.http1_ignore_invalid_headers_in_responses = value; + self + } + + /// Set whether HTTP/1 connections will accept spaces between header + /// names and the colon that follow them in responses. + /// + /// Newline codepoints (`\r` and `\n`) will be transformed to spaces when + /// parsing. + pub fn http1_allow_spaces_after_header_name_in_responses( + mut self, + value: bool, + ) -> ClientBuilder { + self.config + .http1_allow_spaces_after_header_name_in_responses = value; + self + } + + /// Only use HTTP/1. + pub fn http1_only(mut self) -> ClientBuilder { + self.config.http_version_pref = HttpVersionPref::Http1; + self + } + + /// Allow HTTP/0.9 responses + pub fn http09_responses(mut self) -> ClientBuilder { + self.config.http09_responses = true; + self + } + + /// Only use HTTP/2. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_prior_knowledge(mut self) -> ClientBuilder { + self.config.http_version_pref = HttpVersionPref::Http2; + self + } + + /// Only use HTTP/3. + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_prior_knowledge(mut self) -> ClientBuilder { + self.config.http_version_pref = HttpVersionPref::Http3; + self + } + + /// Sets the `SETTINGS_INITIAL_WINDOW_SIZE` option for HTTP2 stream-level flow control. + /// + /// Default is currently 65,535 but may change internally to optimize for common uses. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_initial_stream_window_size(mut self, sz: impl Into>) -> ClientBuilder { + self.config.http2_initial_stream_window_size = sz.into(); + self + } + + /// Sets the max connection-level flow control for HTTP2 + /// + /// Default is currently 65,535 but may change internally to optimize for common uses. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_initial_connection_window_size( + mut self, + sz: impl Into>, + ) -> ClientBuilder { + self.config.http2_initial_connection_window_size = sz.into(); + self + } + + /// Sets whether to use an adaptive flow control. + /// + /// Enabling this will override the limits set in `http2_initial_stream_window_size` and + /// `http2_initial_connection_window_size`. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_adaptive_window(mut self, enabled: bool) -> ClientBuilder { + self.config.http2_adaptive_window = enabled; + self + } + + /// Sets the maximum frame size to use for HTTP2. 
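+    ///
+    /// # Example
+    ///
+    /// A sketch (requires the `http2` feature; the 64 KiB value is arbitrary):
+    ///
+    /// ```rust
+    /// # #[cfg(feature = "http2")]
+    /// # fn doc() -> Result<(), reqwest::Error> {
+    /// let client = reqwest::Client::builder()
+    ///     .http2_max_frame_size(65_536u32)
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// ```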
+    ///
+    /// Default is currently 16,384 but may change internally to optimize for common uses.
+    #[cfg(feature = "http2")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "http2")))]
+    pub fn http2_max_frame_size(mut self, sz: impl Into<Option<u32>>) -> ClientBuilder {
+        self.config.http2_max_frame_size = sz.into();
+        self
+    }
+
+    /// Sets the maximum size of received header frames for HTTP2.
+    ///
+    /// Default is currently 16 KB, but can change.
+    #[cfg(feature = "http2")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "http2")))]
+    pub fn http2_max_header_list_size(mut self, max_header_size_bytes: u32) -> ClientBuilder {
+        self.config.http2_max_header_list_size = Some(max_header_size_bytes);
+        self
+    }
+
+    /// Sets an interval at which HTTP2 Ping frames are sent to keep a connection alive.
+    ///
+    /// Pass `None` to disable HTTP2 keep-alive.
+    /// Default is currently disabled.
+    #[cfg(feature = "http2")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "http2")))]
+    pub fn http2_keep_alive_interval(
+        mut self,
+        interval: impl Into<Option<Duration>>,
+    ) -> ClientBuilder {
+        self.config.http2_keep_alive_interval = interval.into();
+        self
+    }
+
+    /// Sets a timeout for receiving an acknowledgement of the keep-alive ping.
+    ///
+    /// If the ping is not acknowledged within the timeout, the connection will be closed.
+    /// Does nothing if `http2_keep_alive_interval` is disabled.
+    /// Default is currently disabled.
+    #[cfg(feature = "http2")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "http2")))]
+    pub fn http2_keep_alive_timeout(mut self, timeout: Duration) -> ClientBuilder {
+        self.config.http2_keep_alive_timeout = Some(timeout);
+        self
+    }
+
+    /// Sets whether HTTP2 keep-alive should apply while the connection is idle.
+    ///
+    /// If disabled, keep-alive pings are only sent while there are open request/response streams.
+    /// If enabled, pings are also sent when no streams are active.
+    /// Does nothing if `http2_keep_alive_interval` is disabled.
+    /// Default is `false`.
+    #[cfg(feature = "http2")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "http2")))]
+    pub fn http2_keep_alive_while_idle(mut self, enabled: bool) -> ClientBuilder {
+        self.config.http2_keep_alive_while_idle = enabled;
+        self
+    }
+
+    // TCP options
+
+    /// Set whether sockets have `TCP_NODELAY` enabled.
+    ///
+    /// Default is `true`.
+    pub fn tcp_nodelay(mut self, enabled: bool) -> ClientBuilder {
+        self.config.nodelay = enabled;
+        self
+    }
+
+    /// Bind to a local IP Address.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # fn doc() -> Result<(), reqwest::Error> {
+    /// use std::net::IpAddr;
+    /// let local_addr = IpAddr::from([12, 4, 1, 8]);
+    /// let client = reqwest::Client::builder()
+    ///     .local_address(local_addr)
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn local_address<T>(mut self, addr: T) -> ClientBuilder
+    where
+        T: Into<Option<IpAddr>>,
+    {
+        self.config.local_address = addr.into();
+        self
+    }
+
+    /// Bind connections only on the specified network interface.
+    ///
+    /// This option is only available on the following operating systems:
+    ///
+    /// - Android
+    /// - Fuchsia
+    /// - Linux
+    /// - macOS and macOS-like systems (iOS, tvOS, watchOS and visionOS)
+    /// - Solaris and illumos
+    ///
+    /// On Android, Linux, and Fuchsia, this uses the
+    /// [`SO_BINDTODEVICE`][man-7-socket] socket option. On macOS and macOS-like
+    /// systems, Solaris, and illumos, this instead uses the [`IP_BOUND_IF` and
+    /// `IPV6_BOUND_IF`][man-7p-ip] socket options (as appropriate).
+ /// + /// Note that connections will fail if the provided interface name is not a + /// network interface that currently exists when a connection is established. + /// + /// # Example + /// + /// ``` + /// # fn doc() -> Result<(), reqwest::Error> { + /// let interface = "lo"; + /// let client = reqwest::Client::builder() + /// .interface(interface) + /// .build()?; + /// # Ok(()) + /// # } + /// ``` + /// + /// [man-7-socket]: https://man7.org/linux/man-pages/man7/socket.7.html + /// [man-7p-ip]: https://docs.oracle.com/cd/E86824_01/html/E54777/ip-7p.html + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + pub fn interface(mut self, interface: &str) -> ClientBuilder { + self.config.interface = Some(interface.to_string()); + self + } + + /// Set that all sockets have `SO_KEEPALIVE` set with the supplied duration. + /// + /// If `None`, the option will not be set. + pub fn tcp_keepalive(mut self, val: D) -> ClientBuilder + where + D: Into>, + { + self.config.tcp_keepalive = val.into(); + self + } + + /// Set that all sockets have `SO_KEEPALIVE` set with the supplied interval. + /// + /// If `None`, the option will not be set. + pub fn tcp_keepalive_interval(mut self, val: D) -> ClientBuilder + where + D: Into>, + { + self.config.tcp_keepalive_interval = val.into(); + self + } + + /// Set that all sockets have `SO_KEEPALIVE` set with the supplied retry count. + /// + /// If `None`, the option will not be set. + pub fn tcp_keepalive_retries(mut self, retries: C) -> ClientBuilder + where + C: Into>, + { + self.config.tcp_keepalive_retries = retries.into(); + self + } + + /// Set that all sockets have `TCP_USER_TIMEOUT` set with the supplied duration. + /// + /// This option controls how long transmitted data may remain unacknowledged before + /// the connection is force-closed. + /// + /// If `None`, the option will not be set. + #[cfg(any(target_os = "android", target_os = "fuchsia", target_os = "linux"))] + pub fn tcp_user_timeout(mut self, val: D) -> ClientBuilder + where + D: Into>, + { + self.config.tcp_user_timeout = val.into(); + self + } + + // Alt Transports + + /// Set that all connections will use this Unix socket. + /// + /// If a request URI uses the `https` scheme, TLS will still be used over + /// the Unix socket. + /// + /// # Note + /// + /// This option is not compatible with any of the TCP or Proxy options. + /// Setting this will ignore all those options previously set. + /// + /// Likewise, DNS resolution will not be done on the domain name. + #[cfg(unix)] + pub fn unix_socket(mut self, path: impl UnixSocketProvider) -> ClientBuilder { + self.config.unix_socket = Some(path.reqwest_uds_path(crate::connect::uds::Internal).into()); + self + } + + // TLS options + + /// Add a custom root certificate. + /// + /// This can be used to connect to a server that has a self-signed + /// certificate for example. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn add_root_certificate(mut self, cert: Certificate) -> ClientBuilder { + self.config.root_certs.push(cert); + self + } + + /// Add a certificate revocation list. 
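+    ///
+    /// # Example
+    ///
+    /// A sketch, assuming a PEM-encoded CRL at the hypothetical path
+    /// `my_crl.pem`:
+    ///
+    /// ```rust,no_run
+    /// # fn doc() -> Result<(), Box<dyn std::error::Error>> {
+    /// let crl_pem = std::fs::read("my_crl.pem")?;
+    /// let crl = reqwest::tls::CertificateRevocationList::from_pem(&crl_pem)?;
+    /// let client = reqwest::Client::builder()
+    ///     .add_crl(crl)
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// ```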
+    ///
+    /// # Optional
+    ///
+    /// This requires the `rustls-tls(-...)` Cargo feature enabled.
+    #[cfg(feature = "__rustls")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls")))]
+    pub fn add_crl(mut self, crl: CertificateRevocationList) -> ClientBuilder {
+        self.config.crls.push(crl);
+        self
+    }
+
+    /// Add multiple certificate revocation lists.
+    ///
+    /// # Optional
+    ///
+    /// This requires the `rustls-tls(-...)` Cargo feature enabled.
+    #[cfg(feature = "__rustls")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls")))]
+    pub fn add_crls(
+        mut self,
+        crls: impl IntoIterator<Item = CertificateRevocationList>,
+    ) -> ClientBuilder {
+        self.config.crls.extend(crls);
+        self
+    }
+
+    /// Controls the use of built-in/preloaded certificates during certificate validation.
+    ///
+    /// Defaults to `true` -- built-in system certs will be used.
+    ///
+    /// # Bulk Option
+    ///
+    /// If this value is `true`, _all_ enabled system certs configured with Cargo
+    /// features will be loaded.
+    ///
+    /// You can set this to `false`, and enable only a specific source with
+    /// individual methods. Doing that will prevent other sources from being loaded
+    /// even if their corresponding Cargo feature is enabled.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)`
+    /// feature to be enabled.
+    #[cfg(feature = "__tls")]
+    #[cfg_attr(
+        docsrs,
+        doc(cfg(any(
+            feature = "default-tls",
+            feature = "native-tls",
+            feature = "rustls-tls"
+        )))
+    )]
+    pub fn tls_built_in_root_certs(mut self, tls_built_in_root_certs: bool) -> ClientBuilder {
+        self.config.tls_built_in_root_certs = tls_built_in_root_certs;
+
+        #[cfg(feature = "rustls-tls-webpki-roots-no-provider")]
+        {
+            self.config.tls_built_in_certs_webpki = tls_built_in_root_certs;
+        }
+
+        #[cfg(feature = "rustls-tls-native-roots-no-provider")]
+        {
+            self.config.tls_built_in_certs_native = tls_built_in_root_certs;
+        }
+
+        self
+    }
+
+    /// Sets whether to load webpki root certs with rustls.
+    ///
+    /// If the feature is enabled, this value is `true` by default.
+    #[cfg(feature = "rustls-tls-webpki-roots-no-provider")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls-webpki-roots-no-provider")))]
+    pub fn tls_built_in_webpki_certs(mut self, enabled: bool) -> ClientBuilder {
+        self.config.tls_built_in_certs_webpki = enabled;
+        self
+    }
+
+    /// Sets whether to load native root certs with rustls.
+    ///
+    /// If the feature is enabled, this value is `true` by default.
+    #[cfg(feature = "rustls-tls-native-roots-no-provider")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls-native-roots-no-provider")))]
+    pub fn tls_built_in_native_certs(mut self, enabled: bool) -> ClientBuilder {
+        self.config.tls_built_in_certs_native = enabled;
+        self
+    }
+
+    /// Sets the identity to be used for client certificate authentication.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `native-tls` or `rustls-tls(-...)` feature to be
+    /// enabled.
+    #[cfg(any(feature = "native-tls", feature = "__rustls"))]
+    #[cfg_attr(docsrs, doc(cfg(any(feature = "native-tls", feature = "rustls-tls"))))]
+    pub fn identity(mut self, identity: Identity) -> ClientBuilder {
+        self.config.identity = Some(identity);
+        self
+    }
+
+    /// Controls the use of hostname verification.
+    ///
+    /// Defaults to `false`.
+    ///
+    /// # Warning
+    ///
+    /// You should think very carefully before you use this method. If
+    /// hostname verification is not used, any valid certificate for any
+    /// site will be trusted for use from any other.
This introduces a + /// significant vulnerability to man-in-the-middle attacks. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn danger_accept_invalid_hostnames( + mut self, + accept_invalid_hostname: bool, + ) -> ClientBuilder { + self.config.hostname_verification = !accept_invalid_hostname; + self + } + + /// Controls the use of certificate validation. + /// + /// Defaults to `false`. + /// + /// # Warning + /// + /// You should think very carefully before using this method. If + /// invalid certificates are trusted, *any* certificate for *any* site + /// will be trusted for use. This includes expired certificates. This + /// introduces significant vulnerabilities, and should only be used + /// as a last resort. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn danger_accept_invalid_certs(mut self, accept_invalid_certs: bool) -> ClientBuilder { + self.config.certs_verification = !accept_invalid_certs; + self + } + + /// Controls the use of TLS server name indication. + /// + /// Defaults to `true`. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn tls_sni(mut self, tls_sni: bool) -> ClientBuilder { + self.config.tls_sni = tls_sni; + self + } + + /// Set the minimum required TLS version for connections. + /// + /// By default, the TLS backend's own default is used. + /// + /// # Errors + /// + /// A value of `tls::Version::TLS_1_3` will cause an error with the + /// `native-tls`/`default-tls` backend. This does not mean the version + /// isn't supported, just that it can't be set as a minimum due to + /// technical limitations. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn min_tls_version(mut self, version: tls::Version) -> ClientBuilder { + self.config.min_tls_version = Some(version); + self + } + + /// Set the maximum allowed TLS version for connections. + /// + /// By default, there's no maximum. + /// + /// # Errors + /// + /// A value of `tls::Version::TLS_1_3` will cause an error with the + /// `native-tls`/`default-tls` backend. This does not mean the version + /// isn't supported, just that it can't be set as a maximum due to + /// technical limitations. + /// + /// Cannot set a maximum outside the protocol versions supported by + /// `rustls` with the `rustls-tls` backend. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. 
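+    ///
+    /// # Example
+    ///
+    /// A sketch: pin connections to TLS 1.2 exactly by combining the minimum
+    /// and maximum versions.
+    ///
+    /// ```rust
+    /// # fn doc() -> Result<(), reqwest::Error> {
+    /// let client = reqwest::Client::builder()
+    ///     .min_tls_version(reqwest::tls::Version::TLS_1_2)
+    ///     .max_tls_version(reqwest::tls::Version::TLS_1_2)
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// ```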
+ #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn max_tls_version(mut self, version: tls::Version) -> ClientBuilder { + self.config.max_tls_version = Some(version); + self + } + + /// Force using the native TLS backend. + /// + /// Since multiple TLS backends can be optionally enabled, this option will + /// force the `native-tls` backend to be used for this `Client`. + /// + /// # Optional + /// + /// This requires the optional `native-tls` feature to be enabled. + #[cfg(feature = "native-tls")] + #[cfg_attr(docsrs, doc(cfg(feature = "native-tls")))] + pub fn use_native_tls(mut self) -> ClientBuilder { + self.config.tls = TlsBackend::Default; + self + } + + /// Force using the Rustls TLS backend. + /// + /// Since multiple TLS backends can be optionally enabled, this option will + /// force the `rustls` backend to be used for this `Client`. + /// + /// # Optional + /// + /// This requires the optional `rustls-tls(-...)` feature to be enabled. + #[cfg(feature = "__rustls")] + #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls")))] + pub fn use_rustls_tls(mut self) -> ClientBuilder { + self.config.tls = TlsBackend::Rustls; + self + } + + /// Use a preconfigured TLS backend. + /// + /// If the passed `Any` argument is not a TLS backend that reqwest + /// understands, the `ClientBuilder` will error when calling `build`. + /// + /// # Advanced + /// + /// This is an advanced option, and can be somewhat brittle. Usage requires + /// keeping the preconfigured TLS argument version in sync with reqwest, + /// since version mismatches will result in an "unknown" TLS backend. + /// + /// If possible, it's preferable to use the methods on `ClientBuilder` + /// to configure reqwest's TLS. + /// + /// # Optional + /// + /// This requires one of the optional features `native-tls` or + /// `rustls-tls(-...)` to be enabled. + #[cfg(any(feature = "native-tls", feature = "__rustls",))] + #[cfg_attr(docsrs, doc(cfg(any(feature = "native-tls", feature = "rustls-tls"))))] + pub fn use_preconfigured_tls(mut self, tls: impl Any) -> ClientBuilder { + let mut tls = Some(tls); + #[cfg(feature = "native-tls")] + { + if let Some(conn) = (&mut tls as &mut dyn Any).downcast_mut::>() { + let tls = conn.take().expect("is definitely Some"); + let tls = crate::tls::TlsBackend::BuiltNativeTls(tls); + self.config.tls = tls; + return self; + } + } + #[cfg(feature = "__rustls")] + { + if let Some(conn) = + (&mut tls as &mut dyn Any).downcast_mut::>() + { + let tls = conn.take().expect("is definitely Some"); + let tls = crate::tls::TlsBackend::BuiltRustls(tls); + self.config.tls = tls; + return self; + } + } + + // Otherwise, we don't recognize the TLS backend! + self.config.tls = crate::tls::TlsBackend::UnknownPreconfigured; + self + } + + /// Add TLS information as `TlsInfo` extension to responses. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn tls_info(mut self, tls_info: bool) -> ClientBuilder { + self.config.tls_info = tls_info; + self + } + + /// Restrict the Client to be used with HTTPS only requests. + /// + /// Defaults to false. 
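+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # fn doc() -> Result<(), reqwest::Error> {
+    /// let client = reqwest::Client::builder()
+    ///     .https_only(true)
+    ///     .build()?;
+    /// // `http://` URLs will now be rejected before any connection is made.
+    /// # Ok(())
+    /// # }
+    /// ```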
+ pub fn https_only(mut self, enabled: bool) -> ClientBuilder { + self.config.https_only = enabled; + self + } + + #[doc(hidden)] + #[cfg(feature = "hickory-dns")] + #[cfg_attr(docsrs, doc(cfg(feature = "hickory-dns")))] + #[deprecated(note = "use `hickory_dns` instead")] + pub fn trust_dns(mut self, enable: bool) -> ClientBuilder { + self.config.hickory_dns = enable; + self + } + + /// Enables the [hickory-dns](hickory_resolver) async resolver instead of a default threadpool + /// using `getaddrinfo`. + /// + /// If the `hickory-dns` feature is turned on, the default option is enabled. + /// + /// # Optional + /// + /// This requires the optional `hickory-dns` feature to be enabled + /// + /// # Warning + /// + /// The hickory resolver does not work exactly the same, or on all the platforms + /// that the default resolver does + #[cfg(feature = "hickory-dns")] + #[cfg_attr(docsrs, doc(cfg(feature = "hickory-dns")))] + pub fn hickory_dns(mut self, enable: bool) -> ClientBuilder { + self.config.hickory_dns = enable; + self + } + + #[doc(hidden)] + #[deprecated(note = "use `no_hickory_dns` instead")] + pub fn no_trust_dns(self) -> ClientBuilder { + self.no_hickory_dns() + } + + /// Disables the hickory-dns async resolver. + /// + /// This method exists even if the optional `hickory-dns` feature is not enabled. + /// This can be used to ensure a `Client` doesn't use the hickory-dns async resolver + /// even if another dependency were to enable the optional `hickory-dns` feature. + pub fn no_hickory_dns(self) -> ClientBuilder { + #[cfg(feature = "hickory-dns")] + { + self.hickory_dns(false) + } + + #[cfg(not(feature = "hickory-dns"))] + { + self + } + } + + /// Override DNS resolution for specific domains to a particular IP address. + /// + /// Set the port to `0` to use the conventional port for the given scheme (e.g. 80 for http). + /// Ports in the URL itself will always be used instead of the port in the overridden addr. + pub fn resolve(self, domain: &str, addr: SocketAddr) -> ClientBuilder { + self.resolve_to_addrs(domain, &[addr]) + } + + /// Override DNS resolution for specific domains to particular IP addresses. + /// + /// Set the port to `0` to use the conventional port for the given scheme (e.g. 80 for http). + /// Ports in the URL itself will always be used instead of the port in the overridden addr. + pub fn resolve_to_addrs(mut self, domain: &str, addrs: &[SocketAddr]) -> ClientBuilder { + self.config + .dns_overrides + .insert(domain.to_ascii_lowercase(), addrs.to_vec()); + self + } + + /// Override the DNS resolver implementation. + /// + /// Pass an `Arc` wrapping a type implementing `Resolve`. + /// Overrides for specific names passed to `resolve` and `resolve_to_addrs` will + /// still be applied on top of this resolver. + pub fn dns_resolver(mut self, resolver: Arc) -> ClientBuilder { + self.config.dns_resolver = Some(resolver as _); + self + } + + /// Override the DNS resolver implementation. + /// + /// Overrides for specific names passed to `resolve` and `resolve_to_addrs` will + /// still be applied on top of this resolver. + /// + /// This method will replace `dns_resolver` in the next breaking change. + pub fn dns_resolver2(mut self, resolver: R) -> ClientBuilder + where + R: crate::dns::resolve::IntoResolve, + { + self.config.dns_resolver = Some(resolver.into_resolve()); + self + } + + /// Whether to send data on the first flight ("early data") in TLS 1.3 handshakes + /// for HTTP/3 connections. + /// + /// The default is false. 
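+    ///
+    /// # Example
+    ///
+    /// A sketch; this only compiles with the unstable `http3` feature enabled:
+    ///
+    /// ```rust,ignore
+    /// let client = reqwest::Client::builder()
+    ///     .http3_prior_knowledge()
+    ///     .tls_early_data(true)
+    ///     .build()?;
+    /// ```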
+ #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn tls_early_data(mut self, enabled: bool) -> ClientBuilder { + self.config.tls_enable_early_data = enabled; + self + } + + /// Maximum duration of inactivity to accept before timing out the QUIC connection. + /// + /// Please see docs in [`TransportConfig`] in [`quinn`]. + /// + /// [`TransportConfig`]: https://docs.rs/quinn/latest/quinn/struct.TransportConfig.html + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_max_idle_timeout(mut self, value: Duration) -> ClientBuilder { + self.config.quic_max_idle_timeout = Some(value); + self + } + + /// Maximum number of bytes the peer may transmit without acknowledgement on any one stream + /// before becoming blocked. + /// + /// Please see docs in [`TransportConfig`] in [`quinn`]. + /// + /// [`TransportConfig`]: https://docs.rs/quinn/latest/quinn/struct.TransportConfig.html + /// + /// # Panics + /// + /// Panics if the value is over 2^62. + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_stream_receive_window(mut self, value: u64) -> ClientBuilder { + self.config.quic_stream_receive_window = Some(value.try_into().unwrap()); + self + } + + /// Maximum number of bytes the peer may transmit across all streams of a connection before + /// becoming blocked. + /// + /// Please see docs in [`TransportConfig`] in [`quinn`]. + /// + /// [`TransportConfig`]: https://docs.rs/quinn/latest/quinn/struct.TransportConfig.html + /// + /// # Panics + /// + /// Panics if the value is over 2^62. + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_conn_receive_window(mut self, value: u64) -> ClientBuilder { + self.config.quic_receive_window = Some(value.try_into().unwrap()); + self + } + + /// Maximum number of bytes to transmit to a peer without acknowledgment + /// + /// Please see docs in [`TransportConfig`] in [`quinn`]. + /// + /// [`TransportConfig`]: https://docs.rs/quinn/latest/quinn/struct.TransportConfig.html + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_send_window(mut self, value: u64) -> ClientBuilder { + self.config.quic_send_window = Some(value); + self + } + + /// Override the default congestion control algorithm to use [BBR] + /// + /// The current default congestion control algorithm is [CUBIC]. This method overrides the + /// default. + /// + /// [BBR]: https://datatracker.ietf.org/doc/html/draft-ietf-ccwg-bbr + /// [CUBIC]: https://datatracker.ietf.org/doc/html/rfc8312 + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_congestion_bbr(mut self) -> ClientBuilder { + self.config.quic_congestion_bbr = true; + self + } + + /// Set the maximum HTTP/3 header size this client is willing to accept. + /// + /// See [header size constraints] section of the specification for details. + /// + /// [header size constraints]: https://www.rfc-editor.org/rfc/rfc9114.html#name-header-size-constraints + /// + /// Please see docs in [`Builder`] in [`h3`]. 
+    ///
+    /// [`Builder`]: https://docs.rs/h3/latest/h3/client/struct.Builder.html#method.max_field_section_size
+    #[cfg(feature = "http3")]
+    #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))]
+    pub fn http3_max_field_section_size(mut self, value: u64) -> ClientBuilder {
+        self.config.h3_max_field_section_size = Some(value.try_into().unwrap());
+        self
+    }
+
+    /// Set whether to send HTTP/3 protocol grease on the connections.
+    ///
+    /// HTTP/3 uses the concept of "grease" to prevent potential interoperability
+    /// issues in the future: deliberately exercising reserved protocol values
+    /// ensures the protocol can evolve and accommodate future changes without
+    /// breaking existing implementations.
+    ///
+    /// Please see docs in [`Builder`] in [`h3`].
+    ///
+    /// [`Builder`]: https://docs.rs/h3/latest/h3/client/struct.Builder.html#method.send_grease
+    #[cfg(feature = "http3")]
+    #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))]
+    pub fn http3_send_grease(mut self, enabled: bool) -> ClientBuilder {
+        self.config.h3_send_grease = Some(enabled);
+        self
+    }
+
+    /// Adds a new Tower [`Layer`](https://docs.rs/tower/latest/tower/trait.Layer.html) to the
+    /// base connector [`Service`](https://docs.rs/tower/latest/tower/trait.Service.html) which
+    /// is responsible for connection establishment.
+    ///
+    /// Each subsequent invocation of this function will wrap previous layers.
+    ///
+    /// If configured, the `connect_timeout` will be the outermost layer.
+    ///
+    /// Example usage:
+    /// ```
+    /// use std::time::Duration;
+    ///
+    /// # #[cfg(not(feature = "rustls-tls-no-provider"))]
+    /// let client = reqwest::Client::builder()
+    ///     // the outermost layer: this timeout also counts time spent waiting
+    ///     // on the concurrency limit below
+    ///     .connect_timeout(Duration::from_millis(200))
+    ///     // underneath the concurrency check, so it only starts once the
+    ///     // concurrency limit lets the connection attempt through
+    ///     .connector_layer(tower::timeout::TimeoutLayer::new(Duration::from_millis(50)))
+    ///     .connector_layer(tower::limit::concurrency::ConcurrencyLimitLayer::new(2))
+    ///     .build()
+    ///     .unwrap();
+    /// ```
+    pub fn connector_layer<L>(mut self, layer: L) -> ClientBuilder
+    where
+        L: Layer<BoxedConnectorService> + Clone + Send + Sync + 'static,
+        L::Service:
+            Service<Unnameable, Response = Conn, Error = BoxError> + Clone + Send + Sync + 'static,
+        <L::Service as Service<Unnameable>>::Future: Send + 'static,
+    {
+        let layer = BoxCloneSyncServiceLayer::new(layer);
+
+        self.config.connector_layers.push(layer);
+
+        self
+    }
+}
+
+type HyperClient = hyper_util::client::legacy::Client<Connector, super::body::Body>;
+
+impl Default for Client {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Client {
+    /// Constructs a new `Client`.
+    ///
+    /// # Panics
+    ///
+    /// This method panics if a TLS backend cannot be initialized, or the resolver
+    /// cannot load the system configuration.
+    ///
+    /// Use `Client::builder()` if you wish to handle the failure as an `Error`
+    /// instead of panicking.
+    pub fn new() -> Client {
+        ClientBuilder::new().build().expect("Client::new()")
+    }
+
+    /// Creates a `ClientBuilder` to configure a `Client`.
+    ///
+    /// This is the same as `ClientBuilder::new()`.
+    pub fn builder() -> ClientBuilder {
+        ClientBuilder::new()
+    }
+
+    /// Convenience method to make a `GET` request to a URL.
+    ///
+    /// # Errors
+    ///
+    /// This method fails whenever the supplied `Url` cannot be parsed.
+    pub fn get<U: IntoUrl>(&self, url: U) -> RequestBuilder {
+        self.request(Method::GET, url)
+    }
+
+    /// Convenience method to make a `POST` request to a URL.
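+    ///
+    /// # Example
+    ///
+    /// A sketch (the URL is a placeholder):
+    ///
+    /// ```rust,no_run
+    /// # async fn doc() -> Result<(), reqwest::Error> {
+    /// let client = reqwest::Client::new();
+    /// let res = client
+    ///     .post("https://example.com/api")
+    ///     .body("hello")
+    ///     .send()
+    ///     .await?;
+    /// # Ok(())
+    /// # }
+    /// ```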
+    ///
+    /// # Errors
+    ///
+    /// This method fails whenever the supplied `Url` cannot be parsed.
+    pub fn post<U: IntoUrl>(&self, url: U) -> RequestBuilder {
+        self.request(Method::POST, url)
+    }
+
+    /// Convenience method to make a `PUT` request to a URL.
+    ///
+    /// # Errors
+    ///
+    /// This method fails whenever the supplied `Url` cannot be parsed.
+    pub fn put<U: IntoUrl>(&self, url: U) -> RequestBuilder {
+        self.request(Method::PUT, url)
+    }
+
+    /// Convenience method to make a `PATCH` request to a URL.
+    ///
+    /// # Errors
+    ///
+    /// This method fails whenever the supplied `Url` cannot be parsed.
+    pub fn patch<U: IntoUrl>(&self, url: U) -> RequestBuilder {
+        self.request(Method::PATCH, url)
+    }
+
+    /// Convenience method to make a `DELETE` request to a URL.
+    ///
+    /// # Errors
+    ///
+    /// This method fails whenever the supplied `Url` cannot be parsed.
+    pub fn delete<U: IntoUrl>(&self, url: U) -> RequestBuilder {
+        self.request(Method::DELETE, url)
+    }
+
+    /// Convenience method to make a `HEAD` request to a URL.
+    ///
+    /// # Errors
+    ///
+    /// This method fails whenever the supplied `Url` cannot be parsed.
+    pub fn head<U: IntoUrl>(&self, url: U) -> RequestBuilder {
+        self.request(Method::HEAD, url)
+    }
+
+    /// Start building a `Request` with the `Method` and `Url`.
+    ///
+    /// Returns a `RequestBuilder`, which will allow setting headers and
+    /// the request body before sending.
+    ///
+    /// # Errors
+    ///
+    /// This method fails whenever the supplied `Url` cannot be parsed.
+    pub fn request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
+        let req = url.into_url().map(move |url| Request::new(method, url));
+        RequestBuilder::new(self.clone(), req)
+    }
+
+    /// Executes a `Request`.
+    ///
+    /// A `Request` can be built manually with `Request::new()` or obtained
+    /// from a `RequestBuilder` with `RequestBuilder::build()`.
+    ///
+    /// You should prefer to use the `RequestBuilder` and
+    /// `RequestBuilder::send()`.
+    ///
+    /// # Errors
+    ///
+    /// This method fails if there was an error while sending the request,
+    /// a redirect loop was detected, or the redirect limit was exhausted.
+    pub fn execute(
+        &self,
+        request: Request,
+    ) -> impl Future<Output = Result<Response, crate::Error>> {
+        self.execute_request(request)
+    }
+
+    pub(super) fn execute_request(&self, req: Request) -> Pending {
+        let (method, url, mut headers, body, version, extensions) = req.pieces();
+        if url.scheme() != "http" && url.scheme() != "https" {
+            return Pending::new_err(error::url_bad_scheme(url));
+        }
+
+        // check if we're in https_only mode and check the scheme of the current URL
+        if self.inner.https_only && url.scheme() != "https" {
+            return Pending::new_err(error::url_bad_scheme(url));
+        }
+
+        // insert default headers in the request headers
+        // without overwriting already appended headers.
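+        // A vacant entry means the caller did not set this header themselves,
+        // so the client-wide default applies; caller-provided values always win.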
+ for (key, value) in &self.inner.headers { + if let Entry::Vacant(entry) = headers.entry(key) { + entry.insert(value.clone()); + } + } + + let accept_encoding = self.inner.accepts.as_str(); + + if let Some(accept_encoding) = accept_encoding { + if !headers.contains_key(ACCEPT_ENCODING) && !headers.contains_key(RANGE) { + headers.insert(ACCEPT_ENCODING, HeaderValue::from_static(accept_encoding)); + } + } + + let uri = match try_uri(&url) { + Ok(uri) => uri, + _ => return Pending::new_err(error::url_invalid_uri(url)), + }; + + let body = body.unwrap_or_else(Body::empty); + + self.proxy_auth(&uri, &mut headers); + self.proxy_custom_headers(&uri, &mut headers); + + let builder = hyper::Request::builder() + .method(method.clone()) + .uri(uri) + .version(version); + + let in_flight = match version { + #[cfg(feature = "http3")] + http::Version::HTTP_3 if self.inner.h3_client.is_some() => { + let mut req = builder.body(body).expect("valid request parts"); + *req.headers_mut() = headers.clone(); + let mut h3 = self.inner.h3_client.as_ref().unwrap().clone(); + ResponseFuture::H3(h3.call(req)) + } + _ => { + let mut req = builder.body(body).expect("valid request parts"); + *req.headers_mut() = headers.clone(); + let mut hyper = self.inner.hyper.clone(); + ResponseFuture::Default(hyper.call(req)) + } + }; + + let total_timeout = self + .inner + .total_timeout + .fetch(&extensions) + .copied() + .map(tokio::time::sleep) + .map(Box::pin); + + let read_timeout_fut = self + .inner + .read_timeout + .map(tokio::time::sleep) + .map(Box::pin); + + Pending { + inner: PendingInner::Request(Box::pin(PendingRequest { + method, + url, + headers, + + client: self.inner.clone(), + + in_flight, + total_timeout, + read_timeout_fut, + read_timeout: self.inner.read_timeout, + })), + } + } + + fn proxy_auth(&self, dst: &Uri, headers: &mut HeaderMap) { + if !self.inner.proxies_maybe_http_auth { + return; + } + + // Only set the header here if the destination scheme is 'http', + // since otherwise, the header will be included in the CONNECT tunnel + // request instead. 
+ if dst.scheme() != Some(&Scheme::HTTP) { + return; + } + + if headers.contains_key(PROXY_AUTHORIZATION) { + return; + } + + for proxy in self.inner.proxies.iter() { + if let Some(header) = proxy.http_non_tunnel_basic_auth(dst) { + headers.insert(PROXY_AUTHORIZATION, header); + break; + } + } + } + + fn proxy_custom_headers(&self, dst: &Uri, headers: &mut HeaderMap) { + if !self.inner.proxies_maybe_http_custom_headers { + return; + } + + if dst.scheme() != Some(&Scheme::HTTP) { + return; + } + + for proxy in self.inner.proxies.iter() { + if let Some(iter) = proxy.http_non_tunnel_custom_headers(dst) { + iter.iter().for_each(|(key, value)| { + headers.insert(key, value.clone()); + }); + break; + } + } + } +} + +impl fmt::Debug for Client { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut builder = f.debug_struct("Client"); + self.inner.fmt_fields(&mut builder); + builder.finish() + } +} + +impl tower_service::Service for Client { + type Response = Response; + type Error = crate::Error; + type Future = Pending; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, req: Request) -> Self::Future { + self.execute_request(req) + } +} + +impl tower_service::Service for &'_ Client { + type Response = Response; + type Error = crate::Error; + type Future = Pending; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, req: Request) -> Self::Future { + self.execute_request(req) + } +} + +impl fmt::Debug for ClientBuilder { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut builder = f.debug_struct("ClientBuilder"); + self.config.fmt_fields(&mut builder); + builder.finish() + } +} + +impl Config { + fn fmt_fields(&self, f: &mut fmt::DebugStruct<'_, '_>) { + // Instead of deriving Debug, only print fields when their output + // would provide relevant or interesting data. 
+ + #[cfg(feature = "cookies")] + { + if let Some(_) = self.cookie_store { + f.field("cookie_store", &true); + } + } + + f.field("accepts", &self.accepts); + + if !self.proxies.is_empty() { + f.field("proxies", &self.proxies); + } + + if !self.redirect_policy.is_default() { + f.field("redirect_policy", &self.redirect_policy); + } + + if self.referer { + f.field("referer", &true); + } + + f.field("default_headers", &self.headers); + + if self.http1_title_case_headers { + f.field("http1_title_case_headers", &true); + } + + if self.http1_allow_obsolete_multiline_headers_in_responses { + f.field("http1_allow_obsolete_multiline_headers_in_responses", &true); + } + + if self.http1_ignore_invalid_headers_in_responses { + f.field("http1_ignore_invalid_headers_in_responses", &true); + } + + if self.http1_allow_spaces_after_header_name_in_responses { + f.field("http1_allow_spaces_after_header_name_in_responses", &true); + } + + if matches!(self.http_version_pref, HttpVersionPref::Http1) { + f.field("http1_only", &true); + } + + #[cfg(feature = "http2")] + if matches!(self.http_version_pref, HttpVersionPref::Http2) { + f.field("http2_prior_knowledge", &true); + } + + if let Some(ref d) = self.connect_timeout { + f.field("connect_timeout", d); + } + + if let Some(ref d) = self.timeout { + f.field("timeout", d); + } + + if let Some(ref v) = self.local_address { + f.field("local_address", v); + } + + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + if let Some(ref v) = self.interface { + f.field("interface", v); + } + + if self.nodelay { + f.field("tcp_nodelay", &true); + } + + #[cfg(feature = "__tls")] + { + if !self.hostname_verification { + f.field("danger_accept_invalid_hostnames", &true); + } + } + + #[cfg(feature = "__tls")] + { + if !self.certs_verification { + f.field("danger_accept_invalid_certs", &true); + } + + if let Some(ref min_tls_version) = self.min_tls_version { + f.field("min_tls_version", min_tls_version); + } + + if let Some(ref max_tls_version) = self.max_tls_version { + f.field("max_tls_version", max_tls_version); + } + + f.field("tls_sni", &self.tls_sni); + + f.field("tls_info", &self.tls_info); + } + + #[cfg(all(feature = "default-tls", feature = "__rustls"))] + { + f.field("tls_backend", &self.tls); + } + + if !self.dns_overrides.is_empty() { + f.field("dns_overrides", &self.dns_overrides); + } + + #[cfg(feature = "http3")] + { + if self.tls_enable_early_data { + f.field("tls_enable_early_data", &true); + } + } + + #[cfg(unix)] + if let Some(ref p) = self.unix_socket { + f.field("unix_socket", p); + } + } +} + +#[cfg(not(feature = "cookies"))] +type LayeredService = + FollowRedirect, TowerRedirectPolicy>; +#[cfg(feature = "cookies")] +type LayeredService = FollowRedirect< + CookieService>, + TowerRedirectPolicy, +>; +type LayeredFuture = as Service>>::Future; + +struct ClientRef { + accepts: Accepts, + #[cfg(feature = "cookies")] + cookie_store: Option>, + headers: HeaderMap, + hyper: LayeredService, + #[cfg(feature = "http3")] + h3_client: Option>, + referer: bool, + total_timeout: RequestConfig, + read_timeout: Option, + proxies: Arc>, + proxies_maybe_http_auth: bool, + proxies_maybe_http_custom_headers: bool, + https_only: bool, + redirect_policy_desc: Option, +} + +impl ClientRef { + fn fmt_fields(&self, f: &mut fmt::DebugStruct<'_, '_>) { + // Instead of deriving 
Debug, only print fields when their output + // would provide relevant or interesting data. + + #[cfg(feature = "cookies")] + { + if let Some(_) = self.cookie_store { + f.field("cookie_store", &true); + } + } + + f.field("accepts", &self.accepts); + + if !self.proxies.is_empty() { + f.field("proxies", &self.proxies); + } + + if let Some(s) = &self.redirect_policy_desc { + f.field("redirect_policy", s); + } + + if self.referer { + f.field("referer", &true); + } + + f.field("default_headers", &self.headers); + + self.total_timeout.fmt_as_field(f); + + if let Some(ref d) = self.read_timeout { + f.field("read_timeout", d); + } + } +} + +pin_project! { + pub struct Pending { + #[pin] + inner: PendingInner, + } +} + +enum PendingInner { + Request(Pin>), + Error(Option), +} + +pin_project! { + struct PendingRequest { + method: Method, + url: Url, + headers: HeaderMap, + + client: Arc, + + #[pin] + in_flight: ResponseFuture, + #[pin] + total_timeout: Option>>, + #[pin] + read_timeout_fut: Option>>, + read_timeout: Option, + } +} + +enum ResponseFuture { + Default(LayeredFuture), + #[cfg(feature = "http3")] + H3(LayeredFuture), +} + +impl PendingRequest { + fn in_flight(self: Pin<&mut Self>) -> Pin<&mut ResponseFuture> { + self.project().in_flight + } + + fn total_timeout(self: Pin<&mut Self>) -> Pin<&mut Option>>> { + self.project().total_timeout + } + + fn read_timeout(self: Pin<&mut Self>) -> Pin<&mut Option>>> { + self.project().read_timeout_fut + } +} + +impl Pending { + pub(super) fn new_err(err: crate::Error) -> Pending { + Pending { + inner: PendingInner::Error(Some(err)), + } + } + + fn inner(self: Pin<&mut Self>) -> Pin<&mut PendingInner> { + self.project().inner + } +} + +impl Future for Pending { + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let inner = self.inner(); + match inner.get_mut() { + PendingInner::Request(ref mut req) => Pin::new(req).poll(cx), + PendingInner::Error(ref mut err) => Poll::Ready(Err(err + .take() + .expect("Pending error polled more than once"))), + } + } +} + +impl Future for PendingRequest { + type Output = Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if let Some(delay) = self.as_mut().total_timeout().as_mut().as_pin_mut() { + if let Poll::Ready(()) = delay.poll(cx) { + return Poll::Ready(Err( + crate::error::request(crate::error::TimedOut).with_url(self.url.clone()) + )); + } + } + + if let Some(delay) = self.as_mut().read_timeout().as_mut().as_pin_mut() { + if let Poll::Ready(()) = delay.poll(cx) { + return Poll::Ready(Err( + crate::error::request(crate::error::TimedOut).with_url(self.url.clone()) + )); + } + } + + let res = match self.as_mut().in_flight().get_mut() { + ResponseFuture::Default(r) => match ready!(Pin::new(r).poll(cx)) { + Err(e) => { + return Poll::Ready(Err(e.if_no_url(|| self.url.clone()))); + } + Ok(res) => res.map(super::body::boxed), + }, + #[cfg(feature = "http3")] + ResponseFuture::H3(r) => match ready!(Pin::new(r).poll(cx)) { + Err(e) => { + return Poll::Ready(Err(crate::error::request(e).with_url(self.url.clone()))); + } + Ok(res) => res, + }, + }; + + if let Some(url) = &res + .extensions() + .get::() + { + self.url = match Url::parse(&url.0.to_string()) { + Ok(url) => url, + Err(e) => return Poll::Ready(Err(crate::error::decode(e))), + } + }; + + let res = Response::new( + res, + self.url.clone(), + self.client.accepts, + self.total_timeout.take(), + self.read_timeout, + ); + Poll::Ready(Ok(res)) + } +} + +impl fmt::Debug for Pending { + fn fmt(&self, 
f: &mut fmt::Formatter) -> fmt::Result { + match self.inner { + PendingInner::Request(ref req) => f + .debug_struct("Pending") + .field("method", &req.method) + .field("url", &req.url) + .finish(), + PendingInner::Error(ref err) => f.debug_struct("Pending").field("error", err).finish(), + } + } +} + +#[cfg(test)] +mod tests { + #![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))] + + #[tokio::test] + async fn execute_request_rejects_invalid_urls() { + let url_str = "hxxps://www.rust-lang.org/"; + let url = url::Url::parse(url_str).unwrap(); + let result = crate::get(url.clone()).await; + + assert!(result.is_err()); + let err = result.err().unwrap(); + assert!(err.is_builder()); + assert_eq!(url_str, err.url().unwrap().as_str()); + } + + /// https://github.com/seanmonstar/reqwest/issues/668 + #[tokio::test] + async fn execute_request_rejects_invalid_hostname() { + let url_str = "https://{{hostname}}/"; + let url = url::Url::parse(url_str).unwrap(); + let result = crate::get(url.clone()).await; + + assert!(result.is_err()); + let err = result.err().unwrap(); + assert!(err.is_builder()); + assert_eq!(url_str, err.url().unwrap().as_str()); + } + + #[test] + fn test_future_size() { + let s = std::mem::size_of::(); + assert!(s < 128, "size_of::() == {s}, too big"); + } +} diff --git a/rust/reqwest/src/async_impl/decoder.rs b/rust/reqwest/src/async_impl/decoder.rs new file mode 100644 index 0000000000..d219624837 --- /dev/null +++ b/rust/reqwest/src/async_impl/decoder.rs @@ -0,0 +1,746 @@ +use std::fmt; +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate" +))] +use std::future::Future; +use std::pin::Pin; +use std::task::{ready, Context, Poll}; + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate" +))] +use futures_util::stream::Fuse; + +#[cfg(feature = "gzip")] +use async_compression::tokio::bufread::GzipDecoder; + +#[cfg(feature = "brotli")] +use async_compression::tokio::bufread::BrotliDecoder; + +#[cfg(feature = "zstd")] +use async_compression::tokio::bufread::ZstdDecoder; + +#[cfg(feature = "deflate")] +use async_compression::tokio::bufread::ZlibDecoder; + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate", + feature = "blocking", +))] +use futures_core::Stream; + +use bytes::Bytes; +use http::HeaderMap; +use hyper::body::Body as HttpBody; +use hyper::body::Frame; + +#[cfg(any( + feature = "gzip", + feature = "brotli", + feature = "zstd", + feature = "deflate" +))] +use tokio_util::codec::{BytesCodec, FramedRead}; +#[cfg(any( + feature = "gzip", + feature = "brotli", + feature = "zstd", + feature = "deflate" +))] +use tokio_util::io::StreamReader; + +use super::body::ResponseBody; + +#[derive(Clone, Copy, Debug)] +pub(super) struct Accepts { + #[cfg(feature = "gzip")] + pub(super) gzip: bool, + #[cfg(feature = "brotli")] + pub(super) brotli: bool, + #[cfg(feature = "zstd")] + pub(super) zstd: bool, + #[cfg(feature = "deflate")] + pub(super) deflate: bool, +} + +impl Accepts { + pub fn none() -> Self { + Self { + #[cfg(feature = "gzip")] + gzip: false, + #[cfg(feature = "brotli")] + brotli: false, + #[cfg(feature = "zstd")] + zstd: false, + #[cfg(feature = "deflate")] + deflate: false, + } + } +} + +/// A response decompressor over a non-blocking stream of chunks. +/// +/// The inner decoder may be constructed asynchronously. 
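+///
+/// For example, from the caller's point of view decompression is transparent
+/// (a sketch; the URL is a placeholder):
+///
+/// ```rust,ignore
+/// // With the `gzip` feature enabled, `Accept-Encoding: gzip` is sent and a
+/// // gzipped response body is decompressed here without any extra code.
+/// let text = reqwest::get("https://example.com").await?.text().await?;
+/// ```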
+pub(crate) struct Decoder { + inner: Inner, +} + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate" +))] +type PeekableIoStream = futures_util::stream::Peekable; + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate" +))] +type PeekableIoStreamReader = StreamReader; + +enum Inner { + /// A `PlainText` decoder just returns the response content as is. + PlainText(ResponseBody), + + /// A `Gzip` decoder will uncompress the gzipped response content before returning it. + #[cfg(feature = "gzip")] + Gzip(Pin, BytesCodec>>>>), + + /// A `Brotli` decoder will uncompress the brotlied response content before returning it. + #[cfg(feature = "brotli")] + Brotli(Pin, BytesCodec>>>>), + + /// A `Zstd` decoder will uncompress the zstd compressed response content before returning it. + #[cfg(feature = "zstd")] + Zstd(Pin, BytesCodec>>>>), + + /// A `Deflate` decoder will uncompress the deflated response content before returning it. + #[cfg(feature = "deflate")] + Deflate(Pin, BytesCodec>>>>), + + /// A decoder that doesn't have a value yet. + #[cfg(any( + feature = "brotli", + feature = "zstd", + feature = "gzip", + feature = "deflate" + ))] + Pending(Pin>), +} + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate" +))] +/// A future attempt to poll the response body for EOF so we know whether to use gzip or not. +struct Pending(PeekableIoStream, DecoderType); + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate", + feature = "blocking", +))] +pub(crate) struct IoStream(B); + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate" +))] +enum DecoderType { + #[cfg(feature = "gzip")] + Gzip, + #[cfg(feature = "brotli")] + Brotli, + #[cfg(feature = "zstd")] + Zstd, + #[cfg(feature = "deflate")] + Deflate, +} + +impl fmt::Debug for Decoder { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Decoder").finish() + } +} + +impl Decoder { + #[cfg(feature = "blocking")] + pub(crate) fn empty() -> Decoder { + Decoder { + inner: Inner::PlainText(empty()), + } + } + + #[cfg(feature = "blocking")] + pub(crate) fn into_stream(self) -> IoStream { + IoStream(self) + } + + /// A plain text decoder. + /// + /// This decoder will emit the underlying chunks as-is. + fn plain_text(body: ResponseBody) -> Decoder { + Decoder { + inner: Inner::PlainText(body), + } + } + + /// A gzip decoder. + /// + /// This decoder will buffer and decompress chunks that are gzipped. + #[cfg(feature = "gzip")] + fn gzip(body: ResponseBody) -> Decoder { + use futures_util::StreamExt; + + Decoder { + inner: Inner::Pending(Box::pin(Pending( + IoStream(body).peekable(), + DecoderType::Gzip, + ))), + } + } + + /// A brotli decoder. + /// + /// This decoder will buffer and decompress chunks that are brotlied. + #[cfg(feature = "brotli")] + fn brotli(body: ResponseBody) -> Decoder { + use futures_util::StreamExt; + + Decoder { + inner: Inner::Pending(Box::pin(Pending( + IoStream(body).peekable(), + DecoderType::Brotli, + ))), + } + } + + /// A zstd decoder. + /// + /// This decoder will buffer and decompress chunks that are zstd compressed. + #[cfg(feature = "zstd")] + fn zstd(body: ResponseBody) -> Decoder { + use futures_util::StreamExt; + + Decoder { + inner: Inner::Pending(Box::pin(Pending( + IoStream(body).peekable(), + DecoderType::Zstd, + ))), + } + } + + /// A deflate decoder. 
+ /// + /// This decoder will buffer and decompress chunks that are deflated. + #[cfg(feature = "deflate")] + fn deflate(body: ResponseBody) -> Decoder { + use futures_util::StreamExt; + + Decoder { + inner: Inner::Pending(Box::pin(Pending( + IoStream(body).peekable(), + DecoderType::Deflate, + ))), + } + } + + #[cfg(any( + feature = "brotli", + feature = "zstd", + feature = "gzip", + feature = "deflate" + ))] + fn detect_encoding(headers: &mut HeaderMap, encoding_str: &str) -> bool { + use http::header::{CONTENT_ENCODING, CONTENT_LENGTH, TRANSFER_ENCODING}; + use log::warn; + + let mut is_content_encoded = { + headers + .get_all(CONTENT_ENCODING) + .iter() + .any(|enc| enc == encoding_str) + || headers + .get_all(TRANSFER_ENCODING) + .iter() + .any(|enc| enc == encoding_str) + }; + if is_content_encoded { + if let Some(content_length) = headers.get(CONTENT_LENGTH) { + if content_length == "0" { + warn!("{encoding_str} response with content-length of 0"); + is_content_encoded = false; + } + } + } + if is_content_encoded { + headers.remove(CONTENT_ENCODING); + headers.remove(CONTENT_LENGTH); + } + is_content_encoded + } + + /// Constructs a Decoder from a hyper request. + /// + /// A decoder is just a wrapper around the hyper request that knows + /// how to decode the content body of the request. + /// + /// Uses the correct variant by inspecting the Content-Encoding header. + pub(super) fn detect( + _headers: &mut HeaderMap, + body: ResponseBody, + _accepts: Accepts, + ) -> Decoder { + #[cfg(feature = "gzip")] + { + if _accepts.gzip && Decoder::detect_encoding(_headers, "gzip") { + return Decoder::gzip(body); + } + } + + #[cfg(feature = "brotli")] + { + if _accepts.brotli && Decoder::detect_encoding(_headers, "br") { + return Decoder::brotli(body); + } + } + + #[cfg(feature = "zstd")] + { + if _accepts.zstd && Decoder::detect_encoding(_headers, "zstd") { + return Decoder::zstd(body); + } + } + + #[cfg(feature = "deflate")] + { + if _accepts.deflate && Decoder::detect_encoding(_headers, "deflate") { + return Decoder::deflate(body); + } + } + + Decoder::plain_text(body) + } +} + +impl HttpBody for Decoder { + type Data = Bytes; + type Error = crate::Error; + + fn poll_frame( + mut self: Pin<&mut Self>, + cx: &mut Context, + ) -> Poll, Self::Error>>> { + match self.inner { + #[cfg(any( + feature = "brotli", + feature = "zstd", + feature = "gzip", + feature = "deflate" + ))] + Inner::Pending(ref mut future) => match Pin::new(future).poll(cx) { + Poll::Ready(Ok(inner)) => { + self.inner = inner; + self.poll_frame(cx) + } + Poll::Ready(Err(e)) => Poll::Ready(Some(Err(crate::error::decode_io(e)))), + Poll::Pending => Poll::Pending, + }, + Inner::PlainText(ref mut body) => match ready!(Pin::new(body).poll_frame(cx)) { + Some(Ok(frame)) => Poll::Ready(Some(Ok(frame))), + Some(Err(err)) => Poll::Ready(Some(Err(crate::error::decode(err)))), + None => Poll::Ready(None), + }, + #[cfg(feature = "gzip")] + Inner::Gzip(ref mut decoder) => { + match ready!(Pin::new(&mut *decoder).poll_next(cx)) { + Some(Ok(bytes)) => Poll::Ready(Some(Ok(Frame::data(bytes.freeze())))), + Some(Err(err)) => Poll::Ready(Some(Err(crate::error::decode_io(err)))), + None => { + // poll inner connection until EOF after gzip stream is finished + poll_inner_should_be_empty( + decoder.get_mut().get_mut().get_mut().get_mut(), + cx, + ) + } + } + } + #[cfg(feature = "brotli")] + Inner::Brotli(ref mut decoder) => { + match ready!(Pin::new(&mut *decoder).poll_next(cx)) { + Some(Ok(bytes)) => 
Poll::Ready(Some(Ok(Frame::data(bytes.freeze())))), + Some(Err(err)) => Poll::Ready(Some(Err(crate::error::decode_io(err)))), + None => { + // poll inner connection until EOF after brotli stream is finished + poll_inner_should_be_empty( + decoder.get_mut().get_mut().get_mut().get_mut(), + cx, + ) + } + } + } + #[cfg(feature = "zstd")] + Inner::Zstd(ref mut decoder) => { + match ready!(Pin::new(&mut *decoder).poll_next(cx)) { + Some(Ok(bytes)) => Poll::Ready(Some(Ok(Frame::data(bytes.freeze())))), + Some(Err(err)) => Poll::Ready(Some(Err(crate::error::decode_io(err)))), + None => { + // poll inner connection until EOF after zstd stream is finished + poll_inner_should_be_empty( + decoder.get_mut().get_mut().get_mut().get_mut(), + cx, + ) + } + } + } + #[cfg(feature = "deflate")] + Inner::Deflate(ref mut decoder) => { + match ready!(Pin::new(&mut *decoder).poll_next(cx)) { + Some(Ok(bytes)) => Poll::Ready(Some(Ok(Frame::data(bytes.freeze())))), + Some(Err(err)) => Poll::Ready(Some(Err(crate::error::decode_io(err)))), + None => { + // poll inner connection until EOF after deflate stream is finished + poll_inner_should_be_empty( + decoder.get_mut().get_mut().get_mut().get_mut(), + cx, + ) + } + } + } + } + } + + fn size_hint(&self) -> http_body::SizeHint { + match self.inner { + Inner::PlainText(ref body) => HttpBody::size_hint(body), + // the rest are "unknown", so default + #[cfg(any( + feature = "brotli", + feature = "zstd", + feature = "gzip", + feature = "deflate" + ))] + _ => http_body::SizeHint::default(), + } + } +} + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate" +))] +fn poll_inner_should_be_empty( + inner: &mut PeekableIoStream, + cx: &mut Context, +) -> Poll, crate::Error>>> { + // poll inner connection until EOF after deflate stream is finished + // loop in case of empty frames + let mut inner = Pin::new(inner); + loop { + match ready!(inner.as_mut().poll_next(cx)) { + // ignore any empty frames + Some(Ok(bytes)) if bytes.is_empty() => continue, + Some(Ok(_)) => { + return Poll::Ready(Some(Err(crate::error::decode( + "there are extra bytes after body has been decompressed", + )))) + } + Some(Err(err)) => return Poll::Ready(Some(Err(crate::error::decode_io(err)))), + None => return Poll::Ready(None), + } + } +} + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate", + feature = "blocking", +))] +fn empty() -> ResponseBody { + use http_body_util::{combinators::BoxBody, BodyExt, Empty}; + BoxBody::new(Empty::new().map_err(|never| match never {})) +} + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate" +))] +impl Future for Pending { + type Output = Result; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + use futures_util::StreamExt; + + match ready!(Pin::new(&mut self.0).poll_peek(cx)) { + Some(Ok(_)) => { + // fallthrough + } + Some(Err(_e)) => { + // error was just a ref, so we need to really poll to move it + return Poll::Ready(Err(ready!(Pin::new(&mut self.0).poll_next(cx)) + .expect("just peeked Some") + .unwrap_err())); + } + None => return Poll::Ready(Ok(Inner::PlainText(empty()))), + }; + + let _body = std::mem::replace(&mut self.0, IoStream(empty()).peekable()); + + match self.1 { + #[cfg(feature = "brotli")] + DecoderType::Brotli => Poll::Ready(Ok(Inner::Brotli(Box::pin( + FramedRead::new( + BrotliDecoder::new(StreamReader::new(_body)), + BytesCodec::new(), + ) + .fuse(), + )))), + #[cfg(feature = "zstd")] + 
DecoderType::Zstd => Poll::Ready(Ok(Inner::Zstd(Box::pin( + FramedRead::new( + { + let mut d = ZstdDecoder::new(StreamReader::new(_body)); + d.multiple_members(true); + d + }, + BytesCodec::new(), + ) + .fuse(), + )))), + #[cfg(feature = "gzip")] + DecoderType::Gzip => Poll::Ready(Ok(Inner::Gzip(Box::pin( + FramedRead::new( + GzipDecoder::new(StreamReader::new(_body)), + BytesCodec::new(), + ) + .fuse(), + )))), + #[cfg(feature = "deflate")] + DecoderType::Deflate => Poll::Ready(Ok(Inner::Deflate(Box::pin( + FramedRead::new( + ZlibDecoder::new(StreamReader::new(_body)), + BytesCodec::new(), + ) + .fuse(), + )))), + } + } +} + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate", + feature = "blocking", +))] +impl Stream for IoStream +where + B: HttpBody + Unpin, + B::Error: Into>, +{ + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + loop { + return match ready!(Pin::new(&mut self.0).poll_frame(cx)) { + Some(Ok(frame)) => { + // skip non-data frames + if let Ok(buf) = frame.into_data() { + Poll::Ready(Some(Ok(buf))) + } else { + continue; + } + } + Some(Err(err)) => Poll::Ready(Some(Err(crate::error::into_io(err.into())))), + None => Poll::Ready(None), + }; + } + } +} + +// ===== impl Accepts ===== + +impl Accepts { + /* + pub(super) fn none() -> Self { + Accepts { + #[cfg(feature = "gzip")] + gzip: false, + #[cfg(feature = "brotli")] + brotli: false, + #[cfg(feature = "zstd")] + zstd: false, + #[cfg(feature = "deflate")] + deflate: false, + } + } + */ + + pub(super) const fn as_str(&self) -> Option<&'static str> { + match ( + self.is_gzip(), + self.is_brotli(), + self.is_zstd(), + self.is_deflate(), + ) { + (true, true, true, true) => Some("gzip, br, zstd, deflate"), + (true, true, false, true) => Some("gzip, br, deflate"), + (true, true, true, false) => Some("gzip, br, zstd"), + (true, true, false, false) => Some("gzip, br"), + (true, false, true, true) => Some("gzip, zstd, deflate"), + (true, false, false, true) => Some("gzip, deflate"), + (false, true, true, true) => Some("br, zstd, deflate"), + (false, true, false, true) => Some("br, deflate"), + (true, false, true, false) => Some("gzip, zstd"), + (true, false, false, false) => Some("gzip"), + (false, true, true, false) => Some("br, zstd"), + (false, true, false, false) => Some("br"), + (false, false, true, true) => Some("zstd, deflate"), + (false, false, true, false) => Some("zstd"), + (false, false, false, true) => Some("deflate"), + (false, false, false, false) => None, + } + } + + const fn is_gzip(&self) -> bool { + #[cfg(feature = "gzip")] + { + self.gzip + } + + #[cfg(not(feature = "gzip"))] + { + false + } + } + + const fn is_brotli(&self) -> bool { + #[cfg(feature = "brotli")] + { + self.brotli + } + + #[cfg(not(feature = "brotli"))] + { + false + } + } + + const fn is_zstd(&self) -> bool { + #[cfg(feature = "zstd")] + { + self.zstd + } + + #[cfg(not(feature = "zstd"))] + { + false + } + } + + const fn is_deflate(&self) -> bool { + #[cfg(feature = "deflate")] + { + self.deflate + } + + #[cfg(not(feature = "deflate"))] + { + false + } + } +} + +impl Default for Accepts { + fn default() -> Accepts { + Accepts { + #[cfg(feature = "gzip")] + gzip: true, + #[cfg(feature = "brotli")] + brotli: true, + #[cfg(feature = "zstd")] + zstd: true, + #[cfg(feature = "deflate")] + deflate: true, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn accepts_as_str() { + fn format_accept_encoding(accepts: &Accepts) -> String { + let mut 
encodings = vec![]; + if accepts.is_gzip() { + encodings.push("gzip"); + } + if accepts.is_brotli() { + encodings.push("br"); + } + if accepts.is_zstd() { + encodings.push("zstd"); + } + if accepts.is_deflate() { + encodings.push("deflate"); + } + encodings.join(", ") + } + + let state = [true, false]; + let mut permutations = Vec::new(); + + #[allow(unused_variables)] + for gzip in state { + for brotli in state { + for zstd in state { + for deflate in state { + permutations.push(Accepts { + #[cfg(feature = "gzip")] + gzip, + #[cfg(feature = "brotli")] + brotli, + #[cfg(feature = "zstd")] + zstd, + #[cfg(feature = "deflate")] + deflate, + }); + } + } + } + } + + for accepts in permutations { + let expected = format_accept_encoding(&accepts); + let got = accepts.as_str().unwrap_or(""); + assert_eq!(got, expected.as_str()); + } + } +} diff --git a/rust/reqwest/src/async_impl/h3_client/connect.rs b/rust/reqwest/src/async_impl/h3_client/connect.rs new file mode 100644 index 0000000000..d542070a9b --- /dev/null +++ b/rust/reqwest/src/async_impl/h3_client/connect.rs @@ -0,0 +1,144 @@ +use crate::async_impl::h3_client::dns::resolve; +use crate::dns::DynResolver; +use crate::error::BoxError; +use bytes::Bytes; +use h3::client::SendRequest; +use h3_quinn::{Connection, OpenStreams}; +use http::Uri; +use hyper_util::client::legacy::connect::dns::Name; +use quinn::crypto::rustls::QuicClientConfig; +use quinn::{ClientConfig, Endpoint, TransportConfig}; +use std::net::{IpAddr, SocketAddr}; +use std::str::FromStr; +use std::sync::Arc; + +type H3Connection = ( + h3::client::Connection, + SendRequest, +); + +/// H3 Client Config +#[derive(Clone)] +pub(crate) struct H3ClientConfig { + /// Set the maximum HTTP/3 header size this client is willing to accept. + /// + /// See [header size constraints] section of the specification for details. + /// + /// [header size constraints]: https://www.rfc-editor.org/rfc/rfc9114.html#name-header-size-constraints + /// + /// Please see docs in [`Builder`] in [`h3`]. + /// + /// [`Builder`]: https://docs.rs/h3/latest/h3/client/struct.Builder.html#method.max_field_section_size + pub(crate) max_field_section_size: Option, + + /// Enable whether to send HTTP/3 protocol grease on the connections. + /// + /// Just like in HTTP/2, HTTP/3 also uses the concept of "grease" + /// + /// to prevent potential interoperability issues in the future. + /// In HTTP/3, the concept of grease is used to ensure that the protocol can evolve + /// and accommodate future changes without breaking existing implementations. + /// + /// Please see docs in [`Builder`] in [`h3`]. + /// + /// [`Builder`]: https://docs.rs/h3/latest/h3/client/struct.Builder.html#method.send_grease + pub(crate) send_grease: Option, +} + +impl Default for H3ClientConfig { + fn default() -> Self { + Self { + max_field_section_size: None, + send_grease: None, + } + } +} + +#[derive(Clone)] +pub(crate) struct H3Connector { + resolver: DynResolver, + endpoint: Endpoint, + client_config: H3ClientConfig, +} + +impl H3Connector { + pub fn new( + resolver: DynResolver, + tls: rustls::ClientConfig, + local_addr: Option, + transport_config: TransportConfig, + client_config: H3ClientConfig, + ) -> Result { + let quic_client_config = Arc::new(QuicClientConfig::try_from(tls)?); + let mut config = ClientConfig::new(quic_client_config); + // FIXME: Replace this when there is a setter. 
+ config.transport_config(Arc::new(transport_config)); + + let socket_addr = match local_addr { + Some(ip) => SocketAddr::new(ip, 0), + None => "[::]:0".parse::().unwrap(), + }; + + let mut endpoint = Endpoint::client(socket_addr)?; + endpoint.set_default_client_config(config); + + Ok(Self { + resolver, + endpoint, + client_config, + }) + } + + pub async fn connect(&mut self, dest: Uri) -> Result { + let host = dest + .host() + .ok_or("destination must have a host")? + .trim_start_matches('[') + .trim_end_matches(']'); + let port = dest.port_u16().unwrap_or(443); + + let addrs = if let Some(addr) = IpAddr::from_str(host).ok() { + // If the host is already an IP address, skip resolving. + vec![SocketAddr::new(addr, port)] + } else { + let addrs = resolve(&mut self.resolver, Name::from_str(host)?).await?; + let addrs = addrs.map(|mut addr| { + addr.set_port(port); + addr + }); + addrs.collect() + }; + + self.remote_connect(addrs, host).await + } + + async fn remote_connect( + &mut self, + addrs: Vec, + server_name: &str, + ) -> Result { + let mut err = None; + for addr in addrs { + match self.endpoint.connect(addr, server_name)?.await { + Ok(new_conn) => { + let quinn_conn = Connection::new(new_conn); + let mut h3_client_builder = h3::client::builder(); + if let Some(max_field_section_size) = self.client_config.max_field_section_size + { + h3_client_builder.max_field_section_size(max_field_section_size); + } + if let Some(send_grease) = self.client_config.send_grease { + h3_client_builder.send_grease(send_grease); + } + return Ok(h3_client_builder.build(quinn_conn).await?); + } + Err(e) => err = Some(e), + } + } + + match err { + Some(e) => Err(Box::new(e) as BoxError), + None => Err("failed to establish connection for HTTP/3 request".into()), + } + } +} diff --git a/rust/reqwest/src/async_impl/h3_client/dns.rs b/rust/reqwest/src/async_impl/h3_client/dns.rs new file mode 100644 index 0000000000..efcaad4d12 --- /dev/null +++ b/rust/reqwest/src/async_impl/h3_client/dns.rs @@ -0,0 +1,43 @@ +use core::task; +use hyper_util::client::legacy::connect::dns::Name; +use std::future::Future; +use std::net::SocketAddr; +use std::task::Poll; +use tower_service::Service; + +// Trait from hyper to implement DNS resolution for HTTP/3 client. 
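+// Any `tower_service::Service<Name>` whose response iterates `SocketAddr`s
+// satisfies it through the blanket impl below, so the resolvers used for
+// TCP connections can back HTTP/3 as well. A minimal custom resolver might
+// look like this (a hypothetical sketch, not part of this module):
+//
+//     struct StaticResolver(Vec<SocketAddr>);
+//
+//     impl Service<Name> for StaticResolver {
+//         type Response = std::vec::IntoIter<SocketAddr>;
+//         type Error = std::convert::Infallible;
+//         type Future = std::future::Ready<Result<Self::Response, Self::Error>>;
+//
+//         fn poll_ready(&mut self, _: &mut task::Context<'_>) -> Poll<Result<(), Self::Error>> {
+//             Poll::Ready(Ok(()))
+//         }
+//
+//         fn call(&mut self, _: Name) -> Self::Future {
+//             std::future::ready(Ok(self.0.clone().into_iter()))
+//         }
+//     }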
+pub trait Resolve { + type Addrs: Iterator; + type Error: Into>; + type Future: Future>; + + fn poll_ready(&mut self, cx: &mut task::Context<'_>) -> Poll>; + fn resolve(&mut self, name: Name) -> Self::Future; +} + +impl Resolve for S +where + S: Service, + S::Response: Iterator, + S::Error: Into>, +{ + type Addrs = S::Response; + type Error = S::Error; + type Future = S::Future; + + fn poll_ready(&mut self, cx: &mut task::Context<'_>) -> Poll> { + Service::poll_ready(self, cx) + } + + fn resolve(&mut self, name: Name) -> Self::Future { + Service::call(self, name) + } +} + +pub(super) async fn resolve(resolver: &mut R, name: Name) -> Result +where + R: Resolve, +{ + std::future::poll_fn(|cx| resolver.poll_ready(cx)).await?; + resolver.resolve(name).await +} diff --git a/rust/reqwest/src/async_impl/h3_client/mod.rs b/rust/reqwest/src/async_impl/h3_client/mod.rs new file mode 100644 index 0000000000..301eb536f0 --- /dev/null +++ b/rust/reqwest/src/async_impl/h3_client/mod.rs @@ -0,0 +1,116 @@ +#![cfg(feature = "http3")] + +pub(crate) mod connect; +pub(crate) mod dns; +mod pool; + +use crate::async_impl::body::ResponseBody; +use crate::async_impl::h3_client::pool::{Key, Pool, PoolClient}; +use crate::error::{BoxError, Error, Kind}; +use crate::{error, Body}; +use connect::H3Connector; +use http::{Request, Response}; +use log::trace; +use std::future::{self, Future}; +use std::pin::Pin; +use std::task::{Context, Poll}; +use std::time::Duration; +use sync_wrapper::SyncWrapper; +use tower::Service; + +#[derive(Clone)] +pub(crate) struct H3Client { + pool: Pool, + connector: H3Connector, +} + +impl H3Client { + pub fn new(connector: H3Connector, pool_timeout: Option) -> Self { + H3Client { + pool: Pool::new(pool_timeout), + connector, + } + } + + async fn get_pooled_client(&mut self, key: Key) -> Result { + if let Some(client) = self.pool.try_pool(&key) { + trace!("getting client from pool with key {key:?}"); + return Ok(client); + } + + trace!("did not find connection {key:?} in pool so connecting..."); + + let dest = pool::domain_as_uri(key.clone()); + + let lock = match self.pool.connecting(&key) { + pool::Connecting::InProgress(waiter) => { + trace!("connecting to {key:?} is already in progress, subscribing..."); + + match waiter.receive().await { + Some(client) => return Ok(client), + None => return Err("failed to establish connection for HTTP/3 request".into()), + } + } + pool::Connecting::Acquired(lock) => lock, + }; + trace!("connecting to {key:?}..."); + let (driver, tx) = self.connector.connect(dest).await?; + trace!("saving new pooled connection to {key:?}"); + Ok(self.pool.new_connection(lock, driver, tx)) + } + + async fn send_request( + mut self, + key: Key, + req: Request, + ) -> Result, Error> { + let mut pooled = match self.get_pooled_client(key).await { + Ok(client) => client, + Err(e) => return Err(error::request(e)), + }; + pooled + .send_request(req) + .await + .map_err(|e| Error::new(Kind::Request, Some(e))) + } + + pub fn request(&self, mut req: Request) -> H3ResponseFuture { + let pool_key = match pool::extract_domain(req.uri_mut()) { + Ok(s) => s, + Err(e) => { + return H3ResponseFuture { + inner: SyncWrapper::new(Box::pin(future::ready(Err(e)))), + } + } + }; + H3ResponseFuture { + inner: SyncWrapper::new(Box::pin(self.clone().send_request(pool_key, req))), + } + } +} + +impl Service> for H3Client { + type Response = Response; + type Error = Error; + type Future = H3ResponseFuture; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + 
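+    // `poll_ready` above is unconditionally ready: connections are
+    // established lazily (and pooled) inside `request`, so there is no
+    // service-level readiness for `call` to wait on.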
+ fn call(&mut self, req: Request) -> Self::Future { + self.request(req) + } +} + +pub(crate) struct H3ResponseFuture { + inner: SyncWrapper, Error>> + Send>>>, +} + +impl Future for H3ResponseFuture { + type Output = Result, Error>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + self.inner.get_mut().as_mut().poll(cx) + } +} diff --git a/rust/reqwest/src/async_impl/h3_client/pool.rs b/rust/reqwest/src/async_impl/h3_client/pool.rs new file mode 100644 index 0000000000..8d730f39b2 --- /dev/null +++ b/rust/reqwest/src/async_impl/h3_client/pool.rs @@ -0,0 +1,372 @@ +use bytes::Bytes; +use std::collections::HashMap; +use std::future; +use std::pin::Pin; +use std::sync::mpsc::{Receiver, TryRecvError}; +use std::sync::{Arc, Mutex}; +use std::task::{Context, Poll}; +use std::time::Duration; +use tokio::sync::{oneshot, watch}; +use tokio::time::Instant; + +use crate::async_impl::body::ResponseBody; +use crate::error::{BoxError, Error, Kind}; +use crate::Body; +use bytes::Buf; +use h3::client::SendRequest; +use h3_quinn::{Connection, OpenStreams}; +use http::uri::{Authority, Scheme}; +use http::{Request, Response, Uri}; +use log::{error, trace}; + +pub(super) type Key = (Scheme, Authority); + +#[derive(Clone)] +pub struct Pool { + inner: Arc>, +} + +struct ConnectingLockInner { + key: Key, + pool: Arc>, +} + +/// A lock that ensures only one HTTP/3 connection is established per host at a +/// time. The lock is automatically released when dropped. +pub struct ConnectingLock(Option); + +/// A waiter that allows subscribers to receive updates when a new connection is +/// established or when the connection attempt fails. For example, when +/// connection lock is dropped due to an error. +pub struct ConnectingWaiter { + receiver: watch::Receiver>, +} + +pub enum Connecting { + /// A connection attempt is already in progress. + /// You must subscribe to updates instead of initiating a new connection. + InProgress(ConnectingWaiter), + /// The connection lock has been acquired, allowing you to initiate a + /// new connection. + Acquired(ConnectingLock), +} + +impl ConnectingLock { + fn new(key: Key, pool: Arc>) -> Self { + Self(Some(ConnectingLockInner { key, pool })) + } + + /// Forget the lock and return corresponding Key + fn forget(mut self) -> Key { + // Unwrap is safe because the Option can be None only after dropping the + // lock + self.0.take().unwrap().key + } +} + +impl Drop for ConnectingLock { + fn drop(&mut self) { + if let Some(ConnectingLockInner { key, pool }) = self.0.take() { + let mut pool = pool.lock().unwrap(); + pool.connecting.remove(&key); + trace!("HTTP/3 connecting lock for {:?} is dropped", key); + } + } +} + +impl ConnectingWaiter { + pub async fn receive(mut self) -> Option { + match self.receiver.wait_for(Option::is_some).await { + // unwrap because we already checked that option is Some + Ok(ok) => Some(ok.as_ref().unwrap().to_owned()), + Err(_) => None, + } + } +} + +impl Pool { + pub fn new(timeout: Option) -> Self { + Self { + inner: Arc::new(Mutex::new(PoolInner { + connecting: HashMap::new(), + idle_conns: HashMap::new(), + timeout, + })), + } + } + + /// Acquire a connecting lock. This is to ensure that we have only one HTTP3 + /// connection per host. 
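+    ///
+    /// Callers should branch on the result: `Acquired` means this task owns
+    /// the connection attempt and must either call `new_connection` or drop
+    /// the lock on failure (which wakes waiters with `None`), while
+    /// `InProgress` means another task is already connecting and the caller
+    /// should await the `ConnectingWaiter` instead, as
+    /// `H3Client::get_pooled_client` does.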
+ pub fn connecting(&self, key: &Key) -> Connecting { + let mut inner = self.inner.lock().unwrap(); + + if let Some(sender) = inner.connecting.get(key) { + Connecting::InProgress(ConnectingWaiter { + receiver: sender.subscribe(), + }) + } else { + let (tx, _) = watch::channel(None); + inner.connecting.insert(key.clone(), tx); + Connecting::Acquired(ConnectingLock::new(key.clone(), Arc::clone(&self.inner))) + } + } + + pub fn try_pool(&self, key: &Key) -> Option { + let mut inner = self.inner.lock().unwrap(); + let timeout = inner.timeout; + if let Some(conn) = inner.idle_conns.get(&key) { + // We check first if the connection still valid + // and if not, we remove it from the pool. + if conn.is_invalid() { + trace!("pooled HTTP/3 connection is invalid so removing it..."); + inner.idle_conns.remove(&key); + return None; + } + + if let Some(duration) = timeout { + if Instant::now().saturating_duration_since(conn.idle_timeout) > duration { + trace!("pooled connection expired"); + return None; + } + } + } + + inner + .idle_conns + .get_mut(&key) + .and_then(|conn| Some(conn.pool())) + } + + pub fn new_connection( + &mut self, + lock: ConnectingLock, + mut driver: h3::client::Connection, + tx: SendRequest, + ) -> PoolClient { + let (close_tx, close_rx) = std::sync::mpsc::channel(); + tokio::spawn(async move { + let e = future::poll_fn(|cx| driver.poll_close(cx)).await; + trace!("poll_close returned error {e:?}"); + close_tx.send(e).ok(); + }); + + let mut inner = self.inner.lock().unwrap(); + + // We clean up "connecting" here so we don't have to acquire the lock again. + let key = lock.forget(); + let Some(notifier) = inner.connecting.remove(&key) else { + unreachable!("there should be one connecting lock at a time"); + }; + let client = PoolClient::new(tx); + + // Send the client to all our awaiters + let pool_client = if let Err(watch::error::SendError(Some(unsent_client))) = + notifier.send(Some(client.clone())) + { + // If there are no awaiters, the client is returned to us. As a + // micro optimisation, let's reuse it and avoid cloning. 
+ unsent_client + } else { + client.clone() + }; + + let conn = PoolConnection::new(pool_client, close_rx); + inner.insert(key, conn); + + client + } +} + +struct PoolInner { + connecting: HashMap>>, + idle_conns: HashMap, + timeout: Option, +} + +impl PoolInner { + fn insert(&mut self, key: Key, conn: PoolConnection) { + if self.idle_conns.contains_key(&key) { + trace!("connection already exists for key {key:?}"); + } + + self.idle_conns.insert(key, conn); + } +} + +#[derive(Clone)] +pub struct PoolClient { + inner: SendRequest, +} + +impl PoolClient { + pub fn new(tx: SendRequest) -> Self { + Self { inner: tx } + } + + pub async fn send_request( + &mut self, + req: Request, + ) -> Result, BoxError> { + use hyper::body::Body as _; + + let (head, mut req_body) = req.into_parts(); + let mut req = Request::from_parts(head, ()); + + if let Some(n) = req_body.size_hint().exact() { + if n > 0 { + req.headers_mut() + .insert(http::header::CONTENT_LENGTH, n.into()); + } + } + + let (mut send, mut recv) = self.inner.send_request(req).await?.split(); + + let (tx, mut rx) = oneshot::channel::>(); + tokio::spawn(async move { + let mut req_body = Pin::new(&mut req_body); + loop { + match std::future::poll_fn(|cx| req_body.as_mut().poll_frame(cx)).await { + Some(Ok(frame)) => { + if let Ok(b) = frame.into_data() { + if let Err(e) = send.send_data(Bytes::copy_from_slice(&b)).await { + if let Err(e) = tx.send(Err(e.into())) { + error!("Failed to communicate send.send_data() error: {e:?}"); + } + return; + } + } + } + Some(Err(e)) => { + if let Err(e) = tx.send(Err(e.into())) { + error!("Failed to communicate req_body read error: {e:?}"); + } + return; + } + + None => break, + } + } + + if let Err(e) = send.finish().await { + if let Err(e) = tx.send(Err(e.into())) { + error!("Failed to communicate send.finish read error: {e:?}"); + } + return; + } + + let _ = tx.send(Ok(())); + }); + + tokio::select! { + Ok(Err(e)) = &mut rx => Err(e), + resp = recv.recv_response() => { + let resp = resp?; + let resp_body = crate::async_impl::body::boxed(Incoming::new(recv, resp.headers(), rx)); + Ok(resp.map(|_| resp_body)) + } + } + } +} + +pub struct PoolConnection { + // This receives errors from polling h3 driver. 
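+    // `is_invalid` treats both a received error and a disconnected channel
+    // (the driver task has exited) as a dead connection.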
+ close_rx: Receiver, + client: PoolClient, + idle_timeout: Instant, +} + +impl PoolConnection { + pub fn new(client: PoolClient, close_rx: Receiver) -> Self { + Self { + close_rx, + client, + idle_timeout: Instant::now(), + } + } + + pub fn pool(&mut self) -> PoolClient { + self.idle_timeout = Instant::now(); + self.client.clone() + } + + pub fn is_invalid(&self) -> bool { + match self.close_rx.try_recv() { + Err(TryRecvError::Empty) => false, + Err(TryRecvError::Disconnected) => true, + Ok(_) => true, + } + } +} + +struct Incoming { + inner: h3::client::RequestStream, + content_length: Option, + send_rx: oneshot::Receiver>, +} + +impl Incoming { + fn new( + stream: h3::client::RequestStream, + headers: &http::header::HeaderMap, + send_rx: oneshot::Receiver>, + ) -> Self { + Self { + inner: stream, + content_length: headers + .get(http::header::CONTENT_LENGTH) + .and_then(|h| h.to_str().ok()) + .and_then(|v| v.parse().ok()), + send_rx, + } + } +} + +impl http_body::Body for Incoming +where + S: h3::quic::RecvStream, +{ + type Data = Bytes; + type Error = crate::error::Error; + + fn poll_frame( + mut self: Pin<&mut Self>, + cx: &mut Context, + ) -> Poll, Self::Error>>> { + if let Ok(Err(e)) = self.send_rx.try_recv() { + return Poll::Ready(Some(Err(crate::error::body(e)))); + } + + match futures_core::ready!(self.inner.poll_recv_data(cx)) { + Ok(Some(mut b)) => Poll::Ready(Some(Ok(hyper::body::Frame::data( + b.copy_to_bytes(b.remaining()), + )))), + Ok(None) => Poll::Ready(None), + Err(e) => Poll::Ready(Some(Err(crate::error::body(e)))), + } + } + + fn size_hint(&self) -> hyper::body::SizeHint { + if let Some(content_length) = self.content_length { + hyper::body::SizeHint::with_exact(content_length) + } else { + hyper::body::SizeHint::default() + } + } +} + +pub(crate) fn extract_domain(uri: &mut Uri) -> Result { + let uri_clone = uri.clone(); + match (uri_clone.scheme(), uri_clone.authority()) { + (Some(scheme), Some(auth)) => Ok((scheme.clone(), auth.clone())), + _ => Err(Error::new(Kind::Request, None::)), + } +} + +pub(crate) fn domain_as_uri((scheme, auth): Key) -> Uri { + http::uri::Builder::new() + .scheme(scheme) + .authority(auth) + .path_and_query("/") + .build() + .expect("domain is valid Uri") +} diff --git a/rust/reqwest/src/async_impl/mod.rs b/rust/reqwest/src/async_impl/mod.rs new file mode 100644 index 0000000000..5d99ef0273 --- /dev/null +++ b/rust/reqwest/src/async_impl/mod.rs @@ -0,0 +1,18 @@ +pub use self::body::Body; +pub use self::client::{Client, ClientBuilder}; +pub use self::request::{Request, RequestBuilder}; +pub use self::response::Response; +pub use self::upgrade::Upgraded; + +#[cfg(feature = "blocking")] +pub(crate) use self::decoder::Decoder; + +pub mod body; +pub mod client; +pub mod decoder; +pub mod h3_client; +#[cfg(feature = "multipart")] +pub mod multipart; +pub(crate) mod request; +mod response; +mod upgrade; diff --git a/rust/reqwest/src/async_impl/multipart.rs b/rust/reqwest/src/async_impl/multipart.rs new file mode 100644 index 0000000000..26b2e71b6f --- /dev/null +++ b/rust/reqwest/src/async_impl/multipart.rs @@ -0,0 +1,753 @@ +//! 
multipart/form-data +use std::borrow::Cow; +use std::fmt; +use std::pin::Pin; + +#[cfg(feature = "stream")] +use std::io; +#[cfg(feature = "stream")] +use std::path::Path; + +use bytes::Bytes; +use mime_guess::Mime; +use percent_encoding::{self, AsciiSet, NON_ALPHANUMERIC}; +#[cfg(feature = "stream")] +use tokio::fs::File; + +use futures_core::Stream; +use futures_util::{future, stream, StreamExt}; + +use super::Body; +use crate::header::HeaderMap; + +/// An async multipart/form-data request. +pub struct Form { + inner: FormParts, +} + +/// A field in a multipart form. +pub struct Part { + meta: PartMetadata, + value: Body, + body_length: Option, +} + +pub(crate) struct FormParts
<P>
{ + pub(crate) boundary: String, + pub(crate) computed_headers: Vec>, + pub(crate) fields: Vec<(Cow<'static, str>, P)>, + pub(crate) percent_encoding: PercentEncoding, +} + +pub(crate) struct PartMetadata { + mime: Option, + file_name: Option>, + pub(crate) headers: HeaderMap, +} + +pub(crate) trait PartProps { + fn value_len(&self) -> Option; + fn metadata(&self) -> &PartMetadata; +} + +// ===== impl Form ===== + +impl Default for Form { + fn default() -> Self { + Self::new() + } +} + +impl Form { + /// Creates a new async Form without any content. + pub fn new() -> Form { + Form { + inner: FormParts::new(), + } + } + + /// Get the boundary that this form will use. + #[inline] + pub fn boundary(&self) -> &str { + self.inner.boundary() + } + + /// Add a data field with supplied name and value. + /// + /// # Examples + /// + /// ``` + /// let form = reqwest::multipart::Form::new() + /// .text("username", "seanmonstar") + /// .text("password", "secret"); + /// ``` + pub fn text(self, name: T, value: U) -> Form + where + T: Into>, + U: Into>, + { + self.part(name, Part::text(value)) + } + + /// Adds a file field. + /// + /// The path will be used to try to guess the filename and mime. + /// + /// # Examples + /// + /// ```no_run + /// # async fn run() -> std::io::Result<()> { + /// let form = reqwest::multipart::Form::new() + /// .file("key", "/path/to/file").await?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Errors + /// + /// Errors when the file cannot be opened. + #[cfg(feature = "stream")] + #[cfg_attr(docsrs, doc(cfg(feature = "stream")))] + pub async fn file(self, name: T, path: U) -> io::Result
<Form>
+ where + T: Into>, + U: AsRef, + { + Ok(self.part(name, Part::file(path).await?)) + } + + /// Adds a customized Part. + pub fn part(self, name: T, part: Part) -> Form + where + T: Into>, + { + self.with_inner(move |inner| inner.part(name, part)) + } + + /// Configure this `Form` to percent-encode using the `path-segment` rules. + pub fn percent_encode_path_segment(self) -> Form { + self.with_inner(|inner| inner.percent_encode_path_segment()) + } + + /// Configure this `Form` to percent-encode using the `attr-char` rules. + pub fn percent_encode_attr_chars(self) -> Form { + self.with_inner(|inner| inner.percent_encode_attr_chars()) + } + + /// Configure this `Form` to skip percent-encoding + pub fn percent_encode_noop(self) -> Form { + self.with_inner(|inner| inner.percent_encode_noop()) + } + + /// Consume this instance and transform into an instance of Body for use in a request. + pub(crate) fn stream(self) -> Body { + if self.inner.fields.is_empty() { + return Body::empty(); + } + + Body::stream(self.into_stream()) + } + + /// Produce a stream of the bytes in this `Form`, consuming it. + pub fn into_stream(mut self) -> impl Stream> + Send + Sync { + if self.inner.fields.is_empty() { + let empty_stream: Pin< + Box> + Send + Sync>, + > = Box::pin(futures_util::stream::empty()); + return empty_stream; + } + + // create initial part to init reduce chain + let (name, part) = self.inner.fields.remove(0); + let start = Box::pin(self.part_stream(name, part)) + as Pin> + Send + Sync>>; + + let fields = self.inner.take_fields(); + // for each field, chain an additional stream + let stream = fields.into_iter().fold(start, |memo, (name, part)| { + let part_stream = self.part_stream(name, part); + Box::pin(memo.chain(part_stream)) + as Pin> + Send + Sync>> + }); + // append special ending boundary + let last = stream::once(future::ready(Ok( + format!("--{}--\r\n", self.boundary()).into() + ))); + Box::pin(stream.chain(last)) + } + + /// Generate a hyper::Body stream for a single Part instance of a Form request. + pub(crate) fn part_stream( + &mut self, + name: T, + part: Part, + ) -> impl Stream> + where + T: Into>, + { + // start with boundary + let boundary = stream::once(future::ready(Ok( + format!("--{}\r\n", self.boundary()).into() + ))); + // append headers + let header = stream::once(future::ready(Ok({ + let mut h = self + .inner + .percent_encoding + .encode_headers(&name.into(), &part.meta); + h.extend_from_slice(b"\r\n\r\n"); + h.into() + }))); + // then append form data followed by terminating CRLF + boundary + .chain(header) + .chain(part.value.into_stream()) + .chain(stream::once(future::ready(Ok("\r\n".into())))) + } + + pub(crate) fn compute_length(&mut self) -> Option { + self.inner.compute_length() + } + + fn with_inner(self, func: F) -> Self + where + F: FnOnce(FormParts) -> FormParts, + { + Form { + inner: func(self.inner), + } + } +} + +impl fmt::Debug for Form { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.inner.fmt_fields("Form", f) + } +} + +// ===== impl Part ===== + +impl Part { + /// Makes a text parameter. + pub fn text(value: T) -> Part + where + T: Into>, + { + let body = match value.into() { + Cow::Borrowed(slice) => Body::from(slice), + Cow::Owned(string) => Body::from(string), + }; + Part::new(body, None) + } + + /// Makes a new parameter from arbitrary bytes. 
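+    ///
+    /// # Examples
+    ///
+    /// A small illustrative example; the bytes, file name, and mime type are
+    /// arbitrary placeholders:
+    ///
+    /// ```
+    /// # fn run() -> Result<(), reqwest::Error> {
+    /// let part = reqwest::multipart::Part::bytes(vec![0x89, 0x50, 0x4e, 0x47])
+    ///     .file_name("image.png")
+    ///     .mime_str("image/png")?;
+    /// # Ok(())
+    /// # }
+    /// ```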
+ pub fn bytes(value: T) -> Part + where + T: Into>, + { + let body = match value.into() { + Cow::Borrowed(slice) => Body::from(slice), + Cow::Owned(vec) => Body::from(vec), + }; + Part::new(body, None) + } + + /// Makes a new parameter from an arbitrary stream. + pub fn stream>(value: T) -> Part { + Part::new(value.into(), None) + } + + /// Makes a new parameter from an arbitrary stream with a known length. This is particularly + /// useful when adding something like file contents as a stream, where you can know the content + /// length beforehand. + pub fn stream_with_length>(value: T, length: u64) -> Part { + Part::new(value.into(), Some(length)) + } + + /// Makes a file parameter. + /// + /// # Errors + /// + /// Errors when the file cannot be opened. + #[cfg(feature = "stream")] + #[cfg_attr(docsrs, doc(cfg(feature = "stream")))] + pub async fn file>(path: T) -> io::Result { + let path = path.as_ref(); + let file_name = path + .file_name() + .map(|filename| filename.to_string_lossy().into_owned()); + let ext = path.extension().and_then(|ext| ext.to_str()).unwrap_or(""); + let mime = mime_guess::from_ext(ext).first_or_octet_stream(); + let file = File::open(path).await?; + let len = file.metadata().await.map(|m| m.len()).ok(); + let field = match len { + Some(len) => Part::stream_with_length(file, len), + None => Part::stream(file), + } + .mime(mime); + + Ok(if let Some(file_name) = file_name { + field.file_name(file_name) + } else { + field + }) + } + + fn new(value: Body, body_length: Option) -> Part { + Part { + meta: PartMetadata::new(), + value, + body_length, + } + } + + /// Tries to set the mime of this part. + pub fn mime_str(self, mime: &str) -> crate::Result { + Ok(self.mime(mime.parse().map_err(crate::error::builder)?)) + } + + // Re-export when mime 0.4 is available, with split MediaType/MediaRange. + fn mime(self, mime: Mime) -> Part { + self.with_inner(move |inner| inner.mime(mime)) + } + + /// Sets the filename, builder style. + pub fn file_name(self, filename: T) -> Part + where + T: Into>, + { + self.with_inner(move |inner| inner.file_name(filename)) + } + + /// Sets custom headers for the part. + pub fn headers(self, headers: HeaderMap) -> Part { + self.with_inner(move |inner| inner.headers(headers)) + } + + fn with_inner(self, func: F) -> Self + where + F: FnOnce(PartMetadata) -> PartMetadata, + { + Part { + meta: func(self.meta), + ..self + } + } +} + +impl fmt::Debug for Part { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut dbg = f.debug_struct("Part"); + dbg.field("value", &self.value); + self.meta.fmt_fields(&mut dbg); + dbg.finish() + } +} + +impl PartProps for Part { + fn value_len(&self) -> Option { + if self.body_length.is_some() { + self.body_length + } else { + self.value.content_length() + } + } + + fn metadata(&self) -> &PartMetadata { + &self.meta + } +} + +// ===== impl FormParts ===== + +impl FormParts
<P>
{ + pub(crate) fn new() -> Self { + FormParts { + boundary: gen_boundary(), + computed_headers: Vec::new(), + fields: Vec::new(), + percent_encoding: PercentEncoding::PathSegment, + } + } + + pub(crate) fn boundary(&self) -> &str { + &self.boundary + } + + /// Adds a customized Part. + pub(crate) fn part(mut self, name: T, part: P) -> Self + where + T: Into>, + { + self.fields.push((name.into(), part)); + self + } + + /// Configure this `Form` to percent-encode using the `path-segment` rules. + pub(crate) fn percent_encode_path_segment(mut self) -> Self { + self.percent_encoding = PercentEncoding::PathSegment; + self + } + + /// Configure this `Form` to percent-encode using the `attr-char` rules. + pub(crate) fn percent_encode_attr_chars(mut self) -> Self { + self.percent_encoding = PercentEncoding::AttrChar; + self + } + + /// Configure this `Form` to skip percent-encoding + pub(crate) fn percent_encode_noop(mut self) -> Self { + self.percent_encoding = PercentEncoding::NoOp; + self + } + + // If predictable, computes the length the request will have + // The length should be predictable if only String and file fields have been added, + // but not if a generic reader has been added; + pub(crate) fn compute_length(&mut self) -> Option { + let mut length = 0u64; + for &(ref name, ref field) in self.fields.iter() { + match field.value_len() { + Some(value_length) => { + // We are constructing the header just to get its length. To not have to + // construct it again when the request is sent we cache these headers. + let header = self.percent_encoding.encode_headers(name, field.metadata()); + let header_length = header.len(); + self.computed_headers.push(header); + // The additions mimic the format string out of which the field is constructed + // in Reader. Not the cleanest solution because if that format string is + // ever changed then this formula needs to be changed too which is not an + // obvious dependency in the code. + length += 2 + + self.boundary().len() as u64 + + 2 + + header_length as u64 + + 4 + + value_length + + 2 + } + _ => return None, + } + } + // If there is at least one field there is a special boundary for the very last field. + if !self.fields.is_empty() { + length += 2 + self.boundary().len() as u64 + 4 + } + Some(length) + } + + /// Take the fields vector of this instance, replacing with an empty vector. + fn take_fields(&mut self) -> Vec<(Cow<'static, str>, P)> { + std::mem::replace(&mut self.fields, Vec::new()) + } +} + +impl FormParts
<P>
{ + pub(crate) fn fmt_fields(&self, ty_name: &'static str, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct(ty_name) + .field("boundary", &self.boundary) + .field("parts", &self.fields) + .finish() + } +} + +// ===== impl PartMetadata ===== + +impl PartMetadata { + pub(crate) fn new() -> Self { + PartMetadata { + mime: None, + file_name: None, + headers: HeaderMap::default(), + } + } + + pub(crate) fn mime(mut self, mime: Mime) -> Self { + self.mime = Some(mime); + self + } + + pub(crate) fn file_name(mut self, filename: T) -> Self + where + T: Into>, + { + self.file_name = Some(filename.into()); + self + } + + pub(crate) fn headers(mut self, headers: T) -> Self + where + T: Into, + { + self.headers = headers.into(); + self + } +} + +impl PartMetadata { + pub(crate) fn fmt_fields<'f, 'fa, 'fb>( + &self, + debug_struct: &'f mut fmt::DebugStruct<'fa, 'fb>, + ) -> &'f mut fmt::DebugStruct<'fa, 'fb> { + debug_struct + .field("mime", &self.mime) + .field("file_name", &self.file_name) + .field("headers", &self.headers) + } +} + +// https://url.spec.whatwg.org/#fragment-percent-encode-set +const FRAGMENT_ENCODE_SET: &AsciiSet = &percent_encoding::CONTROLS + .add(b' ') + .add(b'"') + .add(b'<') + .add(b'>') + .add(b'`'); + +// https://url.spec.whatwg.org/#path-percent-encode-set +const PATH_ENCODE_SET: &AsciiSet = &FRAGMENT_ENCODE_SET.add(b'#').add(b'?').add(b'{').add(b'}'); + +const PATH_SEGMENT_ENCODE_SET: &AsciiSet = &PATH_ENCODE_SET.add(b'/').add(b'%'); + +// https://tools.ietf.org/html/rfc8187#section-3.2.1 +const ATTR_CHAR_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC + .remove(b'!') + .remove(b'#') + .remove(b'$') + .remove(b'&') + .remove(b'+') + .remove(b'-') + .remove(b'.') + .remove(b'^') + .remove(b'_') + .remove(b'`') + .remove(b'|') + .remove(b'~'); + +pub(crate) enum PercentEncoding { + PathSegment, + AttrChar, + NoOp, +} + +impl PercentEncoding { + pub(crate) fn encode_headers(&self, name: &str, field: &PartMetadata) -> Vec { + let mut buf = Vec::new(); + buf.extend_from_slice(b"Content-Disposition: form-data; "); + + match self.percent_encode(name) { + Cow::Borrowed(value) => { + // nothing has been percent encoded + buf.extend_from_slice(b"name=\""); + buf.extend_from_slice(value.as_bytes()); + buf.extend_from_slice(b"\""); + } + Cow::Owned(value) => { + // something has been percent encoded + buf.extend_from_slice(b"name*=utf-8''"); + buf.extend_from_slice(value.as_bytes()); + } + } + + // According to RFC7578 Section 4.2, `filename*=` syntax is invalid. + // See https://github.com/seanmonstar/reqwest/issues/419. 
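+        // So, unlike the field name above, the filename is always written as
+        // a plain quoted string, backslash-escaping the characters that could
+        // otherwise terminate the quoted value or the header line.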
+ if let Some(filename) = &field.file_name { + buf.extend_from_slice(b"; filename=\""); + let legal_filename = filename + .replace('\\', "\\\\") + .replace('"', "\\\"") + .replace('\r', "\\\r") + .replace('\n', "\\\n"); + buf.extend_from_slice(legal_filename.as_bytes()); + buf.extend_from_slice(b"\""); + } + + if let Some(mime) = &field.mime { + buf.extend_from_slice(b"\r\nContent-Type: "); + buf.extend_from_slice(mime.as_ref().as_bytes()); + } + + for (k, v) in field.headers.iter() { + buf.extend_from_slice(b"\r\n"); + buf.extend_from_slice(k.as_str().as_bytes()); + buf.extend_from_slice(b": "); + buf.extend_from_slice(v.as_bytes()); + } + buf + } + + fn percent_encode<'a>(&self, value: &'a str) -> Cow<'a, str> { + use percent_encoding::utf8_percent_encode as percent_encode; + + match self { + Self::PathSegment => percent_encode(value, PATH_SEGMENT_ENCODE_SET).into(), + Self::AttrChar => percent_encode(value, ATTR_CHAR_ENCODE_SET).into(), + Self::NoOp => value.into(), + } + } +} + +fn gen_boundary() -> String { + use crate::util::fast_random as random; + + let a = random(); + let b = random(); + let c = random(); + let d = random(); + + format!("{a:016x}-{b:016x}-{c:016x}-{d:016x}") +} + +#[cfg(test)] +mod tests { + use super::*; + use futures_util::stream; + use futures_util::TryStreamExt; + use std::future; + use tokio::{self, runtime}; + + #[test] + fn form_empty() { + let form = Form::new(); + + let rt = runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("new rt"); + let body = form.stream().into_stream(); + let s = body.map_ok(|try_c| try_c.to_vec()).try_concat(); + + let out = rt.block_on(s); + assert!(out.unwrap().is_empty()); + } + + #[test] + fn stream_to_end() { + let mut form = Form::new() + .part( + "reader1", + Part::stream(Body::stream(stream::once(future::ready::< + Result, + >(Ok( + "part1".to_owned() + ))))), + ) + .part("key1", Part::text("value1")) + .part( + "key2", + Part::text("value2").mime(mime_guess::mime::IMAGE_BMP), + ) + .part( + "reader2", + Part::stream(Body::stream(stream::once(future::ready::< + Result, + >(Ok( + "part2".to_owned() + ))))), + ) + .part("key3", Part::text("value3").file_name("filename")); + form.inner.boundary = "boundary".to_string(); + let expected = "--boundary\r\n\ + Content-Disposition: form-data; name=\"reader1\"\r\n\r\n\ + part1\r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"key1\"\r\n\r\n\ + value1\r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"key2\"\r\n\ + Content-Type: image/bmp\r\n\r\n\ + value2\r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"reader2\"\r\n\r\n\ + part2\r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"key3\"; filename=\"filename\"\r\n\r\n\ + value3\r\n--boundary--\r\n"; + let rt = runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("new rt"); + let body = form.stream().into_stream(); + let s = body.map(|try_c| try_c.map(|r| r.to_vec())).try_concat(); + + let out = rt.block_on(s).unwrap(); + // These prints are for debug purposes in case the test fails + println!( + "START REAL\n{}\nEND REAL", + std::str::from_utf8(&out).unwrap() + ); + println!("START EXPECTED\n{expected}\nEND EXPECTED"); + assert_eq!(std::str::from_utf8(&out).unwrap(), expected); + } + + #[test] + fn stream_to_end_with_header() { + let mut part = Part::text("value2").mime(mime_guess::mime::IMAGE_BMP); + let mut headers = HeaderMap::new(); + headers.insert("Hdr3", "/a/b/c".parse().unwrap()); + part = part.headers(headers); + let mut 
form = Form::new().part("key2", part); + form.inner.boundary = "boundary".to_string(); + let expected = "--boundary\r\n\ + Content-Disposition: form-data; name=\"key2\"\r\n\ + Content-Type: image/bmp\r\n\ + hdr3: /a/b/c\r\n\ + \r\n\ + value2\r\n\ + --boundary--\r\n"; + let rt = runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("new rt"); + let body = form.stream().into_stream(); + let s = body.map(|try_c| try_c.map(|r| r.to_vec())).try_concat(); + + let out = rt.block_on(s).unwrap(); + // These prints are for debug purposes in case the test fails + println!( + "START REAL\n{}\nEND REAL", + std::str::from_utf8(&out).unwrap() + ); + println!("START EXPECTED\n{expected}\nEND EXPECTED"); + assert_eq!(std::str::from_utf8(&out).unwrap(), expected); + } + + #[test] + fn correct_content_length() { + // Setup an arbitrary data stream + let stream_data = b"just some stream data"; + let stream_len = stream_data.len(); + let stream_data = stream_data + .chunks(3) + .map(|c| Ok::<_, std::io::Error>(Bytes::from(c))); + let the_stream = futures_util::stream::iter(stream_data); + + let bytes_data = b"some bytes data".to_vec(); + let bytes_len = bytes_data.len(); + + let stream_part = Part::stream_with_length(Body::stream(the_stream), stream_len as u64); + let body_part = Part::bytes(bytes_data); + + // A simple check to make sure we get the configured body length + assert_eq!(stream_part.value_len().unwrap(), stream_len as u64); + + // Make sure it delegates to the underlying body if length is not specified + assert_eq!(body_part.value_len().unwrap(), bytes_len as u64); + } + + #[test] + fn header_percent_encoding() { + let name = "start%'\"\r\nßend"; + let field = Part::text(""); + + assert_eq!( + PercentEncoding::PathSegment.encode_headers(name, &field.meta), + &b"Content-Disposition: form-data; name*=utf-8''start%25'%22%0D%0A%C3%9Fend"[..] + ); + + assert_eq!( + PercentEncoding::AttrChar.encode_headers(name, &field.meta), + &b"Content-Disposition: form-data; name*=utf-8''start%25%27%22%0D%0A%C3%9Fend"[..] + ); + } +} diff --git a/rust/reqwest/src/async_impl/request.rs b/rust/reqwest/src/async_impl/request.rs new file mode 100644 index 0000000000..bcde795b9b --- /dev/null +++ b/rust/reqwest/src/async_impl/request.rs @@ -0,0 +1,1143 @@ +use std::convert::TryFrom; +use std::fmt; +use std::future::Future; +use std::time::Duration; + +use serde::Serialize; +#[cfg(feature = "json")] +use serde_json; + +use super::body::Body; +use super::client::{Client, Pending}; +#[cfg(feature = "multipart")] +use super::multipart; +use super::response::Response; +use crate::config::{RequestConfig, TotalTimeout}; +#[cfg(feature = "multipart")] +use crate::header::CONTENT_LENGTH; +use crate::header::{HeaderMap, HeaderName, HeaderValue, CONTENT_TYPE}; +use crate::{Method, Url}; +use http::{request::Parts, Extensions, Request as HttpRequest, Version}; + +/// A request which can be executed with `Client::execute()`. +pub struct Request { + method: Method, + url: Url, + headers: HeaderMap, + body: Option, + version: Version, + extensions: Extensions, +} + +/// A builder to construct the properties of a `Request`. +/// +/// To construct a `RequestBuilder`, refer to the `Client` documentation. +#[must_use = "RequestBuilder does nothing until you 'send' it"] +pub struct RequestBuilder { + client: Client, + request: crate::Result, +} + +impl Request { + /// Constructs a new request. 
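+    ///
+    /// A minimal example; the URL is an arbitrary placeholder:
+    ///
+    /// ```
+    /// use reqwest::{Method, Request, Url};
+    ///
+    /// let url = Url::parse("https://example.com/").unwrap();
+    /// let req = Request::new(Method::GET, url);
+    /// assert_eq!(req.method(), &Method::GET);
+    /// ```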
+ #[inline] + pub fn new(method: Method, url: Url) -> Self { + Request { + method, + url, + headers: HeaderMap::new(), + body: None, + version: Version::default(), + extensions: Extensions::new(), + } + } + + /// Get the method. + #[inline] + pub fn method(&self) -> &Method { + &self.method + } + + /// Get a mutable reference to the method. + #[inline] + pub fn method_mut(&mut self) -> &mut Method { + &mut self.method + } + + /// Get the url. + #[inline] + pub fn url(&self) -> &Url { + &self.url + } + + /// Get a mutable reference to the url. + #[inline] + pub fn url_mut(&mut self) -> &mut Url { + &mut self.url + } + + /// Get the headers. + #[inline] + pub fn headers(&self) -> &HeaderMap { + &self.headers + } + + /// Get a mutable reference to the headers. + #[inline] + pub fn headers_mut(&mut self) -> &mut HeaderMap { + &mut self.headers + } + + /// Get the body. + #[inline] + pub fn body(&self) -> Option<&Body> { + self.body.as_ref() + } + + /// Get a mutable reference to the body. + #[inline] + pub fn body_mut(&mut self) -> &mut Option { + &mut self.body + } + + /// Get the extensions. + #[inline] + pub(crate) fn extensions(&self) -> &Extensions { + &self.extensions + } + + /// Get a mutable reference to the extensions. + #[inline] + pub(crate) fn extensions_mut(&mut self) -> &mut Extensions { + &mut self.extensions + } + + /// Get the timeout. + #[inline] + pub fn timeout(&self) -> Option<&Duration> { + RequestConfig::::get(&self.extensions) + } + + /// Get a mutable reference to the timeout. + #[inline] + pub fn timeout_mut(&mut self) -> &mut Option { + RequestConfig::::get_mut(&mut self.extensions) + } + + /// Get the http version. + #[inline] + pub fn version(&self) -> Version { + self.version + } + + /// Get a mutable reference to the http version. + #[inline] + pub fn version_mut(&mut self) -> &mut Version { + &mut self.version + } + + /// Attempt to clone the request. + /// + /// `None` is returned if the request can not be cloned, i.e. if the body is a stream. + pub fn try_clone(&self) -> Option { + let body = match self.body.as_ref() { + Some(body) => Some(body.try_clone()?), + None => None, + }; + let mut req = Request::new(self.method().clone(), self.url().clone()); + *req.timeout_mut() = self.timeout().copied(); + *req.headers_mut() = self.headers().clone(); + *req.version_mut() = self.version(); + *req.extensions_mut() = self.extensions().clone(); + req.body = body; + Some(req) + } + + pub(super) fn pieces(self) -> (Method, Url, HeaderMap, Option, Version, Extensions) { + ( + self.method, + self.url, + self.headers, + self.body, + self.version, + self.extensions, + ) + } +} + +impl RequestBuilder { + pub(super) fn new(client: Client, request: crate::Result) -> RequestBuilder { + let mut builder = RequestBuilder { client, request }; + + let auth = builder + .request + .as_mut() + .ok() + .and_then(|req| extract_authority(&mut req.url)); + + if let Some((username, password)) = auth { + builder.basic_auth(username, password) + } else { + builder + } + } + + /// Assemble a builder starting from an existing `Client` and a `Request`. + pub fn from_parts(client: Client, request: Request) -> RequestBuilder { + RequestBuilder { + client, + request: crate::Result::Ok(request), + } + } + + /// Add a `Header` to this Request. 
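+    ///
+    /// ```rust
+    /// # use reqwest::Error;
+    /// #
+    /// # async fn run() -> Result<(), Error> {
+    /// let client = reqwest::Client::new();
+    /// let res = client.get("http://httpbin.org/get")
+    ///     // An arbitrary illustrative header; any valid name/value pair works.
+    ///     .header("X-Request-Id", "42")
+    ///     .send()
+    ///     .await?;
+    /// # Ok(())
+    /// # }
+    /// ```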
+ pub fn header(self, key: K, value: V) -> RequestBuilder + where + HeaderName: TryFrom, + >::Error: Into, + HeaderValue: TryFrom, + >::Error: Into, + { + self.header_sensitive(key, value, false) + } + + /// Add a `Header` to this Request with ability to define if `header_value` is sensitive. + fn header_sensitive(mut self, key: K, value: V, sensitive: bool) -> RequestBuilder + where + HeaderName: TryFrom, + >::Error: Into, + HeaderValue: TryFrom, + >::Error: Into, + { + let mut error = None; + if let Ok(ref mut req) = self.request { + match >::try_from(key) { + Ok(key) => match >::try_from(value) { + Ok(mut value) => { + // We want to potentially make an non-sensitive header + // to be sensitive, not the reverse. So, don't turn off + // a previously sensitive header. + if sensitive { + value.set_sensitive(true); + } + req.headers_mut().append(key, value); + } + Err(e) => error = Some(crate::error::builder(e.into())), + }, + Err(e) => error = Some(crate::error::builder(e.into())), + }; + } + if let Some(err) = error { + self.request = Err(err); + } + self + } + + /// Add a set of Headers to the existing ones on this Request. + /// + /// The headers will be merged in to any already set. + pub fn headers(mut self, headers: crate::header::HeaderMap) -> RequestBuilder { + if let Ok(ref mut req) = self.request { + crate::util::replace_headers(req.headers_mut(), headers); + } + self + } + + /// Enable HTTP basic authentication. + /// + /// ```rust + /// # use reqwest::Error; + /// + /// # async fn run() -> Result<(), Error> { + /// let client = reqwest::Client::new(); + /// let resp = client.delete("http://httpbin.org/delete") + /// .basic_auth("admin", Some("good password")) + /// .send() + /// .await?; + /// # Ok(()) + /// # } + /// ``` + pub fn basic_auth(self, username: U, password: Option
<P>
) -> RequestBuilder + where + U: fmt::Display, + P: fmt::Display, + { + let header_value = crate::util::basic_auth(username, password); + self.header_sensitive(crate::header::AUTHORIZATION, header_value, true) + } + + /// Enable HTTP bearer authentication. + pub fn bearer_auth(self, token: T) -> RequestBuilder + where + T: fmt::Display, + { + let header_value = format!("Bearer {token}"); + self.header_sensitive(crate::header::AUTHORIZATION, header_value, true) + } + + /// Set the request body. + pub fn body>(mut self, body: T) -> RequestBuilder { + if let Ok(ref mut req) = self.request { + *req.body_mut() = Some(body.into()); + } + self + } + + /// Enables a request timeout. + /// + /// The timeout is applied from when the request starts connecting until the + /// response body has finished. It affects only this request and overrides + /// the timeout configured using `ClientBuilder::timeout()`. + pub fn timeout(mut self, timeout: Duration) -> RequestBuilder { + if let Ok(ref mut req) = self.request { + *req.timeout_mut() = Some(timeout); + } + self + } + + /// Sends a multipart/form-data body. + /// + /// ``` + /// # use reqwest::Error; + /// + /// # async fn run() -> Result<(), Error> { + /// let client = reqwest::Client::new(); + /// let form = reqwest::multipart::Form::new() + /// .text("key3", "value3") + /// .text("key4", "value4"); + /// + /// + /// let response = client.post("your url") + /// .multipart(form) + /// .send() + /// .await?; + /// # Ok(()) + /// # } + /// ``` + /// + /// In additional the request's body, the Content-Type and Content-Length fields are + /// appropriately set. + #[cfg(feature = "multipart")] + #[cfg_attr(docsrs, doc(cfg(feature = "multipart")))] + pub fn multipart(self, mut multipart: multipart::Form) -> RequestBuilder { + let mut builder = self.header( + CONTENT_TYPE, + format!("multipart/form-data; boundary={}", multipart.boundary()).as_str(), + ); + + builder = match multipart.compute_length() { + Some(length) => builder.header(CONTENT_LENGTH, length), + None => builder, + }; + + if let Ok(ref mut req) = builder.request { + *req.body_mut() = Some(multipart.stream()) + } + builder + } + + /// Modify the query string of the URL. + /// + /// Modifies the URL of this request, adding the parameters provided. + /// This method appends and does not overwrite. This means that it can + /// be called multiple times and that existing query parameters are not + /// overwritten if the same key is used. The key will simply show up + /// twice in the query string. + /// Calling `.query(&[("foo", "a"), ("foo", "b")])` gives `"foo=a&foo=b"`. + /// + /// # Note + /// This method does not support serializing a single key-value + /// pair. Instead of using `.query(("key", "val"))`, use a sequence, such + /// as `.query(&[("key", "val")])`. It's also possible to serialize structs + /// and maps into a key-value pair. + /// + /// # Errors + /// This method will fail if the object you provide cannot be serialized + /// into a query string. 
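+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use reqwest::Error;
+    /// #
+    /// # async fn run() -> Result<(), Error> {
+    /// let client = reqwest::Client::new();
+    /// // Appends `?lang=rust&page=2`; the keys and values are placeholders.
+    /// let res = client.get("http://httpbin.org/get")
+    ///     .query(&[("lang", "rust"), ("page", "2")])
+    ///     .send()
+    ///     .await?;
+    /// # Ok(())
+    /// # }
+    /// ```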
+
+    /// Modify the query string of the URL.
+    ///
+    /// Modifies the URL of this request, adding the parameters provided.
+    /// This method appends and does not overwrite. This means that it can
+    /// be called multiple times and that existing query parameters are not
+    /// overwritten if the same key is used. The key will simply show up
+    /// twice in the query string.
+    /// Calling `.query(&[("foo", "a"), ("foo", "b")])` gives `"foo=a&foo=b"`.
+    ///
+    /// # Note
+    /// This method does not support serializing a single key-value
+    /// pair. Instead of using `.query(("key", "val"))`, use a sequence, such
+    /// as `.query(&[("key", "val")])`. It's also possible to serialize structs
+    /// and maps into key-value pairs.
+    ///
+    /// # Errors
+    /// This method will fail if the object you provide cannot be serialized
+    /// into a query string.
+    pub fn query<T: Serialize + ?Sized>(mut self, query: &T) -> RequestBuilder {
+        let mut error = None;
+        if let Ok(ref mut req) = self.request {
+            let url = req.url_mut();
+            let mut pairs = url.query_pairs_mut();
+            let serializer = serde_urlencoded::Serializer::new(&mut pairs);
+
+            if let Err(err) = query.serialize(serializer) {
+                error = Some(crate::error::builder(err));
+            }
+        }
+        if let Ok(ref mut req) = self.request {
+            if let Some("") = req.url().query() {
+                req.url_mut().set_query(None);
+            }
+        }
+        if let Some(err) = error {
+            self.request = Err(err);
+        }
+        self
+    }
+
+    /// Set the HTTP version.
+    pub fn version(mut self, version: Version) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.version = version;
+        }
+        self
+    }
+
+    /// Send a form body.
+    ///
+    /// Sets the body to the url encoded serialization of the passed value,
+    /// and also sets the `Content-Type: application/x-www-form-urlencoded`
+    /// header.
+    ///
+    /// ```rust
+    /// # use reqwest::Error;
+    /// # use std::collections::HashMap;
+    /// #
+    /// # async fn run() -> Result<(), Error> {
+    /// let mut params = HashMap::new();
+    /// params.insert("lang", "rust");
+    ///
+    /// let client = reqwest::Client::new();
+    /// let res = client.post("http://httpbin.org")
+    ///     .form(&params)
+    ///     .send()
+    ///     .await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// This method fails if the passed value cannot be serialized into
+    /// url encoded format.
+    pub fn form<T: Serialize + ?Sized>(mut self, form: &T) -> RequestBuilder {
+        let mut error = None;
+        if let Ok(ref mut req) = self.request {
+            match serde_urlencoded::to_string(form) {
+                Ok(body) => {
+                    req.headers_mut()
+                        .entry(CONTENT_TYPE)
+                        .or_insert(HeaderValue::from_static(
+                            "application/x-www-form-urlencoded",
+                        ));
+                    *req.body_mut() = Some(body.into());
+                }
+                Err(err) => error = Some(crate::error::builder(err)),
+            }
+        }
+        if let Some(err) = error {
+            self.request = Err(err);
+        }
+        self
+    }
+
+    /// Send a JSON body.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `json` feature enabled.
+    ///
+    /// # Errors
+    ///
+    /// Serialization can fail if `T`'s implementation of `Serialize` decides to
+    /// fail, or if `T` contains a map with non-string keys.
+    #[cfg(feature = "json")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "json")))]
+    pub fn json<T: Serialize + ?Sized>(mut self, json: &T) -> RequestBuilder {
+        let mut error = None;
+        if let Ok(ref mut req) = self.request {
+            match serde_json::to_vec(json) {
+                Ok(body) => {
+                    if !req.headers().contains_key(CONTENT_TYPE) {
+                        req.headers_mut()
+                            .insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
+                    }
+                    *req.body_mut() = Some(body.into());
+                }
+                Err(err) => error = Some(crate::error::builder(err)),
+            }
+        }
+        if let Some(err) = error {
+            self.request = Err(err);
+        }
+        self
+    }
+
+    // This was a shell only meant to help with rendered documentation.
+    // However, docs.rs can now show the docs for the wasm platforms, so this
+    // is no longer needed.
+    //
+    // You should not otherwise depend on this function. Its deprecation
+    // is just to nudge people to reduce breakage. It may be removed in a
+    // future patch version.
+    #[doc(hidden)]
+    #[cfg_attr(target_arch = "wasm32", deprecated)]
+    pub fn fetch_mode_no_cors(self) -> RequestBuilder {
+        self
+    }
+
+    /// Build a `Request`, which can be inspected, modified and executed with
+    /// `Client::execute()`.
+    pub fn build(self) -> crate::Result<Request> {
+        self.request
+    }
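// [Editor's sketch, not part of the vendored diff] What `json()` above does
// to a request: it serializes the value into the body and only inserts
// `Content-Type: application/json` when no Content-Type is already present.
// Requires the `json` feature.
fn json_sets_content_type() -> Result<(), reqwest::Error> {
    use std::collections::HashMap;
    let mut payload = HashMap::new();
    payload.insert("lang", "rust");
    let req = reqwest::Client::new()
        .post("https://example.org/api")
        .json(&payload)
        .build()?;
    assert_eq!(req.headers()["content-type"], "application/json");
    Ok(())
}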
+
+    /// Build a `Request`, which can be inspected, modified and executed with
+    /// `Client::execute()`.
+    ///
+    /// This is similar to [`RequestBuilder::build()`], but also returns the
+    /// embedded `Client`.
+    pub fn build_split(self) -> (Client, crate::Result<Request>) {
+        (self.client, self.request)
+    }
+
+    /// Constructs the Request and sends it to the target URL, returning a
+    /// future Response.
+    ///
+    /// # Errors
+    ///
+    /// This method fails if there was an error while sending the request,
+    /// a redirect loop was detected, or the redirect limit was exhausted.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use reqwest::Error;
+    /// #
+    /// # async fn run() -> Result<(), Error> {
+    /// let response = reqwest::Client::new()
+    ///     .get("https://hyper.rs")
+    ///     .send()
+    ///     .await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn send(self) -> impl Future<Output = Result<Response, crate::Error>> {
+        match self.request {
+            Ok(req) => self.client.execute_request(req),
+            Err(err) => Pending::new_err(err),
+        }
+    }
+
+    /// Attempt to clone the RequestBuilder.
+    ///
+    /// `None` is returned if the RequestBuilder cannot be cloned,
+    /// i.e. if the request body is a stream.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use reqwest::Error;
+    /// #
+    /// # fn run() -> Result<(), Error> {
+    /// let client = reqwest::Client::new();
+    /// let builder = client.post("http://httpbin.org/post")
+    ///     .body("from a &str!");
+    /// let clone = builder.try_clone();
+    /// assert!(clone.is_some());
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn try_clone(&self) -> Option<RequestBuilder> {
+        self.request
+            .as_ref()
+            .ok()
+            .and_then(|req| req.try_clone())
+            .map(|req| RequestBuilder {
+                client: self.client.clone(),
+                request: Ok(req),
+            })
+    }
+}
+
+impl fmt::Debug for Request {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt_request_fields(&mut f.debug_struct("Request"), self).finish()
+    }
+}
+
+impl fmt::Debug for RequestBuilder {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut builder = f.debug_struct("RequestBuilder");
+        match self.request {
+            Ok(ref req) => fmt_request_fields(&mut builder, req).finish(),
+            Err(ref err) => builder.field("error", err).finish(),
+        }
+    }
+}
+
+fn fmt_request_fields<'a, 'b>(
+    f: &'a mut fmt::DebugStruct<'a, 'b>,
+    req: &Request,
+) -> &'a mut fmt::DebugStruct<'a, 'b> {
+    f.field("method", &req.method)
+        .field("url", &req.url)
+        .field("headers", &req.headers)
+}
+
+/// Check the request URL for a "username:password" type authority, and if
+/// found, remove it from the URL and return it.
+pub(crate) fn extract_authority(url: &mut Url) -> Option<(String, Option<String>)> {
+    use percent_encoding::percent_decode;
+
+    if url.has_authority() {
+        let username: String = percent_decode(url.username().as_bytes())
+            .decode_utf8()
+            .ok()?
+            .into();
+        let password = url.password().and_then(|pass| {
+            percent_decode(pass.as_bytes())
+                .decode_utf8()
+                .ok()
+                .map(String::from)
+        });
+        if !username.is_empty() || password.is_some() {
+            url.set_username("")
+                .expect("has_authority means set_username shouldn't fail");
+            url.set_password(None)
+                .expect("has_authority means set_password shouldn't fail");
+            return Some((username, password));
+        }
+    }
+
+    None
+}
+
+impl<T> TryFrom<HttpRequest<T>> for Request
+where
+    T: Into<Body>,
+{
+    type Error = crate::Error;
+
+    fn try_from(req: HttpRequest<T>) -> crate::Result<Self> {
+        let (parts, body) = req.into_parts();
+        let Parts {
+            method,
+            uri,
+            headers,
+            version,
+            extensions,
+            ..
+        } = parts;
+        let url = Url::parse(&uri.to_string()).map_err(crate::error::builder)?;
+        Ok(Request {
+            method,
+            url,
+            headers,
+            body: Some(body.into()),
+            version,
+            extensions,
+        })
+    }
+}
+
+impl TryFrom<Request> for HttpRequest<Body> {
+    type Error = crate::Error;
+
+    fn try_from(req: Request) -> crate::Result<Self> {
+        let Request {
+            method,
+            url,
+            headers,
+            body,
+            version,
+            extensions,
+            ..
+        } = req;
+
+        let mut req = HttpRequest::builder()
+            .version(version)
+            .method(method)
+            .uri(url.as_str())
+            .body(body.unwrap_or_else(Body::empty))
+            .map_err(crate::error::builder)?;
+
+        *req.headers_mut() = headers;
+        *req.extensions_mut() = extensions;
+        Ok(req)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))]
+
+    use super::{Client, HttpRequest, Request, RequestBuilder, Version};
+    use crate::Method;
+    use serde::Serialize;
+    use std::collections::BTreeMap;
+    use std::convert::TryFrom;
+
+    #[test]
+    fn add_query_append() {
+        let client = Client::new();
+        let some_url = "https://google.com/";
+        let r = client.get(some_url);
+
+        let r = r.query(&[("foo", "bar")]);
+        let r = r.query(&[("qux", 3)]);
+
+        let req = r.build().expect("request is valid");
+        assert_eq!(req.url().query(), Some("foo=bar&qux=3"));
+    }
+
+    #[test]
+    fn add_query_append_same() {
+        let client = Client::new();
+        let some_url = "https://google.com/";
+        let r = client.get(some_url);
+
+        let r = r.query(&[("foo", "a"), ("foo", "b")]);
+
+        let req = r.build().expect("request is valid");
+        assert_eq!(req.url().query(), Some("foo=a&foo=b"));
+    }
+
+    #[test]
+    fn add_query_struct() {
+        #[derive(Serialize)]
+        struct Params {
+            foo: String,
+            qux: i32,
+        }
+
+        let client = Client::new();
+        let some_url = "https://google.com/";
+        let r = client.get(some_url);
+
+        let params = Params {
+            foo: "bar".into(),
+            qux: 3,
+        };
+
+        let r = r.query(&params);
+
+        let req = r.build().expect("request is valid");
+        assert_eq!(req.url().query(), Some("foo=bar&qux=3"));
+    }
+
+    #[test]
+    fn add_query_map() {
+        let mut params = BTreeMap::new();
+        params.insert("foo", "bar");
+        params.insert("qux", "three");
+
+        let client = Client::new();
+        let some_url = "https://google.com/";
+        let r = client.get(some_url);
+
+        let r = r.query(&params);
+
+        let req = r.build().expect("request is valid");
+        assert_eq!(req.url().query(), Some("foo=bar&qux=three"));
+    }
+
+    #[test]
+    fn test_replace_headers() {
+        use http::HeaderMap;
+
+        let mut headers = HeaderMap::new();
+        headers.insert("foo", "bar".parse().unwrap());
+        headers.append("foo", "baz".parse().unwrap());
+
+        let client = Client::new();
+        let req = client
+            .get("https://hyper.rs")
+            .header("im-a", "keeper")
+            .header("foo", "pop me")
+            .headers(headers)
+            .build()
+            .expect("request build");
+
+        assert_eq!(req.headers()["im-a"], "keeper");
+
+        let foo = req.headers().get_all("foo").iter().collect::<Vec<_>>();
+        assert_eq!(foo.len(), 2);
+        assert_eq!(foo[0], "bar");
+        assert_eq!(foo[1], "baz");
+    }
+
+    #[test]
+    fn normalize_empty_query() {
+        let client = Client::new();
+        let some_url = "https://google.com/";
+        let empty_query: &[(&str, &str)] = &[];
+
+        let req = client
+            .get(some_url)
+            .query(empty_query)
+            .build()
+            .expect("request build");
+
+        assert_eq!(req.url().query(), None);
+        assert_eq!(req.url().as_str(), "https://google.com/");
+    }
+
+    #[test]
+    fn try_clone_reusable() {
+        let client = Client::new();
+        let builder = client
+            .post("http://httpbin.org/post")
+            .header("foo", "bar")
+            .body("from a &str!");
+        let req = builder
+            .try_clone()
+            .expect("clone
successful") + .build() + .expect("request is valid"); + assert_eq!(req.url().as_str(), "http://httpbin.org/post"); + assert_eq!(req.method(), Method::POST); + assert_eq!(req.headers()["foo"], "bar"); + } + + #[test] + fn try_clone_no_body() { + let client = Client::new(); + let builder = client.get("http://httpbin.org/get"); + let req = builder + .try_clone() + .expect("clone successful") + .build() + .expect("request is valid"); + assert_eq!(req.url().as_str(), "http://httpbin.org/get"); + assert_eq!(req.method(), Method::GET); + assert!(req.body().is_none()); + } + + #[test] + #[cfg(feature = "stream")] + fn try_clone_stream() { + let chunks: Vec> = vec![Ok("hello"), Ok(" "), Ok("world")]; + let stream = futures_util::stream::iter(chunks); + let client = Client::new(); + let builder = client + .get("http://httpbin.org/get") + .body(super::Body::wrap_stream(stream)); + let clone = builder.try_clone(); + assert!(clone.is_none()); + } + + #[test] + fn convert_url_authority_into_basic_auth() { + let client = Client::new(); + let some_url = "https://Aladdin:open sesame@localhost/"; + + let req = client.get(some_url).build().expect("request build"); + + assert_eq!(req.url().as_str(), "https://localhost/"); + assert_eq!( + req.headers()["authorization"], + "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" + ); + } + + #[test] + fn test_basic_auth_sensitive_header() { + let client = Client::new(); + let some_url = "https://localhost/"; + + let req = client + .get(some_url) + .basic_auth("Aladdin", Some("open sesame")) + .build() + .expect("request build"); + + assert_eq!(req.url().as_str(), "https://localhost/"); + assert_eq!( + req.headers()["authorization"], + "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" + ); + assert!(req.headers()["authorization"].is_sensitive()); + } + + #[test] + fn test_bearer_auth_sensitive_header() { + let client = Client::new(); + let some_url = "https://localhost/"; + + let req = client + .get(some_url) + .bearer_auth("Hold my bear") + .build() + .expect("request build"); + + assert_eq!(req.url().as_str(), "https://localhost/"); + assert_eq!(req.headers()["authorization"], "Bearer Hold my bear"); + assert!(req.headers()["authorization"].is_sensitive()); + } + + #[test] + fn test_explicit_sensitive_header() { + let client = Client::new(); + let some_url = "https://localhost/"; + + let mut header = http::HeaderValue::from_static("in plain sight"); + header.set_sensitive(true); + + let req = client + .get(some_url) + .header("hiding", header) + .build() + .expect("request build"); + + assert_eq!(req.url().as_str(), "https://localhost/"); + assert_eq!(req.headers()["hiding"], "in plain sight"); + assert!(req.headers()["hiding"].is_sensitive()); + } + + #[test] + fn convert_from_http_request() { + let http_request = HttpRequest::builder() + .method("GET") + .uri("http://localhost/") + .header("User-Agent", "my-awesome-agent/1.0") + .body("test test test") + .unwrap(); + let req: Request = Request::try_from(http_request).unwrap(); + assert!(req.body().is_some()); + let test_data = b"test test test"; + assert_eq!(req.body().unwrap().as_bytes(), Some(&test_data[..])); + let headers = req.headers(); + assert_eq!(headers.get("User-Agent").unwrap(), "my-awesome-agent/1.0"); + assert_eq!(req.method(), Method::GET); + assert_eq!(req.url().as_str(), "http://localhost/"); + } + + #[test] + fn set_http_request_version() { + let http_request = HttpRequest::builder() + .method("GET") + .uri("http://localhost/") + .header("User-Agent", "my-awesome-agent/1.0") + .version(Version::HTTP_11) + .body("test test 
test") + .unwrap(); + let req: Request = Request::try_from(http_request).unwrap(); + assert!(req.body().is_some()); + let test_data = b"test test test"; + assert_eq!(req.body().unwrap().as_bytes(), Some(&test_data[..])); + let headers = req.headers(); + assert_eq!(headers.get("User-Agent").unwrap(), "my-awesome-agent/1.0"); + assert_eq!(req.method(), Method::GET); + assert_eq!(req.url().as_str(), "http://localhost/"); + assert_eq!(req.version(), Version::HTTP_11); + } + + #[test] + fn builder_split_reassemble() { + let builder = { + let client = Client::new(); + client.get("http://example.com") + }; + let (client, inner) = builder.build_split(); + let request = inner.unwrap(); + let builder = RequestBuilder::from_parts(client, request); + builder.build().unwrap(); + } + + /* + use {body, Method}; + use super::Client; + use header::{Host, Headers, ContentType}; + use std::collections::HashMap; + use serde_urlencoded; + use serde_json; + + #[test] + fn basic_get_request() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let r = client.get(some_url).unwrap().build(); + + assert_eq!(r.method, Method::Get); + assert_eq!(r.url.as_str(), some_url); + } + + #[test] + fn basic_head_request() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let r = client.head(some_url).unwrap().build(); + + assert_eq!(r.method, Method::Head); + assert_eq!(r.url.as_str(), some_url); + } + + #[test] + fn basic_post_request() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let r = client.post(some_url).unwrap().build(); + + assert_eq!(r.method, Method::Post); + assert_eq!(r.url.as_str(), some_url); + } + + #[test] + fn basic_put_request() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let r = client.put(some_url).unwrap().build(); + + assert_eq!(r.method, Method::Put); + assert_eq!(r.url.as_str(), some_url); + } + + #[test] + fn basic_patch_request() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let r = client.patch(some_url).unwrap().build(); + + assert_eq!(r.method, Method::Patch); + assert_eq!(r.url.as_str(), some_url); + } + + #[test] + fn basic_delete_request() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let r = client.delete(some_url).unwrap().build(); + + assert_eq!(r.method, Method::Delete); + assert_eq!(r.url.as_str(), some_url); + } + + #[test] + fn add_header() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let mut r = client.post(some_url).unwrap(); + + let header = Host { + hostname: "google.com".to_string(), + port: None, + }; + + // Add a copy of the header to the request builder + let r = r.header(header.clone()).build(); + + // then check it was actually added + assert_eq!(r.headers.get::(), Some(&header)); + } + + #[test] + fn add_headers() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let mut r = client.post(some_url).unwrap(); + + let header = Host { + hostname: "google.com".to_string(), + port: None, + }; + + let mut headers = Headers::new(); + headers.set(header); + + // Add a copy of the headers to the request builder + let r = r.headers(headers.clone()).build(); + + // then make sure they were added correctly + assert_eq!(r.headers, headers); + } + + #[test] + fn add_headers_multi() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let mut r = client.post(some_url).unwrap(); + + 
let header = Host { + hostname: "google.com".to_string(), + port: None, + }; + + let mut headers = Headers::new(); + headers.set(header); + + // Add a copy of the headers to the request builder + let r = r.headers(headers.clone()).build(); + + // then make sure they were added correctly + assert_eq!(r.headers, headers); + } + + #[test] + fn add_body() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let mut r = client.post(some_url).unwrap(); + + let body = "Some interesting content"; + + let r = r.body(body).build(); + + let buf = body::read_to_string(r.body.unwrap()).unwrap(); + + assert_eq!(buf, body); + } + + #[test] + fn add_form() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let mut r = client.post(some_url).unwrap(); + + let mut form_data = HashMap::new(); + form_data.insert("foo", "bar"); + + let r = r.form(&form_data).unwrap().build(); + + // Make sure the content type was set + assert_eq!(r.headers.get::(), + Some(&ContentType::form_url_encoded())); + + let buf = body::read_to_string(r.body.unwrap()).unwrap(); + + let body_should_be = serde_urlencoded::to_string(&form_data).unwrap(); + assert_eq!(buf, body_should_be); + } + + #[test] + fn add_json() { + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let mut r = client.post(some_url).unwrap(); + + let mut json_data = HashMap::new(); + json_data.insert("foo", "bar"); + + let r = r.json(&json_data).unwrap().build(); + + // Make sure the content type was set + assert_eq!(r.headers.get::(), Some(&ContentType::json())); + + let buf = body::read_to_string(r.body.unwrap()).unwrap(); + + let body_should_be = serde_json::to_string(&json_data).unwrap(); + assert_eq!(buf, body_should_be); + } + + #[test] + fn add_json_fail() { + use serde::{Serialize, Serializer}; + use serde::ser::Error; + struct MyStruct; + impl Serialize for MyStruct { + fn serialize(&self, _serializer: S) -> Result + where S: Serializer + { + Err(S::Error::custom("nope")) + } + } + + let client = Client::new().unwrap(); + let some_url = "https://google.com/"; + let mut r = client.post(some_url).unwrap(); + let json_data = MyStruct{}; + assert!(r.json(&json_data).unwrap_err().is_serialization()); + } + */ +} diff --git a/rust/reqwest/src/async_impl/response.rs b/rust/reqwest/src/async_impl/response.rs new file mode 100644 index 0000000000..4c0d52727f --- /dev/null +++ b/rust/reqwest/src/async_impl/response.rs @@ -0,0 +1,513 @@ +use std::fmt; +use std::net::SocketAddr; +use std::pin::Pin; +use std::time::Duration; + +use bytes::Bytes; +use http_body_util::BodyExt; +use hyper::{HeaderMap, StatusCode, Version}; +use hyper_util::client::legacy::connect::HttpInfo; +#[cfg(feature = "json")] +use serde::de::DeserializeOwned; +#[cfg(feature = "json")] +use serde_json; +use tokio::time::Sleep; +use url::Url; + +use super::body::Body; +use super::decoder::{Accepts, Decoder}; +use crate::async_impl::body::ResponseBody; +#[cfg(feature = "cookies")] +use crate::cookie; + +#[cfg(feature = "charset")] +use encoding_rs::{Encoding, UTF_8}; +#[cfg(feature = "charset")] +use mime::Mime; + +/// A Response to a submitted `Request`. +pub struct Response { + pub(super) res: hyper::Response, + // Boxed to save space (11 words to 1 word), and it's not accessed + // frequently internally. 
+    url: Box<Url>,
+}
+
+impl Response {
+    pub(super) fn new(
+        res: hyper::Response<hyper::body::Incoming>,
+        url: Url,
+        accepts: Accepts,
+        total_timeout: Option<Pin<Box<Sleep>>>,
+        read_timeout: Option<Duration>,
+    ) -> Response {
+        let (mut parts, body) = res.into_parts();
+        let decoder = Decoder::detect(
+            &mut parts.headers,
+            super::body::response(body, total_timeout, read_timeout),
+            accepts,
+        );
+        let res = hyper::Response::from_parts(parts, decoder);
+
+        Response {
+            res,
+            url: Box::new(url),
+        }
+    }
+
+    /// Get the `StatusCode` of this `Response`.
+    #[inline]
+    pub fn status(&self) -> StatusCode {
+        self.res.status()
+    }
+
+    /// Get the HTTP `Version` of this `Response`.
+    #[inline]
+    pub fn version(&self) -> Version {
+        self.res.version()
+    }
+
+    /// Get the `Headers` of this `Response`.
+    #[inline]
+    pub fn headers(&self) -> &HeaderMap {
+        self.res.headers()
+    }
+
+    /// Get a mutable reference to the `Headers` of this `Response`.
+    #[inline]
+    pub fn headers_mut(&mut self) -> &mut HeaderMap {
+        self.res.headers_mut()
+    }
+
+    /// Get the content length of the response, if it is known.
+    ///
+    /// This value does not directly represent the value of the `Content-Length`
+    /// header, but rather the size of the response's body. To read the header's
+    /// value, please use the [`Response::headers`] method instead.
+    ///
+    /// Reasons it may not be known:
+    ///
+    /// - The response does not include a body (e.g. it responds to a `HEAD`
+    ///   request).
+    /// - The response is gzipped and automatically decoded (thus changing the
+    ///   actual decoded length).
+    pub fn content_length(&self) -> Option<u64> {
+        use hyper::body::Body;
+
+        Body::size_hint(self.res.body()).exact()
+    }
+
+    /// Retrieve the cookies contained in the response.
+    ///
+    /// Note that invalid 'Set-Cookie' headers will be ignored.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `cookies` feature to be enabled.
+    #[cfg(feature = "cookies")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "cookies")))]
+    pub fn cookies<'a>(&'a self) -> impl Iterator<Item = cookie::Cookie<'a>> + 'a {
+        cookie::extract_response_cookies(self.res.headers()).filter_map(Result::ok)
+    }
+
+    /// Get the final `Url` of this `Response`.
+    #[inline]
+    pub fn url(&self) -> &Url {
+        &self.url
+    }
+
+    /// Get the remote address used to get this `Response`.
+    pub fn remote_addr(&self) -> Option<SocketAddr> {
+        self.res
+            .extensions()
+            .get::<HttpInfo>()
+            .map(|info| info.remote_addr())
+    }
+
+    /// Returns a reference to the associated extensions.
+    pub fn extensions(&self) -> &http::Extensions {
+        self.res.extensions()
+    }
+
+    /// Returns a mutable reference to the associated extensions.
+    pub fn extensions_mut(&mut self) -> &mut http::Extensions {
+        self.res.extensions_mut()
+    }
+
+    // body methods
+
+    /// Get the full response text.
+    ///
+    /// This method decodes the response body with BOM sniffing
+    /// and with malformed sequences replaced with the
+    /// [`char::REPLACEMENT_CHARACTER`].
+    /// Encoding is determined from the `charset` parameter of the `Content-Type` header,
+    /// and defaults to `utf-8` if not present.
+    ///
+    /// Note that the BOM is stripped from the returned String.
+    ///
+    /// # Note
+    ///
+    /// If the `charset` feature is disabled, the method will only attempt to decode the
+    /// response as UTF-8, regardless of the given `Content-Type`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # async fn run() -> Result<(), Box<dyn std::error::Error>> {
+    /// let content = reqwest::get("http://httpbin.org/range/26")
+    ///     .await?
+ /// .text() + /// .await?; + /// + /// println!("text: {content:?}"); + /// # Ok(()) + /// # } + /// ``` + pub async fn text(self) -> crate::Result { + #[cfg(feature = "charset")] + { + self.text_with_charset("utf-8").await + } + + #[cfg(not(feature = "charset"))] + { + let full = self.bytes().await?; + let text = String::from_utf8_lossy(&full); + Ok(text.into_owned()) + } + } + + /// Get the full response text given a specific encoding. + /// + /// This method decodes the response body with BOM sniffing + /// and with malformed sequences replaced with the [`char::REPLACEMENT_CHARACTER`]. + /// You can provide a default encoding for decoding the raw message, while the + /// `charset` parameter of `Content-Type` header is still prioritized. For more information + /// about the possible encoding name, please go to [`encoding_rs`] docs. + /// + /// Note that the BOM is stripped from the returned String. + /// + /// [`encoding_rs`]: https://docs.rs/encoding_rs/0.8/encoding_rs/#relationship-with-windows-code-pages + /// + /// # Optional + /// + /// This requires the optional `encoding_rs` feature enabled. + /// + /// # Example + /// + /// ``` + /// # async fn run() -> Result<(), Box> { + /// let content = reqwest::get("http://httpbin.org/range/26") + /// .await? + /// .text_with_charset("utf-8") + /// .await?; + /// + /// println!("text: {content:?}"); + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "charset")] + #[cfg_attr(docsrs, doc(cfg(feature = "charset")))] + pub async fn text_with_charset(self, default_encoding: &str) -> crate::Result { + let content_type = self + .headers() + .get(crate::header::CONTENT_TYPE) + .and_then(|value| value.to_str().ok()) + .and_then(|value| value.parse::().ok()); + let encoding_name = content_type + .as_ref() + .and_then(|mime| mime.get_param("charset").map(|charset| charset.as_str())) + .unwrap_or(default_encoding); + let encoding = Encoding::for_label(encoding_name.as_bytes()).unwrap_or(UTF_8); + + let full = self.bytes().await?; + + let (text, _, _) = encoding.decode(&full); + Ok(text.into_owned()) + } + + /// Try to deserialize the response body as JSON. + /// + /// # Optional + /// + /// This requires the optional `json` feature enabled. + /// + /// # Examples + /// + /// ``` + /// # extern crate reqwest; + /// # extern crate serde; + /// # + /// # use reqwest::Error; + /// # use serde::Deserialize; + /// # + /// // This `derive` requires the `serde` dependency. + /// #[derive(Deserialize)] + /// struct Ip { + /// origin: String, + /// } + /// + /// # async fn run() -> Result<(), Error> { + /// let ip = reqwest::get("http://httpbin.org/ip") + /// .await? + /// .json::() + /// .await?; + /// + /// println!("ip: {}", ip.origin); + /// # Ok(()) + /// # } + /// # + /// # fn main() { } + /// ``` + /// + /// # Errors + /// + /// This method fails whenever the response body is not in JSON format, + /// or it cannot be properly deserialized to target type `T`. For more + /// details please see [`serde_json::from_reader`]. + /// + /// [`serde_json::from_reader`]: https://docs.serde.rs/serde_json/fn.from_reader.html + #[cfg(feature = "json")] + #[cfg_attr(docsrs, doc(cfg(feature = "json")))] + pub async fn json(self) -> crate::Result { + let full = self.bytes().await?; + + serde_json::from_slice(&full).map_err(crate::error::decode) + } + + /// Get the full response body as `Bytes`. + /// + /// # Example + /// + /// ``` + /// # async fn run() -> Result<(), Box> { + /// let bytes = reqwest::get("http://httpbin.org/ip") + /// .await? 
+ /// .bytes() + /// .await?; + /// + /// println!("bytes: {bytes:?}"); + /// # Ok(()) + /// # } + /// ``` + pub async fn bytes(self) -> crate::Result { + use http_body_util::BodyExt; + + BodyExt::collect(self.res.into_body()) + .await + .map(|buf| buf.to_bytes()) + } + + /// Stream a chunk of the response body. + /// + /// When the response body has been exhausted, this will return `None`. + /// + /// # Example + /// + /// ``` + /// # async fn run() -> Result<(), Box> { + /// let mut res = reqwest::get("https://hyper.rs").await?; + /// + /// while let Some(chunk) = res.chunk().await? { + /// println!("Chunk: {chunk:?}"); + /// } + /// # Ok(()) + /// # } + /// ``` + pub async fn chunk(&mut self) -> crate::Result> { + use http_body_util::BodyExt; + + // loop to ignore unrecognized frames + loop { + if let Some(res) = self.res.body_mut().frame().await { + let frame = res?; + if let Ok(buf) = frame.into_data() { + return Ok(Some(buf)); + } + // else continue + } else { + return Ok(None); + } + } + } + + /// Convert the response into a `Stream` of `Bytes` from the body. + /// + /// # Example + /// + /// ``` + /// use futures_util::StreamExt; + /// + /// # async fn run() -> Result<(), Box> { + /// let mut stream = reqwest::get("http://httpbin.org/ip") + /// .await? + /// .bytes_stream(); + /// + /// while let Some(item) = stream.next().await { + /// println!("Chunk: {:?}", item?); + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// # Optional + /// + /// This requires the optional `stream` feature to be enabled. + #[cfg(feature = "stream")] + #[cfg_attr(docsrs, doc(cfg(feature = "stream")))] + pub fn bytes_stream(self) -> impl futures_core::Stream> { + super::body::DataStream(self.res.into_body()) + } + + // util methods + + /// Turn a response into an error if the server returned an error. + /// + /// # Example + /// + /// ``` + /// # use reqwest::Response; + /// fn on_response(res: Response) { + /// match res.error_for_status() { + /// Ok(_res) => (), + /// Err(err) => { + /// // asserting a 400 as an example + /// // it could be any status between 400...599 + /// assert_eq!( + /// err.status(), + /// Some(reqwest::StatusCode::BAD_REQUEST) + /// ); + /// } + /// } + /// } + /// # fn main() {} + /// ``` + pub fn error_for_status(self) -> crate::Result { + let status = self.status(); + let reason = self.extensions().get::().cloned(); + if status.is_client_error() || status.is_server_error() { + Err(crate::error::status_code(*self.url, status, reason)) + } else { + Ok(self) + } + } + + /// Turn a reference to a response into an error if the server returned an error. + /// + /// # Example + /// + /// ``` + /// # use reqwest::Response; + /// fn on_response(res: &Response) { + /// match res.error_for_status_ref() { + /// Ok(_res) => (), + /// Err(err) => { + /// // asserting a 400 as an example + /// // it could be any status between 400...599 + /// assert_eq!( + /// err.status(), + /// Some(reqwest::StatusCode::BAD_REQUEST) + /// ); + /// } + /// } + /// } + /// # fn main() {} + /// ``` + pub fn error_for_status_ref(&self) -> crate::Result<&Self> { + let status = self.status(); + let reason = self.extensions().get::().cloned(); + if status.is_client_error() || status.is_server_error() { + Err(crate::error::status_code(*self.url.clone(), status, reason)) + } else { + Ok(self) + } + } + + // private + + // The Response's body is an implementation detail. + // You no longer need to get a reference to it, there are async methods + // on the `Response` itself. 
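// [Editor's sketch, not part of the vendored diff] Putting `chunk()` and
// `error_for_status()` from above to work: stream a body to a file without
// buffering it whole. Assumes a tokio runtime; the output path is
// illustrative.
async fn download(url: &str) -> Result<(), Box<dyn std::error::Error>> {
    use tokio::io::AsyncWriteExt;
    let mut res = reqwest::get(url).await?.error_for_status()?;
    let mut file = tokio::fs::File::create("download.bin").await?;
    while let Some(chunk) = res.chunk().await? {
        file.write_all(&chunk).await?;
    }
    Ok(())
}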
+ // + // This method is just used by the blocking API. + #[cfg(feature = "blocking")] + pub(crate) fn body_mut(&mut self) -> &mut Decoder { + self.res.body_mut() + } +} + +impl fmt::Debug for Response { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Response") + .field("url", &self.url().as_str()) + .field("status", &self.status()) + .field("headers", self.headers()) + .finish() + } +} + +/// A `Response` can be piped as the `Body` of another request. +impl From for Body { + fn from(r: Response) -> Body { + Body::wrap(r.res.into_body()) + } +} + +// I'm not sure this conversion is that useful... People should be encouraged +// to use `http::Response`, not `reqwest::Response`. +impl> From> for Response { + fn from(r: http::Response) -> Response { + use crate::response::ResponseUrl; + + let (mut parts, body) = r.into_parts(); + let body: crate::async_impl::body::Body = body.into(); + let decoder = Decoder::detect( + &mut parts.headers, + ResponseBody::new(body.map_err(Into::into)), + Accepts::none(), + ); + let url = parts + .extensions + .remove::() + .unwrap_or_else(|| ResponseUrl(Url::parse("http://no.url.provided.local").unwrap())); + let url = url.0; + let res = hyper::Response::from_parts(parts, decoder); + Response { + res, + url: Box::new(url), + } + } +} + +/// A `Response` can be converted into a `http::Response`. +// It's supposed to be the inverse of the conversion above. +impl From for http::Response { + fn from(r: Response) -> http::Response { + let (parts, body) = r.res.into_parts(); + let body = Body::wrap(body); + http::Response::from_parts(parts, body) + } +} + +#[cfg(test)] +mod tests { + use super::Response; + use crate::ResponseBuilderExt; + use http::response::Builder; + use url::Url; + + #[test] + fn test_from_http_response() { + let url = Url::parse("http://example.com").unwrap(); + let response = Builder::new() + .status(200) + .url(url.clone()) + .body("foo") + .unwrap(); + let response = Response::from(response); + + assert_eq!(response.status(), 200); + assert_eq!(*response.url(), url); + } +} diff --git a/rust/reqwest/src/async_impl/upgrade.rs b/rust/reqwest/src/async_impl/upgrade.rs new file mode 100644 index 0000000000..5a6ed6cbbc --- /dev/null +++ b/rust/reqwest/src/async_impl/upgrade.rs @@ -0,0 +1,75 @@ +use std::pin::Pin; +use std::task::{self, Poll}; +use std::{fmt, io}; + +use hyper_util::rt::TokioIo; +use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; + +/// An upgraded HTTP connection. 
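// [Editor's sketch, not part of the vendored diff] Using the
// `Response::upgrade()` method defined at the end of this file: after a
// 101 Switching Protocols response, the connection implements
// AsyncRead + AsyncWrite. The URL and protocol name here are illustrative.
async fn upgrade_sketch() -> Result<(), Box<dyn std::error::Error>> {
    use tokio::io::{AsyncReadExt, AsyncWriteExt};
    let res = reqwest::Client::new()
        .get("http://example.org/chat")
        .header(reqwest::header::CONNECTION, "upgrade")
        .header(reqwest::header::UPGRADE, "foobar")
        .send()
        .await?;
    let mut io = res.upgrade().await?;
    io.write_all(b"ping").await?;
    let mut buf = [0u8; 4];
    io.read_exact(&mut buf).await?;
    Ok(())
}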
+pub struct Upgraded { + inner: TokioIo, +} + +impl AsyncRead for Upgraded { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut task::Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + Pin::new(&mut self.inner).poll_read(cx, buf) + } +} + +impl AsyncWrite for Upgraded { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut task::Context<'_>, + buf: &[u8], + ) -> Poll> { + Pin::new(&mut self.inner).poll_write(cx, buf) + } + + fn poll_write_vectored( + mut self: Pin<&mut Self>, + cx: &mut task::Context<'_>, + bufs: &[io::IoSlice<'_>], + ) -> Poll> { + Pin::new(&mut self.inner).poll_write_vectored(cx, bufs) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll> { + Pin::new(&mut self.inner).poll_flush(cx) + } + + fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll> { + Pin::new(&mut self.inner).poll_shutdown(cx) + } + + fn is_write_vectored(&self) -> bool { + self.inner.is_write_vectored() + } +} + +impl fmt::Debug for Upgraded { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Upgraded").finish() + } +} + +impl From for Upgraded { + fn from(inner: hyper::upgrade::Upgraded) -> Self { + Upgraded { + inner: TokioIo::new(inner), + } + } +} + +impl super::response::Response { + /// Consumes the response and returns a future for a possible HTTP upgrade. + pub async fn upgrade(self) -> crate::Result { + hyper::upgrade::on(self.res) + .await + .map(Upgraded::from) + .map_err(crate::error::upgrade) + } +} diff --git a/rust/reqwest/src/blocking/body.rs b/rust/reqwest/src/blocking/body.rs new file mode 100644 index 0000000000..f213b55bc9 --- /dev/null +++ b/rust/reqwest/src/blocking/body.rs @@ -0,0 +1,372 @@ +use std::fmt; +use std::fs::File; +use std::future::Future; +#[cfg(feature = "multipart")] +use std::io::Cursor; +use std::io::{self, Read}; +use std::mem::{self, MaybeUninit}; +use std::ptr; + +use bytes::Bytes; +use futures_channel::mpsc; + +use crate::async_impl; + +/// The body of a `Request`. +/// +/// In most cases, this is not needed directly, as the +/// [`RequestBuilder.body`][builder] method uses `Into`, which allows +/// passing many things (like a string or vector of bytes). +/// +/// [builder]: ./struct.RequestBuilder.html#method.body +#[derive(Debug)] +pub struct Body { + kind: Kind, +} + +impl Body { + /// Instantiate a `Body` from a reader. + /// + /// # Note + /// + /// While allowing for many types to be used, these bodies do not have + /// a way to reset to the beginning and be reused. This means that when + /// encountering a 307 or 308 status code, instead of repeating the + /// request at the new location, the `Response` will be returned with + /// the redirect status code set. + /// + /// ```rust + /// # use std::fs::File; + /// # use reqwest::blocking::Body; + /// # fn run() -> Result<(), Box> { + /// let file = File::open("national_secrets.txt")?; + /// let body = Body::new(file); + /// # Ok(()) + /// # } + /// ``` + /// + /// If you have a set of bytes, like `String` or `Vec`, using the + /// `From` implementations for `Body` will store the data in a manner + /// it can be reused. 
+ /// + /// ```rust + /// # use reqwest::blocking::Body; + /// # fn run() -> Result<(), Box> { + /// let s = "A stringy body"; + /// let body = Body::from(s); + /// # Ok(()) + /// # } + /// ``` + pub fn new(reader: R) -> Body { + Body { + kind: Kind::Reader(Box::from(reader), None), + } + } + + /// Create a `Body` from a `Read` where the size is known in advance + /// but the data should not be fully loaded into memory. This will + /// set the `Content-Length` header and stream from the `Read`. + /// + /// ```rust + /// # use std::fs::File; + /// # use reqwest::blocking::Body; + /// # fn run() -> Result<(), Box> { + /// let file = File::open("a_large_file.txt")?; + /// let file_size = file.metadata()?.len(); + /// let body = Body::sized(file, file_size); + /// # Ok(()) + /// # } + /// ``` + pub fn sized(reader: R, len: u64) -> Body { + Body { + kind: Kind::Reader(Box::from(reader), Some(len)), + } + } + + /// Returns the body as a byte slice if the body is already buffered in + /// memory. For streamed requests this method returns `None`. + pub fn as_bytes(&self) -> Option<&[u8]> { + match self.kind { + Kind::Reader(_, _) => None, + Kind::Bytes(ref bytes) => Some(bytes.as_ref()), + } + } + + /// Converts streamed requests to their buffered equivalent and + /// returns a reference to the buffer. If the request is already + /// buffered, this has no effect. + /// + /// Be aware that for large requests this method is expensive + /// and may cause your program to run out of memory. + pub fn buffer(&mut self) -> Result<&[u8], crate::Error> { + match self.kind { + Kind::Reader(ref mut reader, maybe_len) => { + let mut bytes = if let Some(len) = maybe_len { + Vec::with_capacity(len as usize) + } else { + Vec::new() + }; + io::copy(reader, &mut bytes).map_err(crate::error::builder)?; + self.kind = Kind::Bytes(bytes.into()); + self.buffer() + } + Kind::Bytes(ref bytes) => Ok(bytes.as_ref()), + } + } + + #[cfg(feature = "multipart")] + pub(crate) fn len(&self) -> Option { + match self.kind { + Kind::Reader(_, len) => len, + Kind::Bytes(ref bytes) => Some(bytes.len() as u64), + } + } + + #[cfg(feature = "multipart")] + pub(crate) fn into_reader(self) -> Reader { + match self.kind { + Kind::Reader(r, _) => Reader::Reader(r), + Kind::Bytes(b) => Reader::Bytes(Cursor::new(b)), + } + } + + pub(crate) fn into_async(self) -> (Option, async_impl::Body, Option) { + match self.kind { + Kind::Reader(read, len) => { + let (tx, rx) = mpsc::channel(0); + let tx = Sender { + body: (read, len), + tx, + }; + (Some(tx), async_impl::Body::stream(rx), len) + } + Kind::Bytes(chunk) => { + let len = chunk.len() as u64; + (None, async_impl::Body::reusable(chunk), Some(len)) + } + } + } + + pub(crate) fn try_clone(&self) -> Option { + self.kind.try_clone().map(|kind| Body { kind }) + } +} + +enum Kind { + Reader(Box, Option), + Bytes(Bytes), +} + +impl Kind { + fn try_clone(&self) -> Option { + match self { + Kind::Reader(..) 
=> None, + Kind::Bytes(v) => Some(Kind::Bytes(v.clone())), + } + } +} + +impl From> for Body { + #[inline] + fn from(v: Vec) -> Body { + Body { + kind: Kind::Bytes(v.into()), + } + } +} + +impl From for Body { + #[inline] + fn from(s: String) -> Body { + s.into_bytes().into() + } +} + +impl From<&'static [u8]> for Body { + #[inline] + fn from(s: &'static [u8]) -> Body { + Body { + kind: Kind::Bytes(Bytes::from_static(s)), + } + } +} + +impl From<&'static str> for Body { + #[inline] + fn from(s: &'static str) -> Body { + s.as_bytes().into() + } +} + +impl From for Body { + #[inline] + fn from(f: File) -> Body { + let len = f.metadata().map(|m| m.len()).ok(); + Body { + kind: Kind::Reader(Box::new(f), len), + } + } +} +impl From for Body { + #[inline] + fn from(b: Bytes) -> Body { + Body { + kind: Kind::Bytes(b), + } + } +} + +impl fmt::Debug for Kind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Kind::Reader(_, ref v) => f + .debug_struct("Reader") + .field("length", &DebugLength(v)) + .finish(), + Kind::Bytes(ref v) => fmt::Debug::fmt(v, f), + } + } +} + +struct DebugLength<'a>(&'a Option); + +impl<'a> fmt::Debug for DebugLength<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self.0 { + Some(ref len) => fmt::Debug::fmt(len, f), + None => f.write_str("Unknown"), + } + } +} + +#[cfg(feature = "multipart")] +pub(crate) enum Reader { + Reader(Box), + Bytes(Cursor), +} + +#[cfg(feature = "multipart")] +impl Read for Reader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match *self { + Reader::Reader(ref mut rdr) => rdr.read(buf), + Reader::Bytes(ref mut rdr) => rdr.read(buf), + } + } +} + +pub(crate) struct Sender { + body: (Box, Option), + tx: mpsc::Sender>, +} + +#[derive(Debug)] +struct Abort; + +impl fmt::Display for Abort { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("abort request body") + } +} + +impl std::error::Error for Abort {} + +async fn send_future(sender: Sender) -> Result<(), crate::Error> { + use bytes::{BufMut, BytesMut}; + use futures_util::SinkExt; + use std::cmp; + + let con_len = sender.body.1; + let cap = cmp::min(sender.body.1.unwrap_or(8192), 8192); + let mut written = 0; + let mut buf = BytesMut::zeroed(cap as usize); + buf.clear(); + let mut body = sender.body.0; + // Put in an option so that it can be consumed on error to call abort() + let mut tx = Some(sender.tx); + + loop { + if Some(written) == con_len { + // Written up to content-length, so stop. + return Ok(()); + } + + // The input stream is read only if the buffer is empty so + // that there is only one read in the buffer at any time. + // + // We need to know whether there is any data to send before + // we check the transmission channel (with poll_ready below) + // because sometimes the receiver disappears as soon as it + // considers the data is completely transmitted, which may + // be true. + // + // The use case is a web server that closes its + // input stream as soon as the data received is valid JSON. + // This behaviour is questionable, but it exists and the + // fact is that there is actually no remaining data to read. 
+ if buf.is_empty() { + if buf.capacity() == buf.len() { + buf.reserve(8192); + // zero out the reserved memory + let uninit = buf.spare_capacity_mut(); + let uninit_len = uninit.len(); + unsafe { + ptr::write_bytes(uninit.as_mut_ptr().cast::(), 0, uninit_len); + } + } + + let bytes = unsafe { + mem::transmute::<&mut [MaybeUninit], &mut [u8]>(buf.spare_capacity_mut()) + }; + match body.read(bytes) { + Ok(0) => { + // The buffer was empty and nothing's left to + // read. Return. + return Ok(()); + } + Ok(n) => unsafe { + buf.advance_mut(n); + }, + Err(e) => { + let _ = tx + .take() + .expect("tx only taken on error") + .clone() + .try_send(Err(Abort)); + return Err(crate::error::body(e)); + } + } + } + + // The only way to get here is when the buffer is not empty. + // We can check the transmission channel + + let buf_len = buf.len() as u64; + tx.as_mut() + .expect("tx only taken on error") + .send(Ok(buf.split().freeze())) + .await + .map_err(crate::error::body)?; + + written += buf_len; + } +} + +impl Sender { + // A `Future` that may do blocking read calls. + // As a `Future`, this integrates easily with `wait::timeout`. + pub(crate) fn send(self) -> impl Future> { + send_future(self) + } +} + +// useful for tests, but not publicly exposed +#[cfg(test)] +pub(crate) fn read_to_string(mut body: Body) -> io::Result { + let mut s = String::new(); + match body.kind { + Kind::Reader(ref mut reader, _) => reader.read_to_string(&mut s), + Kind::Bytes(ref mut bytes) => (&**bytes).read_to_string(&mut s), + } + .map(|_| s) +} diff --git a/rust/reqwest/src/blocking/client.rs b/rust/reqwest/src/blocking/client.rs new file mode 100644 index 0000000000..255d9d8aa4 --- /dev/null +++ b/rust/reqwest/src/blocking/client.rs @@ -0,0 +1,1524 @@ +#[cfg(any(feature = "native-tls", feature = "__rustls",))] +use std::any::Any; +use std::convert::TryInto; +use std::fmt; +use std::future::Future; +use std::net::IpAddr; +use std::net::SocketAddr; +use std::sync::Arc; +use std::task::{ready, Poll}; +use std::thread; +use std::time::Duration; + +use http::header::HeaderValue; +use log::{error, trace}; +use tokio::sync::{mpsc, oneshot}; +use tower::Layer; +use tower::Service; + +use super::request::{Request, RequestBuilder}; +use super::response::Response; +use super::wait; +use crate::connect::sealed::{Conn, Unnameable}; +#[cfg(unix)] +use crate::connect::uds::UnixSocketProvider; +use crate::connect::BoxedConnectorService; +use crate::dns::Resolve; +use crate::error::BoxError; +#[cfg(feature = "__tls")] +use crate::tls; +#[cfg(feature = "__rustls")] +use crate::tls::CertificateRevocationList; +#[cfg(feature = "__tls")] +use crate::Certificate; +#[cfg(any(feature = "native-tls", feature = "__rustls"))] +use crate::Identity; +use crate::{async_impl, header, redirect, IntoUrl, Method, Proxy}; + +/// A `Client` to make Requests with. +/// +/// The Client has various configuration values to tweak, but the defaults +/// are set to what is usually the most commonly desired value. To configure a +/// `Client`, use `Client::builder()`. +/// +/// The `Client` holds a connection pool internally, so it is advised that +/// you create one and **reuse** it. 
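// [Editor's sketch, not part of the vendored diff] The pooling advice above
// in practice: build one blocking Client and reuse it across requests so
// connections can be kept alive instead of re-established. Assumes the
// `blocking` feature.
fn reuse_client(urls: &[&str]) -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::new();
    for url in urls {
        let status = client.get(*url).send()?.status();
        println!("{url}: {status}");
    }
    Ok(())
}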
+/// +/// # Examples +/// +/// ```rust +/// use reqwest::blocking::Client; +/// # +/// # fn run() -> Result<(), reqwest::Error> { +/// let client = Client::new(); +/// let resp = client.get("http://httpbin.org/").send()?; +/// # drop(resp); +/// # Ok(()) +/// # } +/// +/// ``` +#[derive(Clone)] +pub struct Client { + inner: ClientHandle, +} + +/// A `ClientBuilder` can be used to create a `Client` with custom configuration. +/// +/// # Example +/// +/// ``` +/// # fn run() -> Result<(), reqwest::Error> { +/// use std::time::Duration; +/// +/// let client = reqwest::blocking::Client::builder() +/// .timeout(Duration::from_secs(10)) +/// .build()?; +/// # Ok(()) +/// # } +/// ``` +#[must_use] +pub struct ClientBuilder { + inner: async_impl::ClientBuilder, + timeout: Timeout, +} + +impl Default for ClientBuilder { + fn default() -> Self { + Self::new() + } +} + +impl ClientBuilder { + /// Constructs a new `ClientBuilder`. + /// + /// This is the same as `Client::builder()`. + pub fn new() -> Self { + ClientBuilder { + inner: async_impl::ClientBuilder::new(), + timeout: Timeout::default(), + } + } +} + +impl ClientBuilder { + /// Returns a `Client` that uses this `ClientBuilder` configuration. + /// + /// # Errors + /// + /// This method fails if TLS backend cannot be initialized, or the resolver + /// cannot load the system configuration. + /// + /// # Panics + /// + /// This method panics if called from within an async runtime. See docs on + /// [`reqwest::blocking`][crate::blocking] for details. + pub fn build(self) -> crate::Result { + ClientHandle::new(self).map(|handle| Client { inner: handle }) + } + + // Higher-level options + + /// Sets the `User-Agent` header to be used by this client. + /// + /// # Example + /// + /// ```rust + /// # fn doc() -> Result<(), reqwest::Error> { + /// // Name your user agent after your app? + /// static APP_USER_AGENT: &str = concat!( + /// env!("CARGO_PKG_NAME"), + /// "/", + /// env!("CARGO_PKG_VERSION"), + /// ); + /// + /// let client = reqwest::blocking::Client::builder() + /// .user_agent(APP_USER_AGENT) + /// .build()?; + /// let res = client.get("https://www.rust-lang.org").send()?; + /// # Ok(()) + /// # } + /// ``` + pub fn user_agent(self, value: V) -> ClientBuilder + where + V: TryInto, + V::Error: Into, + { + self.with_inner(move |inner| inner.user_agent(value)) + } + + /// Sets the default headers for every request. + /// + /// # Example + /// + /// ```rust + /// use reqwest::header; + /// # fn build_client() -> Result<(), reqwest::Error> { + /// let mut headers = header::HeaderMap::new(); + /// headers.insert("X-MY-HEADER", header::HeaderValue::from_static("value")); + /// headers.insert(header::AUTHORIZATION, header::HeaderValue::from_static("secret")); + /// + /// // Consider marking security-sensitive headers with `set_sensitive`. + /// let mut auth_value = header::HeaderValue::from_static("secret"); + /// auth_value.set_sensitive(true); + /// headers.insert(header::AUTHORIZATION, auth_value); + /// + /// // get a client builder + /// let client = reqwest::blocking::Client::builder() + /// .default_headers(headers) + /// .build()?; + /// let res = client.get("https://www.rust-lang.org").send()?; + /// # Ok(()) + /// # } + /// ``` + pub fn default_headers(self, headers: header::HeaderMap) -> ClientBuilder { + self.with_inner(move |inner| inner.default_headers(headers)) + } + + /// Enable a persistent cookie store for the client. + /// + /// Cookies received in responses will be preserved and included in + /// additional requests. 
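// [Editor's sketch, not part of the vendored diff] Enabling the cookie store
// described above (requires the `cookies` feature): cookies set by one
// response are replayed on later requests through the same client. URLs are
// illustrative.
fn cookie_session() -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::builder()
        .cookie_store(true)
        .build()?;
    client.get("https://example.org/login").send()?; // server may set cookies
    client.get("https://example.org/me").send()?; // cookies sent back here
    Ok(())
}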
+    ///
+    /// By default, no cookie store is used.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `cookies` feature to be enabled.
+    #[cfg(feature = "cookies")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "cookies")))]
+    pub fn cookie_store(self, enable: bool) -> ClientBuilder {
+        self.with_inner(|inner| inner.cookie_store(enable))
+    }
+
+    /// Set the persistent cookie store for the client.
+    ///
+    /// Cookies received in responses will be passed to this store, and
+    /// additional requests will query this store for cookies.
+    ///
+    /// By default, no cookie store is used.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `cookies` feature to be enabled.
+    #[cfg(feature = "cookies")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "cookies")))]
+    pub fn cookie_provider<C: crate::cookie::CookieStore + 'static>(
+        self,
+        cookie_store: Arc<C>,
+    ) -> ClientBuilder {
+        self.with_inner(|inner| inner.cookie_provider(cookie_store))
+    }
+
+    /// Enable auto gzip decompression by checking the `Content-Encoding` response header.
+    ///
+    /// If auto gzip decompression is turned on:
+    ///
+    /// - When sending a request, if the request's headers do not already contain
+    ///   `Accept-Encoding` **and** `Range` values, the `Accept-Encoding` header is set to `gzip`.
+    ///   The request body is **not** automatically compressed.
+    /// - When receiving a response, if its headers contain a `Content-Encoding` value equal
+    ///   to `gzip`, both the `Content-Encoding` and `Content-Length` headers are removed.
+    ///   The response body is automatically decompressed.
+    ///
+    /// If the `gzip` feature is turned on, the default option is enabled.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `gzip` feature to be enabled.
+    #[cfg(feature = "gzip")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "gzip")))]
+    pub fn gzip(self, enable: bool) -> ClientBuilder {
+        self.with_inner(|inner| inner.gzip(enable))
+    }
+
+    /// Enable auto brotli decompression by checking the `Content-Encoding` response header.
+    ///
+    /// If auto brotli decompression is turned on:
+    ///
+    /// - When sending a request, if the request's headers do not already contain
+    ///   `Accept-Encoding` **and** `Range` values, the `Accept-Encoding` header is set to `br`.
+    ///   The request body is **not** automatically compressed.
+    /// - When receiving a response, if its headers contain a `Content-Encoding` value equal
+    ///   to `br`, both the `Content-Encoding` and `Content-Length` headers are removed.
+    ///   The response body is automatically decompressed.
+    ///
+    /// If the `brotli` feature is turned on, the default option is enabled.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `brotli` feature to be enabled.
+    #[cfg(feature = "brotli")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "brotli")))]
+    pub fn brotli(self, enable: bool) -> ClientBuilder {
+        self.with_inner(|inner| inner.brotli(enable))
+    }
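// [Editor's sketch, not part of the vendored diff] The auto-decompression
// rules above in practice, here for gzip (requires the `gzip` feature): the
// client advertises `Accept-Encoding: gzip` and transparently decodes the
// body, unless the request sets its own Accept-Encoding or Range header.
fn gzip_client() -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::builder()
        .gzip(true)
        .build()?;
    let text = client.get("https://example.org/").send()?.text()?;
    println!("{} bytes of decoded text", text.len());
    Ok(())
}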
+    /// Enable auto zstd decompression by checking the `Content-Encoding` response header.
+    ///
+    /// If auto zstd decompression is turned on:
+    ///
+    /// - When sending a request, if the request's headers do not already contain
+    ///   `Accept-Encoding` **and** `Range` values, the `Accept-Encoding` header is set to `zstd`.
+    ///   The request body is **not** automatically compressed.
+    /// - When receiving a response, if its headers contain a `Content-Encoding` value of
+    ///   `zstd`, both `Content-Encoding` and `Content-Length` are removed from the
+    ///   headers' set. The response body is automatically decompressed.
+    ///
+    /// If the `zstd` feature is turned on, the default option is enabled.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `zstd` feature to be enabled.
+    #[cfg(feature = "zstd")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "zstd")))]
+    pub fn zstd(self, enable: bool) -> ClientBuilder {
+        self.with_inner(|inner| inner.zstd(enable))
+    }
+
+    /// Enable auto deflate decompression by checking the `Content-Encoding` response header.
+    ///
+    /// If auto deflate decompression is turned on:
+    ///
+    /// - When sending a request, if the request's headers do not already contain
+    ///   `Accept-Encoding` **and** `Range` values, the `Accept-Encoding` header is set to `deflate`.
+    ///   The request body is **not** automatically compressed.
+    /// - When receiving a response, if its headers contain a `Content-Encoding` value equal
+    ///   to `deflate`, both the `Content-Encoding` and `Content-Length` headers are removed.
+    ///   The response body is automatically decompressed.
+    ///
+    /// If the `deflate` feature is turned on, the default option is enabled.
+    ///
+    /// # Optional
+    ///
+    /// This requires the optional `deflate` feature to be enabled.
+    #[cfg(feature = "deflate")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "deflate")))]
+    pub fn deflate(self, enable: bool) -> ClientBuilder {
+        self.with_inner(|inner| inner.deflate(enable))
+    }
+
+    /// Disable auto response body gzip decompression.
+    ///
+    /// This method exists even if the optional `gzip` feature is not enabled.
+    /// This can be used to ensure a `Client` doesn't use gzip decompression
+    /// even if another dependency were to enable the optional `gzip` feature.
+    pub fn no_gzip(self) -> ClientBuilder {
+        self.with_inner(|inner| inner.no_gzip())
+    }
+
+    /// Disable auto response body brotli decompression.
+    ///
+    /// This method exists even if the optional `brotli` feature is not enabled.
+    /// This can be used to ensure a `Client` doesn't use brotli decompression
+    /// even if another dependency were to enable the optional `brotli` feature.
+    pub fn no_brotli(self) -> ClientBuilder {
+        self.with_inner(|inner| inner.no_brotli())
+    }
+
+    /// Disable auto response body zstd decompression.
+    ///
+    /// This method exists even if the optional `zstd` feature is not enabled.
+    /// This can be used to ensure a `Client` doesn't use zstd decompression
+    /// even if another dependency were to enable the optional `zstd` feature.
+    pub fn no_zstd(self) -> ClientBuilder {
+        self.with_inner(|inner| inner.no_zstd())
+    }
+
+    /// Disable auto response body deflate decompression.
+    ///
+    /// This method exists even if the optional `deflate` feature is not enabled.
+    /// This can be used to ensure a `Client` doesn't use deflate decompression
+    /// even if another dependency were to enable the optional `deflate` feature.
+    pub fn no_deflate(self) -> ClientBuilder {
+        self.with_inner(|inner| inner.no_deflate())
+    }
+
+    // Redirect options
+
+    /// Set a `redirect::Policy` for this client.
+    ///
+    /// Default will follow redirects up to a maximum of 10.
+    pub fn redirect(self, policy: redirect::Policy) -> ClientBuilder {
+        self.with_inner(move |inner| inner.redirect(policy))
+    }
+
+    /// Set a request retry policy.
+    ///
+    /// Default behavior is to retry protocol NACKs.
+    pub fn retry(self, policy: crate::retry::Builder) -> ClientBuilder {
+        self.with_inner(move |inner| inner.retry(policy))
+    }
+
+    /// Enable or disable automatic setting of the `Referer` header.
+    ///
+    /// Default is `true`.
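// [Editor's sketch, not part of the vendored diff] Tightening the redirect
// policy mentioned above: cap the chain at 5 hops instead of the default 10,
// or disable following entirely with `Policy::none()`.
fn limited_redirects() -> Result<(), reqwest::Error> {
    let client = reqwest::blocking::Client::builder()
        .redirect(reqwest::redirect::Policy::limited(5))
        .build()?;
    let res = client.get("https://example.org/").send()?;
    println!("final url: {}", res.url());
    Ok(())
}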
+ pub fn referer(self, enable: bool) -> ClientBuilder { + self.with_inner(|inner| inner.referer(enable)) + } + + // Proxy options + + /// Add a `Proxy` to the list of proxies the `Client` will use. + /// + /// # Note + /// + /// Adding a proxy will disable the automatic usage of the "system" proxy. + pub fn proxy(self, proxy: Proxy) -> ClientBuilder { + self.with_inner(move |inner| inner.proxy(proxy)) + } + + /// Clear all `Proxies`, so `Client` will use no proxy anymore. + /// + /// # Note + /// To add a proxy exclusion list, use [Proxy::no_proxy()] + /// on all desired proxies instead. + /// + /// This also disables the automatic usage of the "system" proxy. + pub fn no_proxy(self) -> ClientBuilder { + self.with_inner(move |inner| inner.no_proxy()) + } + + // Timeout options + + /// Set a timeout for connect, read and write operations of a `Client`. + /// + /// Default is 30 seconds. + /// + /// Pass `None` to disable timeout. + pub fn timeout(mut self, timeout: T) -> ClientBuilder + where + T: Into>, + { + self.timeout = Timeout(timeout.into()); + self + } + + /// Set a timeout for only the connect phase of a `Client`. + /// + /// Default is `None`. + pub fn connect_timeout(self, timeout: T) -> ClientBuilder + where + T: Into>, + { + let timeout = timeout.into(); + if let Some(dur) = timeout { + self.with_inner(|inner| inner.connect_timeout(dur)) + } else { + self + } + } + + /// Set whether connections should emit verbose logs. + /// + /// Enabling this option will emit [log][] messages at the `TRACE` level + /// for read and write operations on connections. + /// + /// [log]: https://crates.io/crates/log + pub fn connection_verbose(self, verbose: bool) -> ClientBuilder { + self.with_inner(move |inner| inner.connection_verbose(verbose)) + } + + // HTTP options + + /// Set an optional timeout for idle sockets being kept-alive. + /// + /// Pass `None` to disable timeout. + /// + /// Default is 90 seconds. + pub fn pool_idle_timeout(self, val: D) -> ClientBuilder + where + D: Into>, + { + self.with_inner(|inner| inner.pool_idle_timeout(val)) + } + + /// Sets the maximum idle connection per host allowed in the pool. + pub fn pool_max_idle_per_host(self, max: usize) -> ClientBuilder { + self.with_inner(move |inner| inner.pool_max_idle_per_host(max)) + } + + /// Send headers as title case instead of lowercase. + pub fn http1_title_case_headers(self) -> ClientBuilder { + self.with_inner(|inner| inner.http1_title_case_headers()) + } + + /// Set whether HTTP/1 connections will accept obsolete line folding for + /// header values. + /// + /// Newline codepoints (`\r` and `\n`) will be transformed to spaces when + /// parsing. + pub fn http1_allow_obsolete_multiline_headers_in_responses(self, value: bool) -> ClientBuilder { + self.with_inner(|inner| inner.http1_allow_obsolete_multiline_headers_in_responses(value)) + } + + /// Sets whether invalid header lines should be silently ignored in HTTP/1 responses. + pub fn http1_ignore_invalid_headers_in_responses(self, value: bool) -> ClientBuilder { + self.with_inner(|inner| inner.http1_ignore_invalid_headers_in_responses(value)) + } + + /// Set whether HTTP/1 connections will accept spaces between header + /// names and the colon that follow them in responses. + /// + /// Newline codepoints (\r and \n) will be transformed to spaces when + /// parsing. 
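// [Editor's sketch, not part of the vendored diff] Combining the timeout
// options above: a 3s connect budget inside an overall 10s per-request
// timeout, plus a shorter idle-pool timeout. Durations are illustrative.
fn timeouts() -> Result<(), reqwest::Error> {
    use std::time::Duration;
    let client = reqwest::blocking::Client::builder()
        .timeout(Duration::from_secs(10))
        .connect_timeout(Duration::from_secs(3))
        .pool_idle_timeout(Duration::from_secs(30))
        .build()?;
    client.get("https://example.org/").send().map(drop)
}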
+ pub fn http1_allow_spaces_after_header_name_in_responses(self, value: bool) -> ClientBuilder { + self.with_inner(|inner| inner.http1_allow_spaces_after_header_name_in_responses(value)) + } + + /// Only use HTTP/1. + pub fn http1_only(self) -> ClientBuilder { + self.with_inner(|inner| inner.http1_only()) + } + + /// Allow HTTP/0.9 responses + pub fn http09_responses(self) -> ClientBuilder { + self.with_inner(|inner| inner.http09_responses()) + } + + /// Only use HTTP/2. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_prior_knowledge(self) -> ClientBuilder { + self.with_inner(|inner| inner.http2_prior_knowledge()) + } + + /// Sets the `SETTINGS_INITIAL_WINDOW_SIZE` option for HTTP2 stream-level flow control. + /// + /// Default is currently 65,535 but may change internally to optimize for common uses. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_initial_stream_window_size(self, sz: impl Into>) -> ClientBuilder { + self.with_inner(|inner| inner.http2_initial_stream_window_size(sz)) + } + + /// Sets the max connection-level flow control for HTTP2 + /// + /// Default is currently 65,535 but may change internally to optimize for common uses. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_initial_connection_window_size(self, sz: impl Into>) -> ClientBuilder { + self.with_inner(|inner| inner.http2_initial_connection_window_size(sz)) + } + + /// Sets whether to use an adaptive flow control. + /// + /// Enabling this will override the limits set in `http2_initial_stream_window_size` and + /// `http2_initial_connection_window_size`. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_adaptive_window(self, enabled: bool) -> ClientBuilder { + self.with_inner(|inner| inner.http2_adaptive_window(enabled)) + } + + /// Sets the maximum frame size to use for HTTP2. + /// + /// Default is currently 16,384 but may change internally to optimize for common uses. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_max_frame_size(self, sz: impl Into>) -> ClientBuilder { + self.with_inner(|inner| inner.http2_max_frame_size(sz)) + } + + /// Sets the maximum size of received header frames for HTTP2. + /// + /// Default is currently 16KB, but can change. + #[cfg(feature = "http2")] + #[cfg_attr(docsrs, doc(cfg(feature = "http2")))] + pub fn http2_max_header_list_size(self, max_header_size_bytes: u32) -> ClientBuilder { + self.with_inner(|inner| inner.http2_max_header_list_size(max_header_size_bytes)) + } + + /// This requires the optional `http3` feature to be + /// enabled. + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(feature = "http3")))] + pub fn http3_prior_knowledge(self) -> ClientBuilder { + self.with_inner(|inner| inner.http3_prior_knowledge()) + } + + /// Maximum duration of inactivity to accept before timing out the QUIC connection. + /// + /// Please see docs in [`TransportConfig`] in [`quinn`]. + /// + /// [`TransportConfig`]: https://docs.rs/quinn/latest/quinn/struct.TransportConfig.html + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_max_idle_timeout(self, value: Duration) -> ClientBuilder { + self.with_inner(|inner| inner.http3_max_idle_timeout(value)) + } + + /// Maximum number of bytes the peer may transmit without acknowledgement on any one stream + /// before becoming blocked. 
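Assuming reqwest's optional `http2` feature, the HTTP/2 knobs above might be combined like so (values are illustrative):

```rust
// Requires reqwest's optional `http2` feature.
fn build_h2_client() -> Result<reqwest::blocking::Client, reqwest::Error> {
    reqwest::blocking::Client::builder()
        // Skip protocol negotiation and speak HTTP/2 from the first byte.
        .http2_prior_knowledge()
        // Let flow-control windows adapt to the observed bandwidth-delay
        // product, overriding any fixed initial window sizes.
        .http2_adaptive_window(true)
        .http2_max_frame_size(32_768u32)
        .build()
}
```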
+ /// + /// Please see docs in [`TransportConfig`] in [`quinn`]. + /// + /// [`TransportConfig`]: https://docs.rs/quinn/latest/quinn/struct.TransportConfig.html + /// + /// # Panics + /// + /// Panics if the value is over 2^62. + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_stream_receive_window(self, value: u64) -> ClientBuilder { + self.with_inner(|inner| inner.http3_stream_receive_window(value)) + } + + /// Maximum number of bytes the peer may transmit across all streams of a connection before + /// becoming blocked. + /// + /// Please see docs in [`TransportConfig`] in [`quinn`]. + /// + /// [`TransportConfig`]: https://docs.rs/quinn/latest/quinn/struct.TransportConfig.html + /// + /// # Panics + /// + /// Panics if the value is over 2^62. + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_conn_receive_window(self, value: u64) -> ClientBuilder { + self.with_inner(|inner| inner.http3_conn_receive_window(value)) + } + + /// Maximum number of bytes to transmit to a peer without acknowledgment + /// + /// Please see docs in [`TransportConfig`] in [`quinn`]. + /// + /// [`TransportConfig`]: https://docs.rs/quinn/latest/quinn/struct.TransportConfig.html + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_send_window(self, value: u64) -> ClientBuilder { + self.with_inner(|inner| inner.http3_send_window(value)) + } + + /// Override the default congestion control algorithm to use [BBR] + /// + /// The current default congestion control algorithm is [CUBIC]. This method overrides the + /// default. + /// + /// [BBR]: https://datatracker.ietf.org/doc/html/draft-ietf-ccwg-bbr + /// [CUBIC]: https://datatracker.ietf.org/doc/html/rfc8312 + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_congestion_bbr(self) -> ClientBuilder { + self.with_inner(|inner| inner.http3_congestion_bbr()) + } + + /// Set the maximum HTTP/3 header size this client is willing to accept. + /// + /// See [header size constraints] section of the specification for details. + /// + /// [header size constraints]: https://www.rfc-editor.org/rfc/rfc9114.html#name-header-size-constraints + /// + /// Please see docs in [`Builder`] in [`h3`]. + /// + /// [`Builder`]: https://docs.rs/h3/latest/h3/client/struct.Builder.html#method.max_field_section_size + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_max_field_section_size(self, value: u64) -> ClientBuilder { + self.with_inner(|inner| inner.http3_max_field_section_size(value)) + } + + /// Enable whether to send HTTP/3 protocol grease on the connections. + /// + /// HTTP/3 uses the concept of "grease" + /// + /// to prevent potential interoperability issues in the future. + /// In HTTP/3, the concept of grease is used to ensure that the protocol can evolve + /// and accommodate future changes without breaking existing implementations. + /// + /// Please see docs in [`Builder`] in [`h3`]. 
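The QUIC settings above are gated behind the unstable `http3` feature; purely as a sketch of how they compose, not a recommendation:

```rust
// Requires reqwest's optional (unstable) `http3` feature.
fn build_h3_client() -> Result<reqwest::blocking::Client, reqwest::Error> {
    reqwest::blocking::Client::builder()
        .http3_prior_knowledge()
        // Prefer BBR over the default CUBIC congestion controller.
        .http3_congestion_bbr()
        .build()
}
```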
+ /// + /// [`Builder`]: https://docs.rs/h3/latest/h3/client/struct.Builder.html#method.send_grease + #[cfg(feature = "http3")] + #[cfg_attr(docsrs, doc(cfg(all(reqwest_unstable, feature = "http3",))))] + pub fn http3_send_grease(self, enabled: bool) -> ClientBuilder { + self.with_inner(|inner| inner.http3_send_grease(enabled)) + } + + // TCP options + + /// Set whether sockets have `TCP_NODELAY` enabled. + /// + /// Default is `true`. + pub fn tcp_nodelay(self, enabled: bool) -> ClientBuilder { + self.with_inner(move |inner| inner.tcp_nodelay(enabled)) + } + + /// Bind to a local IP Address. + /// + /// # Example + /// + /// ``` + /// use std::net::IpAddr; + /// let local_addr = IpAddr::from([12, 4, 1, 8]); + /// let client = reqwest::blocking::Client::builder() + /// .local_address(local_addr) + /// .build().unwrap(); + /// ``` + pub fn local_address(self, addr: T) -> ClientBuilder + where + T: Into>, + { + self.with_inner(move |inner| inner.local_address(addr)) + } + + /// Bind to an interface by `SO_BINDTODEVICE`. + /// + /// # Example + /// + /// ``` + /// let interface = "lo"; + /// let client = reqwest::blocking::Client::builder() + /// .interface(interface) + /// .build().unwrap(); + /// ``` + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + pub fn interface(self, interface: &str) -> ClientBuilder { + self.with_inner(move |inner| inner.interface(interface)) + } + + /// Set that all sockets have `SO_KEEPALIVE` set with the supplied duration. + /// + /// If `None`, the option will not be set. + pub fn tcp_keepalive(self, val: D) -> ClientBuilder + where + D: Into>, + { + self.with_inner(move |inner| inner.tcp_keepalive(val)) + } + + /// Set that all sockets have `SO_KEEPALIVE` set with the supplied interval. + /// + /// If `None`, the option will not be set. + pub fn tcp_keepalive_interval(self, val: D) -> ClientBuilder + where + D: Into>, + { + self.with_inner(move |inner| inner.tcp_keepalive_interval(val)) + } + + /// Set that all sockets have `SO_KEEPALIVE` set with the supplied retry count. + /// + /// If `None`, the option will not be set. + pub fn tcp_keepalive_retries(self, retries: C) -> ClientBuilder + where + C: Into>, + { + self.with_inner(move |inner| inner.tcp_keepalive_retries(retries)) + } + + /// Set that all sockets have `TCP_USER_TIMEOUT` set with the supplied duration. + /// + /// This option controls how long transmitted data may remain unacknowledged before + /// the connection is force-closed. + /// + /// The current default is `None` (option disabled). + #[cfg(any(target_os = "android", target_os = "fuchsia", target_os = "linux"))] + pub fn tcp_user_timeout(self, val: D) -> ClientBuilder + where + D: Into>, + { + self.with_inner(move |inner| inner.tcp_user_timeout(val)) + } + + // Alt Transports + + /// Set that all connections will use this Unix socket. + /// + /// If a request URI uses the `https` scheme, TLS will still be used over + /// the Unix socket. + /// + /// # Note + /// + /// This option is not compatible with any of the TCP or Proxy options. + /// Setting this will ignore all those options previously set. + /// + /// Likewise, DNS resolution will not be done on the domain name. 
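A sketch of the TCP options above; the bind address is illustrative:

```rust
use std::net::IpAddr;
use std::time::Duration;

fn build_client() -> Result<reqwest::blocking::Client, reqwest::Error> {
    reqwest::blocking::Client::builder()
        // Disable Nagle's algorithm (this is already the default).
        .tcp_nodelay(true)
        // Bind outgoing sockets to a specific local address.
        .local_address(IpAddr::from([192, 168, 1, 10]))
        // Probe idle connections so dead peers are detected.
        .tcp_keepalive(Duration::from_secs(60))
        .build()
}
```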
+ #[cfg(unix)] + pub fn unix_socket(self, path: impl UnixSocketProvider) -> ClientBuilder { + self.with_inner(move |inner| inner.unix_socket(path)) + } + + // TLS options + + /// Add a custom root certificate. + /// + /// This allows connecting to a server that has a self-signed + /// certificate for example. This **does not** replace the existing + /// trusted store. + /// + /// # Example + /// + /// ``` + /// # use std::fs::File; + /// # use std::io::Read; + /// # fn build_client() -> Result<(), Box> { + /// // read a local binary DER encoded certificate + /// let der = std::fs::read("my-cert.der")?; + /// + /// // create a certificate + /// let cert = reqwest::Certificate::from_der(&der)?; + /// + /// // get a client builder + /// let client = reqwest::blocking::Client::builder() + /// .add_root_certificate(cert) + /// .build()?; + /// # drop(client); + /// # Ok(()) + /// # } + /// ``` + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn add_root_certificate(self, cert: Certificate) -> ClientBuilder { + self.with_inner(move |inner| inner.add_root_certificate(cert)) + } + + /// Add a certificate revocation list. + /// + /// + /// # Optional + /// + /// This requires the `rustls-tls(-...)` Cargo feature enabled. + #[cfg(feature = "__rustls")] + #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls")))] + pub fn add_crl(self, crl: CertificateRevocationList) -> ClientBuilder { + self.with_inner(move |inner| inner.add_crl(crl)) + } + + /// Add multiple certificate revocation lists. + /// + /// + /// # Optional + /// + /// This requires the `rustls-tls(-...)` Cargo feature enabled. + #[cfg(feature = "__rustls")] + #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls")))] + pub fn add_crls( + self, + crls: impl IntoIterator, + ) -> ClientBuilder { + self.with_inner(move |inner| inner.add_crls(crls)) + } + + /// Controls the use of built-in system certificates during certificate validation. + /// + /// Defaults to `true` -- built-in system certs will be used. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn tls_built_in_root_certs(self, tls_built_in_root_certs: bool) -> ClientBuilder { + self.with_inner(move |inner| inner.tls_built_in_root_certs(tls_built_in_root_certs)) + } + + /// Sets whether to load webpki root certs with rustls. + /// + /// If the feature is enabled, this value is `true` by default. + #[cfg(feature = "rustls-tls-webpki-roots-no-provider")] + #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls-webpki-roots-no-provider")))] + pub fn tls_built_in_webpki_certs(self, enabled: bool) -> ClientBuilder { + self.with_inner(move |inner| inner.tls_built_in_webpki_certs(enabled)) + } + + /// Sets whether to load native root certs with rustls. + /// + /// If the feature is enabled, this value is `true` by default. 
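Building on `add_root_certificate` and `tls_built_in_root_certs`, a client can be pinned to a single private CA. A hedged sketch, assuming one of the TLS features is enabled and the caller supplies the CA's DER bytes:

```rust
fn build_pinned_client(
    ca_der: &[u8],
) -> Result<reqwest::blocking::Client, Box<dyn std::error::Error>> {
    // Trust only the supplied CA: drop the built-in system roots first.
    let ca = reqwest::Certificate::from_der(ca_der)?;
    let client = reqwest::blocking::Client::builder()
        .tls_built_in_root_certs(false)
        .add_root_certificate(ca)
        .build()?;
    Ok(client)
}
```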
+ #[cfg(feature = "rustls-tls-native-roots-no-provider")] + #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls-native-roots-no-provider")))] + pub fn tls_built_in_native_certs(self, enabled: bool) -> ClientBuilder { + self.with_inner(move |inner| inner.tls_built_in_native_certs(enabled)) + } + + /// Sets the identity to be used for client certificate authentication. + /// + /// # Optional + /// + /// This requires the optional `native-tls` or `rustls-tls(-...)` feature to be + /// enabled. + #[cfg(any(feature = "native-tls", feature = "__rustls"))] + #[cfg_attr(docsrs, doc(cfg(any(feature = "native-tls", feature = "rustls-tls"))))] + pub fn identity(self, identity: Identity) -> ClientBuilder { + self.with_inner(move |inner| inner.identity(identity)) + } + + /// Controls the use of hostname verification. + /// + /// Defaults to `false`. + /// + /// # Warning + /// + /// You should think very carefully before you use this method. If + /// hostname verification is not used, any valid certificate for any + /// site will be trusted for use from any other. This introduces a + /// significant vulnerability to man-in-the-middle attacks. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn danger_accept_invalid_hostnames(self, accept_invalid_hostname: bool) -> ClientBuilder { + self.with_inner(|inner| inner.danger_accept_invalid_hostnames(accept_invalid_hostname)) + } + + /// Controls the use of certificate validation. + /// + /// Defaults to `false`. + /// + /// # Warning + /// + /// You should think very carefully before using this method. If + /// invalid certificates are trusted, *any* certificate for *any* site + /// will be trusted for use. This includes expired certificates. This + /// introduces significant vulnerabilities, and should only be used + /// as a last resort. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn danger_accept_invalid_certs(self, accept_invalid_certs: bool) -> ClientBuilder { + self.with_inner(|inner| inner.danger_accept_invalid_certs(accept_invalid_certs)) + } + + /// Controls the use of TLS server name indication. + /// + /// Defaults to `true`. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn tls_sni(self, tls_sni: bool) -> ClientBuilder { + self.with_inner(|inner| inner.tls_sni(tls_sni)) + } + + /// Set the minimum required TLS version for connections. + /// + /// By default, the TLS backend's own default is used. + /// + /// # Errors + /// + /// A value of `tls::Version::TLS_1_3` will cause an error with the + /// `native-tls`/`default-tls` backend. This does not mean the version + /// isn't supported, just that it can't be set as a minimum due to + /// technical limitations. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. 
+ #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn min_tls_version(self, version: tls::Version) -> ClientBuilder { + self.with_inner(|inner| inner.min_tls_version(version)) + } + + /// Set the maximum allowed TLS version for connections. + /// + /// By default, there's no maximum. + /// + /// # Errors + /// + /// A value of `tls::Version::TLS_1_3` will cause an error with the + /// `native-tls`/`default-tls` backend. This does not mean the version + /// isn't supported, just that it can't be set as a maximum due to + /// technical limitations. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn max_tls_version(self, version: tls::Version) -> ClientBuilder { + self.with_inner(|inner| inner.max_tls_version(version)) + } + + /// Force using the native TLS backend. + /// + /// Since multiple TLS backends can be optionally enabled, this option will + /// force the `native-tls` backend to be used for this `Client`. + /// + /// # Optional + /// + /// This requires the optional `native-tls` feature to be enabled. + #[cfg(feature = "native-tls")] + #[cfg_attr(docsrs, doc(cfg(feature = "native-tls")))] + pub fn use_native_tls(self) -> ClientBuilder { + self.with_inner(move |inner| inner.use_native_tls()) + } + + /// Force using the Rustls TLS backend. + /// + /// Since multiple TLS backends can be optionally enabled, this option will + /// force the `rustls` backend to be used for this `Client`. + /// + /// # Optional + /// + /// This requires the optional `rustls-tls(-...)` feature to be enabled. + #[cfg(feature = "__rustls")] + #[cfg_attr(docsrs, doc(cfg(feature = "rustls-tls")))] + pub fn use_rustls_tls(self) -> ClientBuilder { + self.with_inner(move |inner| inner.use_rustls_tls()) + } + + /// Add TLS information as `TlsInfo` extension to responses. + /// + /// # Optional + /// + /// This requires the optional `default-tls`, `native-tls`, or `rustls-tls(-...)` + /// feature to be enabled. + #[cfg(feature = "__tls")] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "default-tls", + feature = "native-tls", + feature = "rustls-tls" + ))) + )] + pub fn tls_info(self, tls_info: bool) -> ClientBuilder { + self.with_inner(|inner| inner.tls_info(tls_info)) + } + + /// Use a preconfigured TLS backend. + /// + /// If the passed `Any` argument is not a TLS backend that reqwest + /// understands, the `ClientBuilder` will error when calling `build`. + /// + /// # Advanced + /// + /// This is an advanced option, and can be somewhat brittle. Usage requires + /// keeping the preconfigured TLS argument version in sync with reqwest, + /// since version mismatches will result in an "unknown" TLS backend. + /// + /// If possible, it's preferable to use the methods on `ClientBuilder` + /// to configure reqwest's TLS. + /// + /// # Optional + /// + /// This requires one of the optional features `native-tls` or + /// `rustls-tls(-...)` to be enabled. 
+ #[cfg(any(feature = "native-tls", feature = "__rustls",))] + #[cfg_attr(docsrs, doc(cfg(any(feature = "native-tls", feature = "rustls-tls"))))] + pub fn use_preconfigured_tls(self, tls: impl Any) -> ClientBuilder { + self.with_inner(move |inner| inner.use_preconfigured_tls(tls)) + } + + /// Enables the [hickory-dns](hickory_resolver) async resolver instead of a default threadpool using `getaddrinfo`. + /// + /// If the `hickory-dns` feature is turned on, the default option is enabled. + /// + /// # Optional + /// + /// This requires the optional `hickory-dns` feature to be enabled + #[cfg(feature = "hickory-dns")] + #[cfg_attr(docsrs, doc(cfg(feature = "hickory-dns")))] + #[deprecated(note = "use `hickory_dns` instead", since = "0.12.0")] + pub fn trust_dns(self, enable: bool) -> ClientBuilder { + self.with_inner(|inner| inner.hickory_dns(enable)) + } + + /// Enables the [hickory-dns](hickory_resolver) async resolver instead of a default threadpool using `getaddrinfo`. + /// + /// If the `hickory-dns` feature is turned on, the default option is enabled. + /// + /// # Optional + /// + /// This requires the optional `hickory-dns` feature to be enabled + #[cfg(feature = "hickory-dns")] + #[cfg_attr(docsrs, doc(cfg(feature = "hickory-dns")))] + pub fn hickory_dns(self, enable: bool) -> ClientBuilder { + self.with_inner(|inner| inner.hickory_dns(enable)) + } + + /// Disables the hickory-dns async resolver. + /// + /// This method exists even if the optional `hickory-dns` feature is not enabled. + /// This can be used to ensure a `Client` doesn't use the hickory-dns async resolver + /// even if another dependency were to enable the optional `hickory-dns` feature. + #[deprecated(note = "use `no_hickory_dns` instead", since = "0.12.0")] + pub fn no_trust_dns(self) -> ClientBuilder { + self.with_inner(|inner| inner.no_hickory_dns()) + } + + /// Disables the hickory-dns async resolver. + /// + /// This method exists even if the optional `hickory-dns` feature is not enabled. + /// This can be used to ensure a `Client` doesn't use the hickory-dns async resolver + /// even if another dependency were to enable the optional `hickory-dns` feature. + pub fn no_hickory_dns(self) -> ClientBuilder { + self.with_inner(|inner| inner.no_hickory_dns()) + } + + /// Restrict the Client to be used with HTTPS only requests. + /// + /// Defaults to false. + pub fn https_only(self, enabled: bool) -> ClientBuilder { + self.with_inner(|inner| inner.https_only(enabled)) + } + + /// Override DNS resolution for specific domains to a particular IP address. + /// + /// Set the port to `0` to use the conventional port for the given scheme (e.g. 80 for http). + /// Ports in the URL itself will always be used instead of the port in the overridden addr. + pub fn resolve(self, domain: &str, addr: SocketAddr) -> ClientBuilder { + self.resolve_to_addrs(domain, &[addr]) + } + + /// Override DNS resolution for specific domains to particular IP addresses. + /// + /// Set the port to `0` to use the conventional port for the given scheme (e.g. 80 for http). + /// Ports in the URL itself will always be used instead of the port in the overridden addr. + pub fn resolve_to_addrs(self, domain: &str, addrs: &[SocketAddr]) -> ClientBuilder { + self.with_inner(|inner| inner.resolve_to_addrs(domain, addrs)) + } + + /// Override the DNS resolver implementation. + /// + /// Pass an `Arc` wrapping a trait object implementing `Resolve`. 
+ /// Overrides for specific names passed to `resolve` and `resolve_to_addrs` will + /// still be applied on top of this resolver. + pub fn dns_resolver(self, resolver: Arc) -> ClientBuilder { + self.with_inner(|inner| inner.dns_resolver(resolver)) + } + + /// Adds a new Tower [`Layer`](https://docs.rs/tower/latest/tower/trait.Layer.html) to the + /// base connector [`Service`](https://docs.rs/tower/latest/tower/trait.Service.html) which + /// is responsible for connection establishment. + /// + /// Each subsequent invocation of this function will wrap previous layers. + /// + /// Example usage: + /// ``` + /// use std::time::Duration; + /// + /// let client = reqwest::blocking::Client::builder() + /// // resolved to outermost layer, meaning while we are waiting on concurrency limit + /// .connect_timeout(Duration::from_millis(200)) + /// // underneath the concurrency check, so only after concurrency limit lets us through + /// .connector_layer(tower::timeout::TimeoutLayer::new(Duration::from_millis(50))) + /// .connector_layer(tower::limit::concurrency::ConcurrencyLimitLayer::new(2)) + /// .build() + /// .unwrap(); + /// ``` + pub fn connector_layer(self, layer: L) -> ClientBuilder + where + L: Layer + Clone + Send + Sync + 'static, + L::Service: + Service + Clone + Send + Sync + 'static, + >::Future: Send + 'static, + { + self.with_inner(|inner| inner.connector_layer(layer)) + } + + // private + + fn with_inner(mut self, func: F) -> ClientBuilder + where + F: FnOnce(async_impl::ClientBuilder) -> async_impl::ClientBuilder, + { + self.inner = func(self.inner); + self + } +} + +impl From for ClientBuilder { + fn from(builder: async_impl::ClientBuilder) -> Self { + Self { + inner: builder, + timeout: Timeout::default(), + } + } +} + +impl Default for Client { + fn default() -> Self { + Self::new() + } +} + +impl Client { + /// Constructs a new `Client`. + /// + /// # Panic + /// + /// This method panics if TLS backend cannot be initialized, or the resolver + /// cannot load the system configuration. + /// + /// Use `Client::builder()` if you wish to handle the failure as an `Error` + /// instead of panicking. + /// + /// This method also panics if called from within an async runtime. See docs + /// on [`reqwest::blocking`][crate::blocking] for details. + pub fn new() -> Client { + ClientBuilder::new().build().expect("Client::new()") + } + + /// Creates a `ClientBuilder` to configure a `Client`. + /// + /// This is the same as `ClientBuilder::new()`. + pub fn builder() -> ClientBuilder { + ClientBuilder::new() + } + + /// Convenience method to make a `GET` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn get(&self, url: U) -> RequestBuilder { + self.request(Method::GET, url) + } + + /// Convenience method to make a `POST` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn post(&self, url: U) -> RequestBuilder { + self.request(Method::POST, url) + } + + /// Convenience method to make a `PUT` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn put(&self, url: U) -> RequestBuilder { + self.request(Method::PUT, url) + } + + /// Convenience method to make a `PATCH` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. 
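As the module docs recommend, build one `Client` and reuse it across requests to benefit from connection pooling; a sketch using the convenience methods (URLs illustrative):

```rust
fn fetch_both() -> Result<(String, String), reqwest::Error> {
    let client = reqwest::blocking::Client::new();
    let a = client.get("https://httpbin.org/get").send()?.text()?;
    let b = client
        .post("https://httpbin.org/post")
        .body("hello")
        .send()?
        .text()?;
    Ok((a, b))
}
```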
+ pub fn patch(&self, url: U) -> RequestBuilder { + self.request(Method::PATCH, url) + } + + /// Convenience method to make a `DELETE` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn delete(&self, url: U) -> RequestBuilder { + self.request(Method::DELETE, url) + } + + /// Convenience method to make a `HEAD` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn head(&self, url: U) -> RequestBuilder { + self.request(Method::HEAD, url) + } + + /// Start building a `Request` with the `Method` and `Url`. + /// + /// Returns a `RequestBuilder`, which will allow setting headers and + /// request body before sending. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn request(&self, method: Method, url: U) -> RequestBuilder { + let req = url.into_url().map(move |url| Request::new(method, url)); + RequestBuilder::new(self.clone(), req) + } + + /// Executes a `Request`. + /// + /// A `Request` can be built manually with `Request::new()` or obtained + /// from a RequestBuilder with `RequestBuilder::build()`. + /// + /// You should prefer to use the `RequestBuilder` and + /// `RequestBuilder::send()`. + /// + /// # Errors + /// + /// This method fails if there was an error while sending request, + /// or redirect limit was exhausted. + pub fn execute(&self, request: Request) -> crate::Result { + self.inner.execute_request(request) + } +} + +impl fmt::Debug for Client { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Client") + //.field("gzip", &self.inner.gzip) + //.field("redirect_policy", &self.inner.redirect_policy) + //.field("referer", &self.inner.referer) + .finish() + } +} + +impl fmt::Debug for ClientBuilder { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.inner.fmt(f) + } +} + +#[derive(Clone)] +struct ClientHandle { + timeout: Timeout, + inner: Arc, +} + +type OneshotResponse = oneshot::Sender>; +type ThreadSender = mpsc::UnboundedSender<(async_impl::Request, OneshotResponse)>; + +struct InnerClientHandle { + tx: Option, + thread: Option>, +} + +impl Drop for InnerClientHandle { + fn drop(&mut self) { + let id = self + .thread + .as_ref() + .map(|h| h.thread().id()) + .expect("thread not dropped yet"); + + trace!("closing runtime thread ({id:?})"); + self.tx.take(); + trace!("signaled close for runtime thread ({id:?})"); + self.thread.take().map(|h| h.join()); + trace!("closed runtime thread ({id:?})"); + } +} + +impl ClientHandle { + fn new(builder: ClientBuilder) -> crate::Result { + let timeout = builder.timeout; + let builder = builder.inner; + let (tx, rx) = mpsc::unbounded_channel::<(async_impl::Request, OneshotResponse)>(); + let (spawn_tx, spawn_rx) = oneshot::channel::>(); + let handle = thread::Builder::new() + .name("reqwest-internal-sync-runtime".into()) + .spawn(move || { + use tokio::runtime; + let rt = match runtime::Builder::new_current_thread() + .enable_all() + .build() + .map_err(crate::error::builder) + { + Err(e) => { + if let Err(e) = spawn_tx.send(Err(e)) { + error!("Failed to communicate runtime creation failure: {e:?}"); + } + return; + } + Ok(v) => v, + }; + + let f = async move { + let client = match builder.build() { + Err(e) => { + if let Err(e) = spawn_tx.send(Err(e)) { + error!("Failed to communicate client creation failure: {e:?}"); + } + return; + } + Ok(v) => v, + }; + if let Err(e) = spawn_tx.send(Ok(())) { + error!("Failed to 
communicate successful startup: {e:?}"); + return; + } + + let mut rx = rx; + + while let Some((req, req_tx)) = rx.recv().await { + let req_fut = client.execute(req); + tokio::spawn(forward(req_fut, req_tx)); + } + + trace!("({:?}) Receiver is shutdown", thread::current().id()); + }; + + trace!("({:?}) start runtime::block_on", thread::current().id()); + rt.block_on(f); + trace!("({:?}) end runtime::block_on", thread::current().id()); + drop(rt); + trace!("({:?}) finished", thread::current().id()); + }) + .map_err(crate::error::builder)?; + + // Wait for the runtime thread to start up... + match wait::timeout(spawn_rx, None) { + Ok(Ok(())) => (), + Ok(Err(err)) => return Err(err), + Err(_canceled) => event_loop_panicked(), + } + + let inner_handle = Arc::new(InnerClientHandle { + tx: Some(tx), + thread: Some(handle), + }); + + Ok(ClientHandle { + timeout, + inner: inner_handle, + }) + } + + fn execute_request(&self, req: Request) -> crate::Result { + let (tx, rx) = oneshot::channel(); + let (req, body) = req.into_async(); + let url = req.url().clone(); + let timeout = req.timeout().copied().or(self.timeout.0); + + self.inner + .tx + .as_ref() + .expect("core thread exited early") + .send((req, tx)) + .expect("core thread panicked"); + + let result: Result, wait::Waited> = + if let Some(body) = body { + let f = async move { + body.send().await?; + rx.await.map_err(|_canceled| event_loop_panicked()) + }; + wait::timeout(f, timeout) + } else { + let f = async move { rx.await.map_err(|_canceled| event_loop_panicked()) }; + wait::timeout(f, timeout) + }; + + match result { + Ok(Err(err)) => Err(err.with_url(url)), + Ok(Ok(res)) => Ok(Response::new( + res, + timeout, + KeepCoreThreadAlive(Some(self.inner.clone())), + )), + Err(wait::Waited::TimedOut(e)) => Err(crate::error::request(e).with_url(url)), + Err(wait::Waited::Inner(err)) => Err(err.with_url(url)), + } + } +} + +async fn forward(fut: F, mut tx: OneshotResponse) +where + F: Future>, +{ + futures_util::pin_mut!(fut); + + // "select" on the sender being canceled, and the future completing + let res = std::future::poll_fn(|cx| { + match fut.as_mut().poll(cx) { + Poll::Ready(val) => Poll::Ready(Some(val)), + Poll::Pending => { + // check if the callback is canceled + ready!(tx.poll_closed(cx)); + Poll::Ready(None) + } + } + }) + .await; + + if let Some(res) = res { + let _ = tx.send(res); + } + // else request is canceled +} + +#[derive(Clone, Copy)] +struct Timeout(Option); + +impl Default for Timeout { + fn default() -> Timeout { + // default mentioned in ClientBuilder::timeout() doc comment + Timeout(Some(Duration::from_secs(30))) + } +} + +pub(crate) struct KeepCoreThreadAlive(#[allow(dead_code)] Option>); + +impl KeepCoreThreadAlive { + pub(crate) fn empty() -> KeepCoreThreadAlive { + KeepCoreThreadAlive(None) + } +} + +#[cold] +#[inline(never)] +fn event_loop_panicked() -> ! { + // The only possible reason there would be a Canceled error + // is if the thread running the event loop panicked. We could return + // an Err here, like a BrokenPipe, but the Client is not + // recoverable. Additionally, the panic in the other thread + // is not normal, and should likely be propagated. + panic!("event loop thread panicked"); +} diff --git a/rust/reqwest/src/blocking/mod.rs b/rust/reqwest/src/blocking/mod.rs new file mode 100644 index 0000000000..76ced8f595 --- /dev/null +++ b/rust/reqwest/src/blocking/mod.rs @@ -0,0 +1,108 @@ +//! A blocking Client API. +//! +//! The blocking `Client` will block the current thread to execute, instead +//! 
of returning futures that need to be executed on a runtime. +//! +//! Conversely, the functionality in `reqwest::blocking` must *not* be executed +//! within an async runtime, or it will panic when attempting to block. If +//! calling directly from an async function, consider using an async +//! [`reqwest::Client`][crate::Client] instead. If the immediate context is only +//! synchronous, but a transitive caller is async, consider changing that caller +//! to use [`tokio::task::spawn_blocking`] around the calls that need to block. +//! +//! # Optional +//! +//! This requires the optional `blocking` feature to be enabled. +//! +//! # Making a GET request +//! +//! For a single request, you can use the [`get`] shortcut method. +//! +//! ```rust +//! # use reqwest::{Error, Response}; +//! +//! # fn run() -> Result<(), Error> { +//! let body = reqwest::blocking::get("https://www.rust-lang.org")? +//! .text()?; +//! +//! println!("body = {body:?}"); +//! # Ok(()) +//! # } +//! ``` +//! +//! Additionally, the blocking [`Response`] struct implements Rust's +//! `Read` trait, so many useful standard library and third party crates will +//! have convenience methods that take a `Response` anywhere `T: Read` is +//! acceptable. +//! +//! **NOTE**: If you plan to perform multiple requests, it is best to create a +//! [`Client`] and reuse it, taking advantage of keep-alive connection pooling. +//! +//! # Making POST requests (or setting request bodies) +//! +//! There are several ways you can set the body of a request. The basic one is +//! by using the `body()` method of a [`RequestBuilder`]. This lets you set the +//! exact raw bytes of what the body should be. It accepts various types, +//! including `String`, `Vec`, and `File`. If you wish to pass a custom +//! Reader, you can use the `reqwest::blocking::Body::new()` constructor. +//! +//! ```rust +//! # use reqwest::Error; +//! # +//! # fn run() -> Result<(), Error> { +//! let client = reqwest::blocking::Client::new(); +//! let res = client.post("http://httpbin.org/post") +//! .body("the exact body that is sent") +//! .send()?; +//! # Ok(()) +//! # } +//! ``` +//! +//! ## And More +//! +//! Most features available to the asynchronous `Client` are also available, +//! on the blocking `Client`, see those docs for more. + +mod body; +mod client; +#[cfg(feature = "multipart")] +pub mod multipart; +mod request; +mod response; +mod wait; + +pub use self::body::Body; +pub use self::client::{Client, ClientBuilder}; +pub use self::request::{Request, RequestBuilder}; +pub use self::response::Response; + +/// Shortcut method to quickly make a *blocking* `GET` request. +/// +/// **NOTE**: This function creates a new internal `Client` on each call, +/// and so should not be used if making many requests. Create a +/// [`Client`](./struct.Client.html) instead. +/// +/// # Examples +/// +/// ```rust +/// # fn run() -> Result<(), reqwest::Error> { +/// let body = reqwest::blocking::get("https://www.rust-lang.org")? +/// .text()?; +/// # Ok(()) +/// # } +/// # fn main() { } +/// ``` +/// +/// # Errors +/// +/// This function fails if: +/// +/// - the native TLS backend cannot be initialized, +/// - the supplied `Url` cannot be parsed, +/// - there was an error while sending request, +/// - a redirect loop was detected, +/// - the redirect limit was exhausted, or +/// - the total download time exceeds 30 seconds. 
+pub fn get(url: T) -> crate::Result { + Client::builder().build()?.get(url).send() +} diff --git a/rust/reqwest/src/blocking/multipart.rs b/rust/reqwest/src/blocking/multipart.rs new file mode 100644 index 0000000000..9ceab79098 --- /dev/null +++ b/rust/reqwest/src/blocking/multipart.rs @@ -0,0 +1,494 @@ +//! multipart/form-data +//! +//! To send a `multipart/form-data` body, a [`Form`] is built up, adding +//! fields or customized [`Part`]s, and then calling the +//! [`multipart`][builder] method on the `RequestBuilder`. +//! +//! # Example +//! +//! ``` +//! use reqwest::blocking::multipart; +//! +//! # fn run() -> Result<(), Box> { +//! let form = multipart::Form::new() +//! // Adding just a simple text field... +//! .text("username", "seanmonstar") +//! // And a file... +//! .file("photo", "/path/to/photo.png")?; +//! +//! // Customize all the details of a Part if needed... +//! let bio = multipart::Part::text("hallo peeps") +//! .file_name("bio.txt") +//! .mime_str("text/plain")?; +//! +//! // Add the custom part to our form... +//! let form = form.part("biography", bio); +//! +//! // And finally, send the form +//! let client = reqwest::blocking::Client::new(); +//! let resp = client +//! .post("http://localhost:8080/user") +//! .multipart(form) +//! .send()?; +//! # Ok(()) +//! # } +//! # fn main() {} +//! ``` +//! +//! [builder]: ../struct.RequestBuilder.html#method.multipart +use std::borrow::Cow; +use std::fmt; +use std::fs::File; +use std::io::{self, Cursor, Read}; +use std::path::Path; + +use mime_guess::{self, Mime}; + +use super::Body; +use crate::async_impl::multipart::{FormParts, PartMetadata, PartProps}; +use crate::header::HeaderMap; + +/// A multipart/form-data request. +pub struct Form { + inner: FormParts, +} + +/// A field in a multipart form. +pub struct Part { + meta: PartMetadata, + value: Body, +} + +impl Default for Form { + fn default() -> Self { + Self::new() + } +} + +impl Form { + /// Creates a new Form without any content. + pub fn new() -> Form { + Form { + inner: FormParts::new(), + } + } + + /// Get the boundary that this form will use. + #[inline] + pub fn boundary(&self) -> &str { + self.inner.boundary() + } + + /// Add a data field with supplied name and value. + /// + /// # Examples + /// + /// ``` + /// let form = reqwest::blocking::multipart::Form::new() + /// .text("username", "seanmonstar") + /// .text("password", "secret"); + /// ``` + pub fn text(self, name: T, value: U) -> Form + where + T: Into>, + U: Into>, + { + self.part(name, Part::text(value)) + } + + /// Adds a file field. + /// + /// The path will be used to try to guess the filename and mime. + /// + /// # Examples + /// + /// ```no_run + /// # fn run() -> std::io::Result<()> { + /// let form = reqwest::blocking::multipart::Form::new() + /// .file("key", "/path/to/file")?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Errors + /// + /// Errors when the file cannot be opened. + pub fn file(self, name: T, path: U) -> io::Result + where + T: Into>, + U: AsRef, + { + Ok(self.part(name, Part::file(path)?)) + } + + /// Adds a customized Part. + pub fn part(self, name: T, part: Part) -> Form + where + T: Into>, + { + self.with_inner(move |inner| inner.part(name, part)) + } + + /// Configure this `Form` to percent-encode using the `path-segment` rules. + pub fn percent_encode_path_segment(self) -> Form { + self.with_inner(|inner| inner.percent_encode_path_segment()) + } + + /// Configure this `Form` to percent-encode using the `attr-char` rules. 
+ pub fn percent_encode_attr_chars(self) -> Form { + self.with_inner(|inner| inner.percent_encode_attr_chars()) + } + + /// Configure this `Form` to skip percent-encoding + pub fn percent_encode_noop(self) -> Form { + self.with_inner(|inner| inner.percent_encode_noop()) + } + + pub(crate) fn reader(self) -> Reader { + Reader::new(self) + } + + /// Produce a reader over the multipart form data. + pub fn into_reader(self) -> impl Read { + self.reader() + } + + // If predictable, computes the length the request will have + // The length should be predictable if only String and file fields have been added, + // but not if a generic reader has been added; + pub(crate) fn compute_length(&mut self) -> Option { + self.inner.compute_length() + } + + fn with_inner(self, func: F) -> Self + where + F: FnOnce(FormParts) -> FormParts, + { + Form { + inner: func(self.inner), + } + } +} + +impl fmt::Debug for Form { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.inner.fmt_fields("Form", f) + } +} + +impl Part { + /// Makes a text parameter. + pub fn text(value: T) -> Part + where + T: Into>, + { + let body = match value.into() { + Cow::Borrowed(slice) => Body::from(slice), + Cow::Owned(string) => Body::from(string), + }; + Part::new(body) + } + + /// Makes a new parameter from arbitrary bytes. + pub fn bytes(value: T) -> Part + where + T: Into>, + { + let body = match value.into() { + Cow::Borrowed(slice) => Body::from(slice), + Cow::Owned(vec) => Body::from(vec), + }; + Part::new(body) + } + + /// Adds a generic reader. + /// + /// Does not set filename or mime. + pub fn reader(value: T) -> Part { + Part::new(Body::new(value)) + } + + /// Adds a generic reader with known length. + /// + /// Does not set filename or mime. + pub fn reader_with_length(value: T, length: u64) -> Part { + Part::new(Body::sized(value, length)) + } + + /// Makes a file parameter. + /// + /// # Errors + /// + /// Errors when the file cannot be opened. + pub fn file>(path: T) -> io::Result { + let path = path.as_ref(); + let file_name = path + .file_name() + .map(|filename| filename.to_string_lossy().into_owned()); + let ext = path.extension().and_then(|ext| ext.to_str()).unwrap_or(""); + let mime = mime_guess::from_ext(ext).first_or_octet_stream(); + let file = File::open(path)?; + let field = Part::new(Body::from(file)).mime(mime); + + Ok(if let Some(file_name) = file_name { + field.file_name(file_name) + } else { + field + }) + } + + fn new(value: Body) -> Part { + Part { + meta: PartMetadata::new(), + value, + } + } + + /// Tries to set the mime of this part. + pub fn mime_str(self, mime: &str) -> crate::Result { + Ok(self.mime(mime.parse().map_err(crate::error::builder)?)) + } + + // Re-export when mime 0.4 is available, with split MediaType/MediaRange. + fn mime(self, mime: Mime) -> Part { + self.with_inner(move |inner| inner.mime(mime)) + } + + /// Sets the filename, builder style. + pub fn file_name(self, filename: T) -> Part + where + T: Into>, + { + self.with_inner(move |inner| inner.file_name(filename)) + } + + /// Sets custom headers for the part. 
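A sketch of a custom `Part` built from an in-memory reader with a known length, so `compute_length` can still produce an overall `Content-Length` for the form:

```rust
use std::io::Cursor;

use reqwest::blocking::multipart::{Form, Part};

fn build_form() -> Result<Form, reqwest::Error> {
    let data = Cursor::new(vec![1u8, 2, 3, 4]);
    // `reader_with_length` keeps the form's total length predictable;
    // a plain `reader` part would make it unknown.
    let part = Part::reader_with_length(data, 4)
        .file_name("blob.bin")
        .mime_str("application/octet-stream")?;
    Ok(Form::new().part("blob", part))
}
```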
+ pub fn headers(self, headers: HeaderMap) -> Part { + self.with_inner(move |inner| inner.headers(headers)) + } + + fn with_inner(self, func: F) -> Self + where + F: FnOnce(PartMetadata) -> PartMetadata, + { + Part { + meta: func(self.meta), + value: self.value, + } + } +} + +impl fmt::Debug for Part { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut dbg = f.debug_struct("Part"); + dbg.field("value", &self.value); + self.meta.fmt_fields(&mut dbg); + dbg.finish() + } +} + +impl PartProps for Part { + fn value_len(&self) -> Option { + self.value.len() + } + + fn metadata(&self) -> &PartMetadata { + &self.meta + } +} + +pub(crate) struct Reader { + form: Form, + active_reader: Option>, +} + +impl fmt::Debug for Reader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Reader").field("form", &self.form).finish() + } +} + +impl Reader { + fn new(form: Form) -> Reader { + let mut reader = Reader { + form, + active_reader: None, + }; + reader.next_reader(); + reader + } + + fn next_reader(&mut self) { + self.active_reader = if !self.form.inner.fields.is_empty() { + // We need to move out of the vector here because we are consuming the field's reader + let (name, field) = self.form.inner.fields.remove(0); + let boundary = Cursor::new(format!("--{}\r\n", self.form.boundary())); + let header = Cursor::new({ + // Try to use cached headers created by compute_length + let mut h = if !self.form.inner.computed_headers.is_empty() { + self.form.inner.computed_headers.remove(0) + } else { + self.form + .inner + .percent_encoding + .encode_headers(&name, field.metadata()) + }; + h.extend_from_slice(b"\r\n\r\n"); + h + }); + let reader = boundary + .chain(header) + .chain(field.value.into_reader()) + .chain(Cursor::new("\r\n")); + // According to https://tools.ietf.org/html/rfc2046#section-5.1.1 + // the very last field has a special boundary + if !self.form.inner.fields.is_empty() { + Some(Box::new(reader)) + } else { + Some(Box::new(reader.chain(Cursor::new(format!( + "--{}--\r\n", + self.form.boundary() + ))))) + } + } else { + None + } + } +} + +impl Read for Reader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let mut total_bytes_read = 0usize; + let mut last_read_bytes; + loop { + match self.active_reader { + Some(ref mut reader) => { + last_read_bytes = reader.read(&mut buf[total_bytes_read..])?; + total_bytes_read += last_read_bytes; + if total_bytes_read == buf.len() { + return Ok(total_bytes_read); + } + } + None => return Ok(total_bytes_read), + }; + if last_read_bytes == 0 && !buf.is_empty() { + self.next_reader(); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn form_empty() { + let mut output = Vec::new(); + let mut form = Form::new(); + let length = form.compute_length(); + form.reader().read_to_end(&mut output).unwrap(); + assert_eq!(output, b""); + assert_eq!(length.unwrap(), 0); + } + + #[test] + fn read_to_end() { + let mut output = Vec::new(); + let mut form = Form::new() + .part("reader1", Part::reader(std::io::empty())) + .part("key1", Part::text("value1")) + .part( + "key2", + Part::text("value2").mime(mime_guess::mime::IMAGE_BMP), + ) + .part("reader2", Part::reader(std::io::empty())) + .part("key3", Part::text("value3").file_name("filename")); + form.inner.boundary = "boundary".to_string(); + let length = form.compute_length(); + let expected = "--boundary\r\n\ + Content-Disposition: form-data; name=\"reader1\"\r\n\r\n\ + \r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"key1\"\r\n\r\n\ + 
value1\r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"key2\"\r\n\ + Content-Type: image/bmp\r\n\r\n\ + value2\r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"reader2\"\r\n\r\n\ + \r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"key3\"; filename=\"filename\"\r\n\r\n\ + value3\r\n--boundary--\r\n"; + form.reader().read_to_end(&mut output).unwrap(); + // These prints are for debug purposes in case the test fails + println!( + "START REAL\n{}\nEND REAL", + std::str::from_utf8(&output).unwrap() + ); + println!("START EXPECTED\n{expected}\nEND EXPECTED"); + assert_eq!(std::str::from_utf8(&output).unwrap(), expected); + assert!(length.is_none()); + } + + #[test] + fn read_to_end_with_length() { + let mut output = Vec::new(); + let mut form = Form::new() + .text("key1", "value1") + .part( + "key2", + Part::text("value2").mime(mime_guess::mime::IMAGE_BMP), + ) + .part("key3", Part::text("value3").file_name("filename")); + form.inner.boundary = "boundary".to_string(); + let length = form.compute_length(); + let expected = "--boundary\r\n\ + Content-Disposition: form-data; name=\"key1\"\r\n\r\n\ + value1\r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"key2\"\r\n\ + Content-Type: image/bmp\r\n\r\n\ + value2\r\n\ + --boundary\r\n\ + Content-Disposition: form-data; name=\"key3\"; filename=\"filename\"\r\n\r\n\ + value3\r\n--boundary--\r\n"; + form.reader().read_to_end(&mut output).unwrap(); + // These prints are for debug purposes in case the test fails + println!( + "START REAL\n{}\nEND REAL", + std::str::from_utf8(&output).unwrap() + ); + println!("START EXPECTED\n{expected}\nEND EXPECTED"); + assert_eq!(std::str::from_utf8(&output).unwrap(), expected); + assert_eq!(length.unwrap(), expected.len() as u64); + } + + #[test] + fn read_to_end_with_header() { + let mut output = Vec::new(); + let mut part = Part::text("value2").mime(mime_guess::mime::IMAGE_BMP); + let mut headers = HeaderMap::new(); + headers.insert("Hdr3", "/a/b/c".parse().unwrap()); + part = part.headers(headers); + let mut form = Form::new().part("key2", part); + form.inner.boundary = "boundary".to_string(); + let expected = "--boundary\r\n\ + Content-Disposition: form-data; name=\"key2\"\r\n\ + Content-Type: image/bmp\r\n\ + hdr3: /a/b/c\r\n\ + \r\n\ + value2\r\n\ + --boundary--\r\n"; + form.reader().read_to_end(&mut output).unwrap(); + // These prints are for debug purposes in case the test fails + println!( + "START REAL\n{}\nEND REAL", + std::str::from_utf8(&output).unwrap() + ); + println!("START EXPECTED\n{expected}\nEND EXPECTED"); + assert_eq!(std::str::from_utf8(&output).unwrap(), expected); + } +} diff --git a/rust/reqwest/src/blocking/request.rs b/rust/reqwest/src/blocking/request.rs new file mode 100644 index 0000000000..7ac08a768f --- /dev/null +++ b/rust/reqwest/src/blocking/request.rs @@ -0,0 +1,1099 @@ +use std::convert::TryFrom; +use std::fmt; +use std::time::Duration; + +use http::{request::Parts, Request as HttpRequest, Version}; +use serde::Serialize; +#[cfg(feature = "json")] +use serde_json; +use serde_urlencoded; + +use super::body::{self, Body}; +#[cfg(feature = "multipart")] +use super::multipart; +use super::Client; +use crate::header::{HeaderMap, HeaderName, HeaderValue, CONTENT_TYPE}; +use crate::{async_impl, Method, Url}; + +/// A request which can be executed with `Client::execute()`. +pub struct Request { + body: Option, + inner: async_impl::Request, +} + +/// A builder to construct the properties of a `Request`. 
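A sketch of driving `Client::execute` with a hand-built `Request` (URL and header name are illustrative):

```rust
use reqwest::blocking::{Client, Request};
use reqwest::{Method, Url};

fn run() -> Result<(), Box<dyn std::error::Error>> {
    let url = Url::parse("https://httpbin.org/get")?;
    let mut req = Request::new(Method::GET, url);
    req.headers_mut().insert("x-example", "1".parse()?);
    let res = Client::new().execute(req)?;
    println!("status = {}", res.status());
    Ok(())
}
```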
+/// +/// To construct a `RequestBuilder`, refer to the `Client` documentation. +#[derive(Debug)] +#[must_use = "RequestBuilder does nothing until you 'send' it"] +pub struct RequestBuilder { + client: Client, + request: crate::Result, +} + +impl Request { + /// Constructs a new request. + #[inline] + pub fn new(method: Method, url: Url) -> Self { + Request { + body: None, + inner: async_impl::Request::new(method, url), + } + } + + /// Get the method. + #[inline] + pub fn method(&self) -> &Method { + self.inner.method() + } + + /// Get a mutable reference to the method. + #[inline] + pub fn method_mut(&mut self) -> &mut Method { + self.inner.method_mut() + } + + /// Get the url. + #[inline] + pub fn url(&self) -> &Url { + self.inner.url() + } + + /// Get a mutable reference to the url. + #[inline] + pub fn url_mut(&mut self) -> &mut Url { + self.inner.url_mut() + } + + /// Get the headers. + #[inline] + pub fn headers(&self) -> &HeaderMap { + self.inner.headers() + } + + /// Get a mutable reference to the headers. + #[inline] + pub fn headers_mut(&mut self) -> &mut HeaderMap { + self.inner.headers_mut() + } + + /// Get the http version. + #[inline] + pub fn version(&self) -> Version { + self.inner.version() + } + + /// Get a mutable reference to the http version. + #[inline] + pub fn version_mut(&mut self) -> &mut Version { + self.inner.version_mut() + } + + /// Get the body. + #[inline] + pub fn body(&self) -> Option<&Body> { + self.body.as_ref() + } + + /// Get a mutable reference to the body. + #[inline] + pub fn body_mut(&mut self) -> &mut Option { + &mut self.body + } + + /// Get the timeout. + #[inline] + pub fn timeout(&self) -> Option<&Duration> { + self.inner.timeout() + } + + /// Get a mutable reference to the timeout. + #[inline] + pub fn timeout_mut(&mut self) -> &mut Option { + self.inner.timeout_mut() + } + + /// Attempts to clone the `Request`. + /// + /// None is returned if a body is which can not be cloned. This can be because the body is a + /// stream. + pub fn try_clone(&self) -> Option { + let body = if let Some(ref body) = self.body.as_ref() { + if let Some(body) = body.try_clone() { + Some(body) + } else { + return None; + } + } else { + None + }; + let mut req = Request::new(self.method().clone(), self.url().clone()); + *req.timeout_mut() = self.timeout().copied(); + *req.headers_mut() = self.headers().clone(); + *req.version_mut() = self.version().clone(); + req.body = body; + Some(req) + } + + pub(crate) fn into_async(self) -> (async_impl::Request, Option) { + use crate::header::CONTENT_LENGTH; + + let mut req_async = self.inner; + let body = self.body.and_then(|body| { + let (tx, body, len) = body.into_async(); + if let Some(len) = len { + req_async.headers_mut().insert(CONTENT_LENGTH, len.into()); + } + *req_async.body_mut() = Some(body); + tx + }); + (req_async, body) + } +} + +impl RequestBuilder { + pub(crate) fn new(client: Client, request: crate::Result) -> RequestBuilder { + let mut builder = RequestBuilder { client, request }; + + let auth = builder + .request + .as_mut() + .ok() + .and_then(|req| async_impl::request::extract_authority(req.url_mut())); + + if let Some((username, password)) = auth { + builder.basic_auth(username, password) + } else { + builder + } + } + + /// Assemble a builder starting from an existing `Client` and a `Request`. + pub fn from_parts(client: Client, request: Request) -> RequestBuilder { + RequestBuilder { + client, + request: crate::Result::Ok(request), + } + } + + /// Add a `Header` to this Request. 
+ /// + /// ```rust + /// use reqwest::header::USER_AGENT; + /// + /// # fn run() -> Result<(), Box> { + /// let client = reqwest::blocking::Client::new(); + /// let res = client.get("https://www.rust-lang.org") + /// .header(USER_AGENT, "foo") + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + pub fn header(self, key: K, value: V) -> RequestBuilder + where + HeaderName: TryFrom, + HeaderValue: TryFrom, + >::Error: Into, + >::Error: Into, + { + self.header_sensitive(key, value, false) + } + + /// Add a `Header` to this Request with ability to define if header_value is sensitive. + fn header_sensitive(mut self, key: K, value: V, sensitive: bool) -> RequestBuilder + where + HeaderName: TryFrom, + HeaderValue: TryFrom, + >::Error: Into, + >::Error: Into, + { + let mut error = None; + if let Ok(ref mut req) = self.request { + match >::try_from(key) { + Ok(key) => match >::try_from(value) { + Ok(mut value) => { + // We want to potentially make an unsensitive header + // to be sensitive, not the reverse. So, don't turn off + // a previously sensitive header. + if sensitive { + value.set_sensitive(true); + } + req.headers_mut().append(key, value); + } + Err(e) => error = Some(crate::error::builder(e.into())), + }, + Err(e) => error = Some(crate::error::builder(e.into())), + }; + } + if let Some(err) = error { + self.request = Err(err); + } + self + } + + /// Add a set of Headers to the existing ones on this Request. + /// + /// The headers will be merged in to any already set. + /// + /// ```rust + /// use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT, CONTENT_TYPE}; + /// # use std::fs; + /// + /// fn construct_headers() -> HeaderMap { + /// let mut headers = HeaderMap::new(); + /// headers.insert(USER_AGENT, HeaderValue::from_static("reqwest")); + /// headers.insert(CONTENT_TYPE, HeaderValue::from_static("image/png")); + /// headers + /// } + /// + /// # fn run() -> Result<(), Box> { + /// let file = fs::File::open("much_beauty.png")?; + /// let client = reqwest::blocking::Client::new(); + /// let res = client.post("http://httpbin.org/post") + /// .headers(construct_headers()) + /// .body(file) + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + pub fn headers(mut self, headers: crate::header::HeaderMap) -> RequestBuilder { + if let Ok(ref mut req) = self.request { + crate::util::replace_headers(req.headers_mut(), headers); + } + self + } + + /// Enable HTTP basic authentication. + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let client = reqwest::blocking::Client::new(); + /// let resp = client.delete("http://httpbin.org/delete") + /// .basic_auth("admin", Some("good password")) + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + pub fn basic_auth(self, username: U, password: Option
<P>
) -> RequestBuilder + where + U: fmt::Display, + P: fmt::Display, + { + let header_value = crate::util::basic_auth(username, password); + self.header_sensitive(crate::header::AUTHORIZATION, header_value, true) + } + + /// Enable HTTP bearer authentication. + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let client = reqwest::blocking::Client::new(); + /// let resp = client.delete("http://httpbin.org/delete") + /// .bearer_auth("token") + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + pub fn bearer_auth(self, token: T) -> RequestBuilder + where + T: fmt::Display, + { + let header_value = format!("Bearer {token}"); + self.header_sensitive(crate::header::AUTHORIZATION, &*header_value, true) + } + + /// Set the request body. + /// + /// # Examples + /// + /// Using a string: + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let client = reqwest::blocking::Client::new(); + /// let res = client.post("http://httpbin.org/post") + /// .body("from a &str!") + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + /// + /// Using a `File`: + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let file = std::fs::File::open("from_a_file.txt")?; + /// let client = reqwest::blocking::Client::new(); + /// let res = client.post("http://httpbin.org/post") + /// .body(file) + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + /// + /// Using arbitrary bytes: + /// + /// ```rust + /// # use std::fs; + /// # fn run() -> Result<(), Box> { + /// // from bytes! + /// let bytes: Vec = vec![1, 10, 100]; + /// let client = reqwest::blocking::Client::new(); + /// let res = client.post("http://httpbin.org/post") + /// .body(bytes) + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + pub fn body>(mut self, body: T) -> RequestBuilder { + if let Ok(ref mut req) = self.request { + *req.body_mut() = Some(body.into()); + } + self + } + + /// Enables a request timeout. + /// + /// The timeout is applied from when the request starts connecting until the + /// response body has finished. It affects only this request and overrides + /// the timeout configured using `ClientBuilder::timeout()`. + pub fn timeout(mut self, timeout: Duration) -> RequestBuilder { + if let Ok(ref mut req) = self.request { + *req.timeout_mut() = Some(timeout); + } + self + } + + /// Modify the query string of the URL. + /// + /// Modifies the URL of this request, adding the parameters provided. + /// This method appends and does not overwrite. This means that it can + /// be called multiple times and that existing query parameters are not + /// overwritten if the same key is used. The key will simply show up + /// twice in the query string. + /// Calling `.query(&[("foo", "a"), ("foo", "b")])` gives `"foo=a&foo=b"`. + /// + /// ```rust + /// # use reqwest::Error; + /// # + /// # fn run() -> Result<(), Error> { + /// let client = reqwest::blocking::Client::new(); + /// let res = client.get("http://httpbin.org") + /// .query(&[("lang", "rust")]) + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Note + /// This method does not support serializing a single key-value + /// pair. Instead of using `.query(("key", "val"))`, use a sequence, such + /// as `.query(&[("key", "val")])`. It's also possible to serialize structs + /// and maps into a key-value pair. + /// + /// # Errors + /// This method will fail if the object you provide cannot be serialized + /// into a query string. 
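+ ///
+ /// A minimal sketch of the append behavior across calls, only building
+ /// the request:
+ ///
+ /// ```rust
+ /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+ /// let client = reqwest::blocking::Client::new();
+ /// let req = client.get("http://httpbin.org")
+ ///     .query(&[("foo", "a")])
+ ///     .query(&[("foo", "b")])
+ ///     .build()?;
+ /// assert_eq!(req.url().query(), Some("foo=a&foo=b"));
+ /// # Ok(())
+ /// # }
+ /// ```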
+ pub fn query(mut self, query: &T) -> RequestBuilder { + let mut error = None; + if let Ok(ref mut req) = self.request { + let url = req.url_mut(); + let mut pairs = url.query_pairs_mut(); + let serializer = serde_urlencoded::Serializer::new(&mut pairs); + + if let Err(err) = query.serialize(serializer) { + error = Some(crate::error::builder(err)); + } + } + if let Ok(ref mut req) = self.request { + if let Some("") = req.url().query() { + req.url_mut().set_query(None); + } + } + if let Some(err) = error { + self.request = Err(err); + } + self + } + + /// Set HTTP version + pub fn version(mut self, version: Version) -> RequestBuilder { + if let Ok(ref mut req) = self.request { + *req.version_mut() = version; + } + self + } + + /// Send a form body. + /// + /// Sets the body to the url encoded serialization of the passed value, + /// and also sets the `Content-Type: application/x-www-form-urlencoded` + /// header. + /// + /// ```rust + /// # use reqwest::Error; + /// # use std::collections::HashMap; + /// # + /// # fn run() -> Result<(), Error> { + /// let mut params = HashMap::new(); + /// params.insert("lang", "rust"); + /// + /// let client = reqwest::blocking::Client::new(); + /// let res = client.post("http://httpbin.org") + /// .form(¶ms) + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Errors + /// + /// This method fails if the passed value cannot be serialized into + /// url encoded format + pub fn form(mut self, form: &T) -> RequestBuilder { + let mut error = None; + if let Ok(ref mut req) = self.request { + match serde_urlencoded::to_string(form) { + Ok(body) => { + req.headers_mut() + .entry(CONTENT_TYPE) + .or_insert(HeaderValue::from_static( + "application/x-www-form-urlencoded", + )); + *req.body_mut() = Some(body.into()); + } + Err(err) => error = Some(crate::error::builder(err)), + } + } + if let Some(err) = error { + self.request = Err(err); + } + self + } + + /// Send a JSON body. + /// + /// Sets the body to the JSON serialization of the passed value, and + /// also sets the `Content-Type: application/json` header. + /// + /// # Optional + /// + /// This requires the optional `json` feature enabled. + /// + /// # Examples + /// + /// ```rust + /// # use reqwest::Error; + /// # use std::collections::HashMap; + /// # + /// # fn run() -> Result<(), Error> { + /// let mut map = HashMap::new(); + /// map.insert("lang", "rust"); + /// + /// let client = reqwest::blocking::Client::new(); + /// let res = client.post("http://httpbin.org") + /// .json(&map) + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Errors + /// + /// Serialization can fail if `T`'s implementation of `Serialize` decides to + /// fail, or if `T` contains a map with non-string keys. + #[cfg(feature = "json")] + #[cfg_attr(docsrs, doc(cfg(feature = "json")))] + pub fn json(mut self, json: &T) -> RequestBuilder { + let mut error = None; + if let Ok(ref mut req) = self.request { + match serde_json::to_vec(json) { + Ok(body) => { + if !req.headers().contains_key(CONTENT_TYPE) { + req.headers_mut() + .insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); + } + *req.body_mut() = Some(body.into()); + } + Err(err) => error = Some(crate::error::builder(err)), + } + } + if let Some(err) = error { + self.request = Err(err); + } + self + } + + /// Sends a multipart/form-data body. 
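+ ///
+ /// The matching `Content-Type: multipart/form-data` header, including the
+ /// form's boundary parameter, is set on the request automatically.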
+ /// + /// ``` + /// # use reqwest::Error; + /// + /// # fn run() -> Result<(), Box> { + /// let client = reqwest::blocking::Client::new(); + /// let form = reqwest::blocking::multipart::Form::new() + /// .text("key3", "value3") + /// .file("file", "/path/to/field")?; + /// + /// let response = client.post("your url") + /// .multipart(form) + /// .send()?; + /// # Ok(()) + /// # } + /// ``` + /// + /// See [`multipart`](multipart/) for more examples. + #[cfg(feature = "multipart")] + #[cfg_attr(docsrs, doc(cfg(feature = "multipart")))] + pub fn multipart(self, mut multipart: multipart::Form) -> RequestBuilder { + let mut builder = self.header( + CONTENT_TYPE, + format!("multipart/form-data; boundary={}", multipart.boundary()).as_str(), + ); + if let Ok(ref mut req) = builder.request { + *req.body_mut() = Some(match multipart.compute_length() { + Some(length) => Body::sized(multipart.reader(), length), + None => Body::new(multipart.reader()), + }) + } + builder + } + + /// Build a `Request`, which can be inspected, modified and executed with + /// `Client::execute()`. + pub fn build(self) -> crate::Result { + self.request + } + + /// Build a `Request`, which can be inspected, modified and executed with + /// `Client::execute()`. + /// + /// This is similar to [`RequestBuilder::build()`], but also returns the + /// embedded `Client`. + pub fn build_split(self) -> (Client, crate::Result) { + (self.client, self.request) + } + + /// Constructs the Request and sends it the target URL, returning a Response. + /// + /// # Errors + /// + /// This method fails if there was an error while sending request, + /// redirect loop was detected or redirect limit was exhausted. + pub fn send(self) -> crate::Result { + self.client.execute(self.request?) + } + + /// Attempts to clone the `RequestBuilder`. + /// + /// None is returned if a body is which can not be cloned. This can be because the body is a + /// stream. + /// + /// # Examples + /// + /// With a static body + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let client = reqwest::blocking::Client::new(); + /// let builder = client.post("http://httpbin.org/post") + /// .body("from a &str!"); + /// let clone = builder.try_clone(); + /// assert!(clone.is_some()); + /// # Ok(()) + /// # } + /// ``` + /// + /// Without a body + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let client = reqwest::blocking::Client::new(); + /// let builder = client.get("http://httpbin.org/get"); + /// let clone = builder.try_clone(); + /// assert!(clone.is_some()); + /// # Ok(()) + /// # } + /// ``` + /// + /// With a non-cloneable body + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let client = reqwest::blocking::Client::new(); + /// let builder = client.get("http://httpbin.org/get") + /// .body(reqwest::blocking::Body::new(std::io::empty())); + /// let clone = builder.try_clone(); + /// assert!(clone.is_none()); + /// # Ok(()) + /// # } + /// ``` + pub fn try_clone(&self) -> Option { + self.request + .as_ref() + .ok() + .and_then(|req| req.try_clone()) + .map(|req| RequestBuilder { + client: self.client.clone(), + request: Ok(req), + }) + } +} + +impl TryFrom> for Request +where + T: Into, +{ + type Error = crate::Error; + + fn try_from(req: HttpRequest) -> crate::Result { + let (parts, body) = req.into_parts(); + let Parts { + method, + uri, + headers, + .. 
+ } = parts; + let url = Url::parse(&uri.to_string()).map_err(crate::error::builder)?; + let mut inner = async_impl::Request::new(method, url); + crate::util::replace_headers(inner.headers_mut(), headers); + Ok(Request { + body: Some(body.into()), + inner, + }) + } +} + +impl fmt::Debug for Request { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt_request_fields(&mut f.debug_struct("Request"), self).finish() + } +} + +fn fmt_request_fields<'a, 'b>( + f: &'a mut fmt::DebugStruct<'a, 'b>, + req: &Request, +) -> &'a mut fmt::DebugStruct<'a, 'b> { + f.field("method", req.method()) + .field("url", req.url()) + .field("headers", req.headers()) +} + +#[cfg(test)] +mod tests { + use super::super::{body, Client}; + use super::{HttpRequest, Request, Version}; + use crate::header::{HeaderMap, HeaderValue, ACCEPT, CONTENT_TYPE, HOST}; + use crate::Method; + use serde::Serialize; + #[cfg(feature = "json")] + use serde_json; + use serde_urlencoded; + use std::collections::{BTreeMap, HashMap}; + use std::time::Duration; + + #[test] + fn basic_get_request() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.get(some_url).build().unwrap(); + + assert_eq!(r.method(), &Method::GET); + assert_eq!(r.url().as_str(), some_url); + } + + #[test] + fn basic_head_request() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.head(some_url).build().unwrap(); + + assert_eq!(r.method(), &Method::HEAD); + assert_eq!(r.url().as_str(), some_url); + } + + #[test] + fn basic_post_request() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.post(some_url).build().unwrap(); + + assert_eq!(r.method(), &Method::POST); + assert_eq!(r.url().as_str(), some_url); + } + + #[test] + fn basic_put_request() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.put(some_url).build().unwrap(); + + assert_eq!(r.method(), &Method::PUT); + assert_eq!(r.url().as_str(), some_url); + } + + #[test] + fn basic_patch_request() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.patch(some_url).build().unwrap(); + + assert_eq!(r.method(), &Method::PATCH); + assert_eq!(r.url().as_str(), some_url); + } + + #[test] + fn basic_delete_request() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.delete(some_url).build().unwrap(); + + assert_eq!(r.method(), &Method::DELETE); + assert_eq!(r.url().as_str(), some_url); + } + + #[test] + fn add_header() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.post(some_url); + + let header = HeaderValue::from_static("google.com"); + + // Add a copy of the header to the request builder + let r = r.header(HOST, header.clone()).build().unwrap(); + + // then check it was actually added + assert_eq!(r.headers().get(HOST), Some(&header)); + } + + #[test] + fn add_headers() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.post(some_url); + + let header = HeaderValue::from_static("google.com"); + + let mut headers = HeaderMap::new(); + headers.insert(HOST, header); + + // Add a copy of the headers to the request builder + let r = r.headers(headers.clone()).build().unwrap(); + + // then make sure they were added correctly + assert_eq!(r.headers(), &headers); + } + + #[test] + fn add_headers_multi() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.post(some_url); + + let 
header_json = HeaderValue::from_static("application/json"); + let header_xml = HeaderValue::from_static("application/xml"); + + let mut headers = HeaderMap::new(); + headers.append(ACCEPT, header_json); + headers.append(ACCEPT, header_xml); + + // Add a copy of the headers to the request builder + let r = r.headers(headers.clone()).build().unwrap(); + + // then make sure they were added correctly + assert_eq!(r.headers(), &headers); + let mut all_values = r.headers().get_all(ACCEPT).iter(); + assert_eq!(all_values.next().unwrap(), &"application/json"); + assert_eq!(all_values.next().unwrap(), &"application/xml"); + assert_eq!(all_values.next(), None); + } + + #[test] + fn add_body() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.post(some_url); + + let body = "Some interesting content"; + + let mut r = r.body(body).build().unwrap(); + + let buf = body::read_to_string(r.body_mut().take().unwrap()).unwrap(); + + assert_eq!(buf, body); + } + + #[test] + fn add_query_append() { + let client = Client::new(); + let some_url = "https://google.com/"; + let mut r = client.get(some_url); + + r = r.query(&[("foo", "bar")]); + r = r.query(&[("qux", 3)]); + + let req = r.build().expect("request is valid"); + assert_eq!(req.url().query(), Some("foo=bar&qux=3")); + } + + #[test] + fn add_query_append_same() { + let client = Client::new(); + let some_url = "https://google.com/"; + let mut r = client.get(some_url); + + r = r.query(&[("foo", "a"), ("foo", "b")]); + + let req = r.build().expect("request is valid"); + assert_eq!(req.url().query(), Some("foo=a&foo=b")); + } + + #[test] + fn add_query_struct() { + #[derive(Serialize)] + struct Params { + foo: String, + qux: i32, + } + + let client = Client::new(); + let some_url = "https://google.com/"; + let mut r = client.get(some_url); + + let params = Params { + foo: "bar".into(), + qux: 3, + }; + + r = r.query(¶ms); + + let req = r.build().expect("request is valid"); + assert_eq!(req.url().query(), Some("foo=bar&qux=3")); + } + + #[test] + fn add_query_map() { + let mut params = BTreeMap::new(); + params.insert("foo", "bar"); + params.insert("qux", "three"); + + let client = Client::new(); + let some_url = "https://google.com/"; + let mut r = client.get(some_url); + + r = r.query(¶ms); + + let req = r.build().expect("request is valid"); + assert_eq!(req.url().query(), Some("foo=bar&qux=three")); + } + + #[test] + fn add_form() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.post(some_url); + + let mut form_data = HashMap::new(); + form_data.insert("foo", "bar"); + + let mut r = r.form(&form_data).build().unwrap(); + + // Make sure the content type was set + assert_eq!( + r.headers().get(CONTENT_TYPE).unwrap(), + &"application/x-www-form-urlencoded" + ); + + let buf = body::read_to_string(r.body_mut().take().unwrap()).unwrap(); + + let body_should_be = serde_urlencoded::to_string(&form_data).unwrap(); + assert_eq!(buf, body_should_be); + } + + #[test] + #[cfg(feature = "json")] + fn add_json() { + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.post(some_url); + + let mut json_data = HashMap::new(); + json_data.insert("foo", "bar"); + + let mut r = r.json(&json_data).build().unwrap(); + + // Make sure the content type was set + assert_eq!(r.headers().get(CONTENT_TYPE).unwrap(), &"application/json"); + + let buf = body::read_to_string(r.body_mut().take().unwrap()).unwrap(); + + let body_should_be = 
serde_json::to_string(&json_data).unwrap(); + assert_eq!(buf, body_should_be); + } + + #[test] + #[cfg(feature = "json")] + fn add_json_fail() { + use serde::ser::Error as _; + use serde::{Serialize, Serializer}; + use std::error::Error as _; + struct MyStruct; + impl Serialize for MyStruct { + fn serialize(&self, _serializer: S) -> Result + where + S: Serializer, + { + Err(S::Error::custom("nope")) + } + } + + let client = Client::new(); + let some_url = "https://google.com/"; + let r = client.post(some_url); + let json_data = MyStruct; + let err = r.json(&json_data).build().unwrap_err(); + assert!(err.is_builder()); // well, duh ;) + assert!(err.source().unwrap().is::()); + } + + #[test] + fn test_replace_headers() { + use http::HeaderMap; + + let mut headers = HeaderMap::new(); + headers.insert("foo", "bar".parse().unwrap()); + headers.append("foo", "baz".parse().unwrap()); + + let client = Client::new(); + let req = client + .get("https://hyper.rs") + .header("im-a", "keeper") + .header("foo", "pop me") + .headers(headers) + .build() + .expect("request build"); + + assert_eq!(req.headers()["im-a"], "keeper"); + + let foo = req.headers().get_all("foo").iter().collect::>(); + assert_eq!(foo.len(), 2); + assert_eq!(foo[0], "bar"); + assert_eq!(foo[1], "baz"); + } + + #[test] + fn normalize_empty_query() { + let client = Client::new(); + let some_url = "https://google.com/"; + let empty_query: &[(&str, &str)] = &[]; + + let req = client + .get(some_url) + .query(empty_query) + .build() + .expect("request build"); + + assert_eq!(req.url().query(), None); + assert_eq!(req.url().as_str(), "https://google.com/"); + } + + #[test] + fn convert_url_authority_into_basic_auth() { + let client = Client::new(); + let some_url = "https://Aladdin:open sesame@localhost/"; + + let req = client.get(some_url).build().expect("request build"); + + assert_eq!(req.url().as_str(), "https://localhost/"); + assert_eq!( + req.headers()["authorization"], + "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" + ); + } + + #[test] + fn convert_from_http_request() { + let http_request = HttpRequest::builder() + .method("GET") + .uri("http://localhost/") + .header("User-Agent", "my-awesome-agent/1.0") + .body("test test test") + .unwrap(); + let req: Request = Request::try_from(http_request).unwrap(); + assert_eq!(req.body().is_none(), false); + let test_data = b"test test test"; + assert_eq!(req.body().unwrap().as_bytes(), Some(&test_data[..])); + let headers = req.headers(); + assert_eq!(headers.get("User-Agent").unwrap(), "my-awesome-agent/1.0"); + assert_eq!(req.method(), Method::GET); + assert_eq!(req.url().as_str(), "http://localhost/"); + } + + #[test] + fn set_http_request_version() { + let http_request = HttpRequest::builder() + .method("GET") + .uri("http://localhost/") + .header("User-Agent", "my-awesome-agent/1.0") + .version(Version::HTTP_11) + .body("test test test") + .unwrap(); + let req: Request = Request::try_from(http_request).unwrap(); + assert_eq!(req.body().is_none(), false); + let test_data = b"test test test"; + assert_eq!(req.body().unwrap().as_bytes(), Some(&test_data[..])); + let headers = req.headers(); + assert_eq!(headers.get("User-Agent").unwrap(), "my-awesome-agent/1.0"); + assert_eq!(req.method(), Method::GET); + assert_eq!(req.url().as_str(), "http://localhost/"); + assert_eq!(req.version(), Version::HTTP_11); + } + + #[test] + fn test_basic_auth_sensitive_header() { + let client = Client::new(); + let some_url = "https://localhost/"; + + let req = client + .get(some_url) + .basic_auth("Aladdin", 
Some("open sesame")) + .build() + .expect("request build"); + + assert_eq!(req.url().as_str(), "https://localhost/"); + assert_eq!( + req.headers()["authorization"], + "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" + ); + assert_eq!(req.headers()["authorization"].is_sensitive(), true); + } + + #[test] + fn test_bearer_auth_sensitive_header() { + let client = Client::new(); + let some_url = "https://localhost/"; + + let req = client + .get(some_url) + .bearer_auth("Hold my bear") + .build() + .expect("request build"); + + assert_eq!(req.url().as_str(), "https://localhost/"); + assert_eq!(req.headers()["authorization"], "Bearer Hold my bear"); + assert_eq!(req.headers()["authorization"].is_sensitive(), true); + } + + #[test] + fn test_request_cloning() { + let mut request = Request::new(Method::GET, "https://example.com".try_into().unwrap()); + *request.timeout_mut() = Some(Duration::from_secs(42)); + *request.version_mut() = Version::HTTP_11; + + let clone = request.try_clone().unwrap(); + assert_eq!(request.version(), clone.version()); + assert_eq!(request.headers(), clone.headers()); + assert_eq!(request.timeout(), clone.timeout()); + } +} diff --git a/rust/reqwest/src/blocking/response.rs b/rust/reqwest/src/blocking/response.rs new file mode 100644 index 0000000000..86c81772c8 --- /dev/null +++ b/rust/reqwest/src/blocking/response.rs @@ -0,0 +1,445 @@ +use std::fmt; +use std::io::{self, Read}; +use std::mem; +use std::net::SocketAddr; +use std::pin::Pin; +use std::time::Duration; + +use bytes::Bytes; +use http; +use hyper::header::HeaderMap; +#[cfg(feature = "json")] +use serde::de::DeserializeOwned; + +use super::client::KeepCoreThreadAlive; +use super::wait; +#[cfg(feature = "cookies")] +use crate::cookie; +use crate::{async_impl, StatusCode, Url, Version}; + +/// A Response to a submitted `Request`. +pub struct Response { + inner: async_impl::Response, + body: Option>>, + timeout: Option, + _thread_handle: KeepCoreThreadAlive, +} + +impl fmt::Debug for Response { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&self.inner, f) + } +} + +impl Response { + pub(crate) fn new( + res: async_impl::Response, + timeout: Option, + thread: KeepCoreThreadAlive, + ) -> Response { + Response { + inner: res, + body: None, + timeout, + _thread_handle: thread, + } + } + + /// Get the `StatusCode` of this `Response`. + /// + /// # Examples + /// + /// Checking for general status class: + /// + /// ```rust + /// # #[cfg(feature = "json")] + /// # fn run() -> Result<(), Box> { + /// let resp = reqwest::blocking::get("http://httpbin.org/get")?; + /// if resp.status().is_success() { + /// println!("success!"); + /// } else if resp.status().is_server_error() { + /// println!("server error!"); + /// } else { + /// println!("Something else happened. 
Status: {:?}", resp.status()); + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// Checking for specific status codes: + /// + /// ```rust + /// use reqwest::blocking::Client; + /// use reqwest::StatusCode; + /// # fn run() -> Result<(), Box> { + /// let client = Client::new(); + /// + /// let resp = client.post("http://httpbin.org/post") + /// .body("possibly too large") + /// .send()?; + /// + /// match resp.status() { + /// StatusCode::OK => println!("success!"), + /// StatusCode::PAYLOAD_TOO_LARGE => { + /// println!("Request payload is too large!"); + /// } + /// s => println!("Received response status: {s:?}"), + /// }; + /// # Ok(()) + /// # } + /// ``` + #[inline] + pub fn status(&self) -> StatusCode { + self.inner.status() + } + + /// Get the `Headers` of this `Response`. + /// + /// # Example + /// + /// Saving an etag when caching a file: + /// + /// ``` + /// use reqwest::blocking::Client; + /// use reqwest::header::ETAG; + /// + /// # fn run() -> Result<(), Box> { + /// let client = Client::new(); + /// + /// let mut resp = client.get("http://httpbin.org/cache").send()?; + /// if resp.status().is_success() { + /// if let Some(etag) = resp.headers().get(ETAG) { + /// std::fs::write("etag", etag.as_bytes()); + /// } + /// let mut file = std::fs::File::create("file")?; + /// resp.copy_to(&mut file)?; + /// } + /// # Ok(()) + /// # } + /// ``` + #[inline] + pub fn headers(&self) -> &HeaderMap { + self.inner.headers() + } + + /// Get a mutable reference to the `Headers` of this `Response`. + #[inline] + pub fn headers_mut(&mut self) -> &mut HeaderMap { + self.inner.headers_mut() + } + + /// Retrieve the cookies contained in the response. + /// + /// Note that invalid 'Set-Cookie' headers will be ignored. + /// + /// # Optional + /// + /// This requires the optional `cookies` feature to be enabled. + #[cfg(feature = "cookies")] + #[cfg_attr(docsrs, doc(cfg(feature = "cookies")))] + pub fn cookies<'a>(&'a self) -> impl Iterator> + 'a { + cookie::extract_response_cookies(self.headers()).filter_map(Result::ok) + } + + /// Get the HTTP `Version` of this `Response`. + #[inline] + pub fn version(&self) -> Version { + self.inner.version() + } + + /// Get the final `Url` of this `Response`. + /// + /// # Example + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let resp = reqwest::blocking::get("http://httpbin.org/redirect/1")?; + /// assert_eq!(resp.url().as_str(), "http://httpbin.org/get"); + /// # Ok(()) + /// # } + /// ``` + #[inline] + pub fn url(&self) -> &Url { + self.inner.url() + } + + /// Get the remote address used to get this `Response`. + /// + /// # Example + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let resp = reqwest::blocking::get("http://httpbin.org/redirect/1")?; + /// println!("httpbin.org address: {:?}", resp.remote_addr()); + /// # Ok(()) + /// # } + /// ``` + pub fn remote_addr(&self) -> Option { + self.inner.remote_addr() + } + + /// Returns a reference to the associated extensions. + pub fn extensions(&self) -> &http::Extensions { + self.inner.extensions() + } + + /// Returns a mutable reference to the associated extensions. + pub fn extensions_mut(&mut self) -> &mut http::Extensions { + self.inner.extensions_mut() + } + + /// Get the content length of the response, if it is known. + /// + /// + /// This value does not directly represents the value of the `Content-Length` + /// header, but rather the size of the response's body. To read the header's + /// value, please use the [`Response::headers`] method instead. 
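+ ///
+ /// A minimal sketch, assuming the server reports a sized body:
+ ///
+ /// ```rust
+ /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+ /// let resp = reqwest::blocking::get("http://httpbin.org/get")?;
+ /// if let Some(len) = resp.content_length() {
+ ///     println!("expecting {len} bytes");
+ /// }
+ /// # Ok(())
+ /// # }
+ /// ```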
+ /// + /// Reasons it may not be known: + /// + /// - The response does not include a body (e.g. it responds to a `HEAD` + /// request). + /// - The response is gzipped and automatically decoded (thus changing the + /// actual decoded length). + pub fn content_length(&self) -> Option { + self.inner.content_length() + } + + /// Try and deserialize the response body as JSON using `serde`. + /// + /// # Optional + /// + /// This requires the optional `json` feature enabled. + /// + /// # Examples + /// + /// ```rust + /// # extern crate reqwest; + /// # extern crate serde; + /// # + /// # use reqwest::Error; + /// # use serde::Deserialize; + /// # + /// // This `derive` requires the `serde` dependency. + /// #[derive(Deserialize)] + /// struct Ip { + /// origin: String, + /// } + /// + /// # fn run() -> Result<(), Error> { + /// let json: Ip = reqwest::blocking::get("http://httpbin.org/ip")?.json()?; + /// # Ok(()) + /// # } + /// # + /// # fn main() { } + /// ``` + /// + /// # Errors + /// + /// This method fails whenever the response body is not in JSON format, + /// or it cannot be properly deserialized to target type `T`. For more + /// details please see [`serde_json::from_reader`]. + /// + /// [`serde_json::from_reader`]: https://docs.serde.rs/serde_json/fn.from_reader.html + #[cfg(feature = "json")] + #[cfg_attr(docsrs, doc(cfg(feature = "json")))] + pub fn json(self) -> crate::Result { + wait::timeout(self.inner.json(), self.timeout).map_err(|e| match e { + wait::Waited::TimedOut(e) => crate::error::decode(e), + wait::Waited::Inner(e) => e, + }) + } + + /// Get the full response body as `Bytes`. + /// + /// # Example + /// + /// ``` + /// # fn run() -> Result<(), Box> { + /// let bytes = reqwest::blocking::get("http://httpbin.org/ip")?.bytes()?; + /// + /// println!("bytes: {bytes:?}"); + /// # Ok(()) + /// # } + /// ``` + pub fn bytes(self) -> crate::Result { + wait::timeout(self.inner.bytes(), self.timeout).map_err(|e| match e { + wait::Waited::TimedOut(e) => crate::error::decode(e), + wait::Waited::Inner(e) => e, + }) + } + + /// Get the response text. + /// + /// This method decodes the response body with BOM sniffing + /// and with malformed sequences replaced with the [`char::REPLACEMENT_CHARACTER`]. + /// Encoding is determined from the `charset` parameter of `Content-Type` header, + /// and defaults to `utf-8` if not presented. + /// + /// # Note + /// + /// If the `charset` feature is disabled the method will only attempt to decode the + /// response as UTF-8, regardless of the given `Content-Type` + /// + /// # Example + /// + /// ```rust + /// # extern crate reqwest; + /// # fn run() -> Result<(), Box> { + /// let content = reqwest::blocking::get("http://httpbin.org/range/26")?.text()?; + /// # Ok(()) + /// # } + /// ``` + pub fn text(self) -> crate::Result { + wait::timeout(self.inner.text(), self.timeout).map_err(|e| match e { + wait::Waited::TimedOut(e) => crate::error::decode(e), + wait::Waited::Inner(e) => e, + }) + } + + /// Get the response text given a specific encoding. + /// + /// This method decodes the response body with BOM sniffing + /// and with malformed sequences replaced with the [`char::REPLACEMENT_CHARACTER`]. + /// You can provide a default encoding for decoding the raw message, while the + /// `charset` parameter of `Content-Type` header is still prioritized. For more information + /// about the possible encoding name, please go to [`encoding_rs`] docs. 
+ /// + /// [`encoding_rs`]: https://docs.rs/encoding_rs/0.8/encoding_rs/#relationship-with-windows-code-pages + /// + /// # Optional + /// + /// This requires the optional `charset` feature enabled. + /// + /// # Example + /// + /// ```rust + /// # extern crate reqwest; + /// # fn run() -> Result<(), Box> { + /// let content = reqwest::blocking::get("http://httpbin.org/range/26")? + /// .text_with_charset("utf-8")?; + /// # Ok(()) + /// # } + /// ``` + #[cfg(feature = "charset")] + #[cfg_attr(docsrs, doc(cfg(feature = "charset")))] + pub fn text_with_charset(self, default_encoding: &str) -> crate::Result { + wait::timeout(self.inner.text_with_charset(default_encoding), self.timeout).map_err(|e| { + match e { + wait::Waited::TimedOut(e) => crate::error::decode(e), + wait::Waited::Inner(e) => e, + } + }) + } + + /// Copy the response body into a writer. + /// + /// This function internally uses [`std::io::copy`] and hence will continuously read data from + /// the body and then write it into writer in a streaming fashion until EOF is met. + /// + /// On success, the total number of bytes that were copied to `writer` is returned. + /// + /// [`std::io::copy`]: https://doc.rust-lang.org/std/io/fn.copy.html + /// + /// # Example + /// + /// ```rust + /// # fn run() -> Result<(), Box> { + /// let mut resp = reqwest::blocking::get("http://httpbin.org/range/5")?; + /// let mut buf: Vec = vec![]; + /// resp.copy_to(&mut buf)?; + /// assert_eq!(b"abcde", buf.as_slice()); + /// # Ok(()) + /// # } + /// ``` + pub fn copy_to(&mut self, w: &mut W) -> crate::Result + where + W: io::Write, + { + io::copy(self, w).map_err(crate::error::decode_io) + } + + /// Turn a response into an error if the server returned an error. + /// + /// # Example + /// + /// ```rust,no_run + /// # extern crate reqwest; + /// # fn run() -> Result<(), Box> { + /// let res = reqwest::blocking::get("http://httpbin.org/status/400")? + /// .error_for_status(); + /// if let Err(err) = res { + /// assert_eq!(err.status(), Some(reqwest::StatusCode::BAD_REQUEST)); + /// } + /// # Ok(()) + /// # } + /// # fn main() {} + /// ``` + pub fn error_for_status(self) -> crate::Result { + let Response { + body, + inner, + timeout, + _thread_handle, + } = self; + inner.error_for_status().map(move |inner| Response { + inner, + body, + timeout, + _thread_handle, + }) + } + + /// Turn a reference to a response into an error if the server returned an error. 
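+ ///
+ /// Unlike [`error_for_status`][Self::error_for_status], this borrows the
+ /// response instead of consuming it, so the body can still be read when
+ /// the status is not an error.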
+ /// + /// # Example + /// + /// ```rust,no_run + /// # extern crate reqwest; + /// # fn run() -> Result<(), Box> { + /// let res = reqwest::blocking::get("http://httpbin.org/status/400")?; + /// let res = res.error_for_status_ref(); + /// if let Err(err) = res { + /// assert_eq!(err.status(), Some(reqwest::StatusCode::BAD_REQUEST)); + /// } + /// # Ok(()) + /// # } + /// # fn main() {} + /// ``` + pub fn error_for_status_ref(&self) -> crate::Result<&Self> { + self.inner.error_for_status_ref().and_then(|_| Ok(self)) + } + + // private + + fn body_mut(&mut self) -> Pin<&mut dyn futures_util::io::AsyncRead> { + use futures_util::TryStreamExt; + if self.body.is_none() { + let body = mem::replace(self.inner.body_mut(), async_impl::Decoder::empty()); + + let body = body.into_stream().into_async_read(); + + self.body = Some(Box::pin(body)); + } + self.body.as_mut().expect("body was init").as_mut() + } +} + +impl Read for Response { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + use futures_util::io::AsyncReadExt; + + let timeout = self.timeout; + wait::timeout(self.body_mut().read(buf), timeout).map_err(|e| match e { + wait::Waited::TimedOut(e) => crate::error::decode(e).into_io(), + wait::Waited::Inner(e) => e, + }) + } +} + +impl> From> for Response { + fn from(r: http::Response) -> Response { + let response = async_impl::Response::from(r); + Response::new(response, None, KeepCoreThreadAlive::empty()) + } +} diff --git a/rust/reqwest/src/blocking/wait.rs b/rust/reqwest/src/blocking/wait.rs new file mode 100644 index 0000000000..7a61c2e27c --- /dev/null +++ b/rust/reqwest/src/blocking/wait.rs @@ -0,0 +1,82 @@ +use std::future::Future; +use std::sync::Arc; +use std::task::{Context, Poll, Wake, Waker}; +use std::thread::{self, Thread}; +use std::time::Duration; + +use tokio::time::Instant; + +pub(crate) fn timeout(fut: F, timeout: Option) -> Result> +where + F: Future>, +{ + enter(); + + let deadline = timeout.map(|d| { + log::trace!("wait at most {d:?}"); + Instant::now() + d + }); + + let thread = ThreadWaker(thread::current()); + // Arc shouldn't be necessary, since `Thread` is reference counted internally, + // but let's just stay safe for now. 
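+ // What follows is a minimal single-future executor: build a `Waker` that
+ // unparks this OS thread, poll the future in a loop, and park the thread
+ // (with or without a deadline) whenever the future returns `Pending`.
+ // Whatever eventually completes the future calls `wake`, which unparks
+ // this thread so the loop can poll again.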
+ let waker = Waker::from(Arc::new(thread)); + let mut cx = Context::from_waker(&waker); + + futures_util::pin_mut!(fut); + + loop { + match fut.as_mut().poll(&mut cx) { + Poll::Ready(Ok(val)) => return Ok(val), + Poll::Ready(Err(err)) => return Err(Waited::Inner(err)), + Poll::Pending => (), // fallthrough + } + + if let Some(deadline) = deadline { + let now = Instant::now(); + if now >= deadline { + log::trace!("wait timeout exceeded"); + return Err(Waited::TimedOut(crate::error::TimedOut)); + } + + log::trace!( + "({:?}) park timeout {:?}", + thread::current().id(), + deadline - now + ); + thread::park_timeout(deadline - now); + } else { + log::trace!("({:?}) park without timeout", thread::current().id()); + thread::park(); + } + } +} + +#[derive(Debug)] +pub(crate) enum Waited { + TimedOut(crate::error::TimedOut), + Inner(E), +} + +struct ThreadWaker(Thread); + +impl Wake for ThreadWaker { + fn wake(self: Arc) { + self.wake_by_ref(); + } + + fn wake_by_ref(self: &Arc) { + self.0.unpark(); + } +} + +fn enter() { + // Check we aren't already in a runtime + #[cfg(debug_assertions)] + { + let _enter = tokio::runtime::Builder::new_current_thread() + .build() + .expect("build shell runtime") + .enter(); + } +} diff --git a/rust/reqwest/src/config.rs b/rust/reqwest/src/config.rs new file mode 100644 index 0000000000..417c8076df --- /dev/null +++ b/rust/reqwest/src/config.rs @@ -0,0 +1,110 @@ +//! The `config` module provides a generic mechanism for loading and managing +//! request-scoped configuration. +//! +//! # Design Overview +//! +//! This module is centered around two abstractions: +//! +//! - The [`RequestConfigValue`] trait, used to associate a config key type with its value type. +//! - The [`RequestConfig`] struct, which wraps an optional value of the type linked via [`RequestConfigValue`]. +//! +//! Under the hood, the [`RequestConfig`] struct holds a single value for the associated config type. +//! This value can be conveniently accessed, inserted, or mutated using [`http::Extensions`], +//! enabling type-safe configuration storage and retrieval on a per-request basis. +//! +//! # Motivation +//! +//! The key design benefit is the ability to store multiple config types—potentially even with the same +//! value type (e.g., [`Duration`])—without code duplication or ambiguity. By leveraging trait association, +//! each config key is distinct at the type level, while code for storage and access remains totally generic. +//! +//! # Usage +//! +//! Implement [`RequestConfigValue`] for any marker type you wish to use as a config key, +//! specifying the associated value type. Then use [`RequestConfig`] in [`Extensions`] +//! to set or retrieve config values for each key type in a uniform way. + +use std::any::type_name; +use std::fmt::Debug; +use std::time::Duration; + +use http::Extensions; + +/// This trait is empty and is only used to associate a configuration key type with its +/// corresponding value type. +pub(crate) trait RequestConfigValue: Copy + Clone + 'static { + type Value: Clone + Debug + Send + Sync + 'static; +} + +/// RequestConfig carries a request-scoped configuration value. +#[derive(Clone, Copy)] +pub(crate) struct RequestConfig(Option); + +impl Default for RequestConfig { + fn default() -> Self { + RequestConfig(None) + } +} + +impl RequestConfig +where + T: RequestConfigValue, +{ + pub(crate) fn new(v: Option) -> Self { + RequestConfig(v) + } + + /// format request config value as struct field. 
+ /// + /// We provide this API directly to avoid leak internal value to callers. + pub(crate) fn fmt_as_field(&self, f: &mut std::fmt::DebugStruct<'_, '_>) { + if let Some(v) = &self.0 { + f.field(type_name::(), v); + } + } + + /// Retrieve the value from the request-scoped configuration. + /// + /// If the request specifies a value, use that value; otherwise, attempt to retrieve it from the current instance (typically a client instance). + pub(crate) fn fetch<'client, 'request>( + &'client self, + ext: &'request Extensions, + ) -> Option<&'request T::Value> + where + 'client: 'request, + { + ext.get::>() + .and_then(|v| v.0.as_ref()) + .or(self.0.as_ref()) + } + + /// Retrieve the value from the request's Extensions. + pub(crate) fn get(ext: &Extensions) -> Option<&T::Value> { + ext.get::>().and_then(|v| v.0.as_ref()) + } + + /// Retrieve the mutable value from the request's Extensions. + pub(crate) fn get_mut(ext: &mut Extensions) -> &mut Option { + let cfg = ext.get_or_insert_default::>(); + &mut cfg.0 + } +} + +// ================================ +// +// The following sections are all configuration types +// provided by reqwest. +// +// To add a new config: +// +// 1. create a new struct for the config key like `RequestTimeout`. +// 2. implement `RequestConfigValue` for the struct, the `Value` is the config value's type. +// +// ================================ + +#[derive(Clone, Copy)] +pub(crate) struct TotalTimeout; + +impl RequestConfigValue for TotalTimeout { + type Value = Duration; +} diff --git a/rust/reqwest/src/connect.rs b/rust/reqwest/src/connect.rs new file mode 100644 index 0000000000..ca52dd5e05 --- /dev/null +++ b/rust/reqwest/src/connect.rs @@ -0,0 +1,1816 @@ +#[cfg(feature = "__tls")] +use http::header::HeaderValue; +#[cfg(feature = "__tls")] +use http::uri::Scheme; +use http::Uri; +use hyper::rt::{Read, ReadBufCursor, Write}; +use hyper_util::client::legacy::connect::{Connected, Connection}; +#[cfg(any(feature = "socks", feature = "__tls", unix))] +use hyper_util::rt::TokioIo; +#[cfg(feature = "default-tls")] +use native_tls_crate::{TlsConnector, TlsConnectorBuilder}; +use pin_project_lite::pin_project; +use tower::util::{BoxCloneSyncServiceLayer, MapRequestLayer}; +use tower::{timeout::TimeoutLayer, util::BoxCloneSyncService, ServiceBuilder}; +use tower_service::Service; + +use std::future::Future; +use std::io::{self, IoSlice}; +use std::net::IpAddr; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use std::time::Duration; + +#[cfg(feature = "default-tls")] +use self::native_tls_conn::NativeTlsConn; +#[cfg(feature = "__rustls")] +use self::rustls_tls_conn::RustlsTlsConn; +use crate::dns::DynResolver; +use crate::error::{cast_to_internal_error, BoxError}; +use crate::proxy::{Intercepted, Matcher as ProxyMatcher}; +use sealed::{Conn, Unnameable}; + +pub(crate) type HttpConnector = hyper_util::client::legacy::connect::HttpConnector; + +#[derive(Clone)] +pub(crate) enum Connector { + // base service, with or without an embedded timeout + Simple(ConnectorService), + // at least one custom layer along with maybe an outer timeout layer + // from `builder.connect_timeout()` + WithLayers(BoxCloneSyncService), +} + +impl Service for Connector { + type Response = Conn; + type Error = BoxError; + type Future = Connecting; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + match self { + Connector::Simple(service) => service.poll_ready(cx), + Connector::WithLayers(service) => service.poll_ready(cx), + } + } + + fn call(&mut self, dst: 
Uri) -> Self::Future { + match self { + Connector::Simple(service) => service.call(dst), + Connector::WithLayers(service) => service.call(Unnameable(dst)), + } + } +} + +pub(crate) type BoxedConnectorService = BoxCloneSyncService; + +pub(crate) type BoxedConnectorLayer = + BoxCloneSyncServiceLayer; + +pub(crate) struct ConnectorBuilder { + inner: Inner, + proxies: Arc>, + verbose: verbose::Wrapper, + timeout: Option, + #[cfg(feature = "__tls")] + nodelay: bool, + #[cfg(feature = "__tls")] + tls_info: bool, + #[cfg(feature = "__tls")] + user_agent: Option, + #[cfg(feature = "socks")] + resolver: Option, + #[cfg(unix)] + unix_socket: Option>, +} + +impl ConnectorBuilder { + pub(crate) fn build(self, layers: Vec) -> Connector +where { + // construct the inner tower service + let mut base_service = ConnectorService { + inner: self.inner, + proxies: self.proxies, + verbose: self.verbose, + #[cfg(feature = "__tls")] + nodelay: self.nodelay, + #[cfg(feature = "__tls")] + tls_info: self.tls_info, + #[cfg(feature = "__tls")] + user_agent: self.user_agent, + simple_timeout: None, + #[cfg(feature = "socks")] + resolver: self.resolver.unwrap_or_else(DynResolver::gai), + #[cfg(unix)] + unix_socket: self.unix_socket, + }; + + #[cfg(unix)] + if base_service.unix_socket.is_some() && !base_service.proxies.is_empty() { + base_service.proxies = Default::default(); + log::trace!("unix_socket() set, proxies are ignored"); + } + + if layers.is_empty() { + // we have no user-provided layers, only use concrete types + base_service.simple_timeout = self.timeout; + return Connector::Simple(base_service); + } + + // otherwise we have user provided layers + // so we need type erasure all the way through + // as well as mapping the unnameable type of the layers back to Uri for the inner service + let unnameable_service = ServiceBuilder::new() + .layer(MapRequestLayer::new(|request: Unnameable| request.0)) + .service(base_service); + let mut service = BoxCloneSyncService::new(unnameable_service); + + for layer in layers { + service = ServiceBuilder::new().layer(layer).service(service); + } + + // now we handle the concrete stuff - any `connect_timeout`, + // plus a final map_err layer we can use to cast default tower layer + // errors to internal errors + match self.timeout { + Some(timeout) => { + let service = ServiceBuilder::new() + .layer(TimeoutLayer::new(timeout)) + .service(service); + let service = ServiceBuilder::new() + .map_err(|error: BoxError| cast_to_internal_error(error)) + .service(service); + let service = BoxCloneSyncService::new(service); + + Connector::WithLayers(service) + } + None => { + // no timeout, but still map err + // no named timeout layer but we still map errors since + // we might have user-provided timeout layer + let service = ServiceBuilder::new().service(service); + let service = ServiceBuilder::new() + .map_err(|error: BoxError| cast_to_internal_error(error)) + .service(service); + let service = BoxCloneSyncService::new(service); + Connector::WithLayers(service) + } + } + } + + #[cfg(not(feature = "__tls"))] + pub(crate) fn new( + mut http: HttpConnector, + proxies: Arc>, + local_addr: T, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + interface: Option<&str>, + nodelay: bool, + ) -> ConnectorBuilder + where + T: Into>, + { + http.set_local_address(local_addr.into()); + 
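+ // On the platforms listed below, outgoing connections can additionally
+ // be pinned to a named network interface.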
#[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + if let Some(interface) = interface { + http.set_interface(interface.to_owned()); + } + http.set_nodelay(nodelay); + + ConnectorBuilder { + inner: Inner::Http(http), + proxies, + verbose: verbose::OFF, + timeout: None, + #[cfg(feature = "socks")] + resolver: None, + #[cfg(unix)] + unix_socket: None, + } + } + + #[cfg(feature = "default-tls")] + pub(crate) fn new_default_tls( + http: HttpConnector, + tls: TlsConnectorBuilder, + proxies: Arc>, + user_agent: Option, + local_addr: T, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + interface: Option<&str>, + nodelay: bool, + tls_info: bool, + ) -> crate::Result + where + T: Into>, + { + let tls = tls.build().map_err(crate::error::builder)?; + Ok(Self::from_built_default_tls( + http, + tls, + proxies, + user_agent, + local_addr, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + interface, + nodelay, + tls_info, + )) + } + + #[cfg(feature = "default-tls")] + pub(crate) fn from_built_default_tls( + mut http: HttpConnector, + tls: TlsConnector, + proxies: Arc>, + user_agent: Option, + local_addr: T, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + interface: Option<&str>, + nodelay: bool, + tls_info: bool, + ) -> ConnectorBuilder + where + T: Into>, + { + http.set_local_address(local_addr.into()); + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + if let Some(interface) = interface { + http.set_interface(interface); + } + http.set_nodelay(nodelay); + http.enforce_http(false); + + ConnectorBuilder { + inner: Inner::DefaultTls(http, tls), + proxies, + verbose: verbose::OFF, + nodelay, + tls_info, + user_agent, + timeout: None, + #[cfg(feature = "socks")] + resolver: None, + #[cfg(unix)] + unix_socket: None, + } + } + + #[cfg(feature = "__rustls")] + pub(crate) fn new_rustls_tls( + mut http: HttpConnector, + tls: rustls::ClientConfig, + proxies: Arc>, + user_agent: Option, + local_addr: T, + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + interface: Option<&str>, + nodelay: bool, + tls_info: bool, + ) -> ConnectorBuilder + where + T: Into>, + { + http.set_local_address(local_addr.into()); + #[cfg(any( + target_os = "android", + target_os = "fuchsia", + target_os = "illumos", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = 
"solaris", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + if let Some(interface) = interface { + http.set_interface(interface.to_owned()); + } + http.set_nodelay(nodelay); + http.enforce_http(false); + + let (tls, tls_proxy) = if proxies.is_empty() { + let tls = Arc::new(tls); + (tls.clone(), tls) + } else { + let mut tls_proxy = tls.clone(); + tls_proxy.alpn_protocols.clear(); + (Arc::new(tls), Arc::new(tls_proxy)) + }; + + ConnectorBuilder { + inner: Inner::RustlsTls { + http, + tls, + tls_proxy, + }, + proxies, + verbose: verbose::OFF, + nodelay, + tls_info, + user_agent, + timeout: None, + #[cfg(feature = "socks")] + resolver: None, + #[cfg(unix)] + unix_socket: None, + } + } + + pub(crate) fn set_timeout(&mut self, timeout: Option) { + self.timeout = timeout; + } + + pub(crate) fn set_verbose(&mut self, enabled: bool) { + self.verbose.0 = enabled; + } + + pub(crate) fn set_keepalive(&mut self, dur: Option) { + match &mut self.inner { + #[cfg(feature = "default-tls")] + Inner::DefaultTls(http, _tls) => http.set_keepalive(dur), + #[cfg(feature = "__rustls")] + Inner::RustlsTls { http, .. } => http.set_keepalive(dur), + #[cfg(not(feature = "__tls"))] + Inner::Http(http) => http.set_keepalive(dur), + } + } + + pub(crate) fn set_keepalive_interval(&mut self, dur: Option) { + match &mut self.inner { + #[cfg(feature = "default-tls")] + Inner::DefaultTls(http, _tls) => http.set_keepalive_interval(dur), + #[cfg(feature = "__rustls")] + Inner::RustlsTls { http, .. } => http.set_keepalive_interval(dur), + #[cfg(not(feature = "__tls"))] + Inner::Http(http) => http.set_keepalive_interval(dur), + } + } + + pub(crate) fn set_keepalive_retries(&mut self, retries: Option) { + match &mut self.inner { + #[cfg(feature = "default-tls")] + Inner::DefaultTls(http, _tls) => http.set_keepalive_retries(retries), + #[cfg(feature = "__rustls")] + Inner::RustlsTls { http, .. } => http.set_keepalive_retries(retries), + #[cfg(not(feature = "__tls"))] + Inner::Http(http) => http.set_keepalive_retries(retries), + } + } + + #[cfg(feature = "socks")] + pub(crate) fn set_socks_resolver(&mut self, resolver: DynResolver) { + self.resolver = Some(resolver); + } + + #[cfg(any(target_os = "android", target_os = "fuchsia", target_os = "linux"))] + pub(crate) fn set_tcp_user_timeout(&mut self, dur: Option) { + match &mut self.inner { + #[cfg(feature = "default-tls")] + Inner::DefaultTls(http, _tls) => http.set_tcp_user_timeout(dur), + #[cfg(feature = "__rustls")] + Inner::RustlsTls { http, .. } => http.set_tcp_user_timeout(dur), + #[cfg(not(feature = "__tls"))] + Inner::Http(http) => http.set_tcp_user_timeout(dur), + } + } + + #[cfg(unix)] + pub(crate) fn set_unix_socket(&mut self, path: Option>) { + self.unix_socket = path; + } +} + +#[allow(missing_debug_implementations)] +#[derive(Clone)] +pub(crate) struct ConnectorService { + inner: Inner, + proxies: Arc>, + verbose: verbose::Wrapper, + /// When there is a single timeout layer and no other layers, + /// we embed it directly inside our base Service::call(). + /// This lets us avoid an extra `Box::pin` indirection layer + /// since `tokio::time::Timeout` is `Unpin` + simple_timeout: Option, + #[cfg(feature = "__tls")] + nodelay: bool, + #[cfg(feature = "__tls")] + tls_info: bool, + #[cfg(feature = "__tls")] + user_agent: Option, + #[cfg(feature = "socks")] + resolver: DynResolver, + /// If set, this always takes priority over TCP. 
+ #[cfg(unix)] + unix_socket: Option>, +} + +#[derive(Clone)] +enum Inner { + #[cfg(not(feature = "__tls"))] + Http(HttpConnector), + #[cfg(feature = "default-tls")] + DefaultTls(HttpConnector, TlsConnector), + #[cfg(feature = "__rustls")] + RustlsTls { + http: HttpConnector, + tls: Arc, + tls_proxy: Arc, + }, +} + +impl Inner { + #[cfg(feature = "socks")] + fn get_http_connector(&mut self) -> &mut crate::connect::HttpConnector { + match self { + #[cfg(feature = "default-tls")] + Inner::DefaultTls(http, _) => http, + #[cfg(feature = "__rustls")] + Inner::RustlsTls { http, .. } => http, + #[cfg(not(feature = "__tls"))] + Inner::Http(http) => http, + } + } +} + +impl ConnectorService { + #[cfg(feature = "socks")] + async fn connect_socks(mut self, dst: Uri, proxy: Intercepted) -> Result { + let dns = match proxy.uri().scheme_str() { + Some("socks4") | Some("socks5") => socks::DnsResolve::Local, + Some("socks4a") | Some("socks5h") => socks::DnsResolve::Proxy, + _ => { + unreachable!("connect_socks is only called for socks proxies"); + } + }; + + match &mut self.inner { + #[cfg(feature = "default-tls")] + Inner::DefaultTls(http, tls) => { + if dst.scheme() == Some(&Scheme::HTTPS) { + let host = dst.host().ok_or("no host in url")?.to_string(); + let conn = socks::connect(proxy, dst, dns, &self.resolver, http).await?; + let conn = TokioIo::new(conn); + let conn = TokioIo::new(conn); + let tls_connector = tokio_native_tls::TlsConnector::from(tls.clone()); + let io = tls_connector.connect(&host, conn).await?; + let io = TokioIo::new(io); + return Ok(Conn { + inner: self.verbose.wrap(NativeTlsConn { inner: io }), + is_proxy: false, + tls_info: self.tls_info, + }); + } + } + #[cfg(feature = "__rustls")] + Inner::RustlsTls { http, tls, .. } => { + if dst.scheme() == Some(&Scheme::HTTPS) { + use std::convert::TryFrom; + use tokio_rustls::TlsConnector as RustlsConnector; + + let tls = tls.clone(); + let host = dst.host().ok_or("no host in url")?.to_string(); + let conn = socks::connect(proxy, dst, dns, &self.resolver, http).await?; + let conn = TokioIo::new(conn); + let conn = TokioIo::new(conn); + let server_name = + rustls_pki_types::ServerName::try_from(host.as_str().to_owned()) + .map_err(|_| "Invalid Server Name")?; + let io = RustlsConnector::from(tls) + .connect(server_name, conn) + .await?; + let io = TokioIo::new(io); + return Ok(Conn { + inner: self.verbose.wrap(RustlsTlsConn { inner: io }), + is_proxy: false, + tls_info: false, + }); + } + } + #[cfg(not(feature = "__tls"))] + Inner::Http(http) => { + let conn = socks::connect(proxy, dst, dns, &self.resolver, http).await?; + return Ok(Conn { + inner: self.verbose.wrap(TokioIo::new(conn)), + is_proxy: false, + tls_info: false, + }); + } + } + + let resolver = &self.resolver; + let http = self.inner.get_http_connector(); + socks::connect(proxy, dst, dns, resolver, http) + .await + .map(|tcp| Conn { + inner: self.verbose.wrap(TokioIo::new(tcp)), + is_proxy: false, + tls_info: false, + }) + .map_err(Into::into) + } + + async fn connect_with_maybe_proxy(self, dst: Uri, is_proxy: bool) -> Result { + match self.inner { + #[cfg(not(feature = "__tls"))] + Inner::Http(mut http) => { + let io = http.call(dst).await?; + Ok(Conn { + inner: self.verbose.wrap(io), + is_proxy, + tls_info: false, + }) + } + #[cfg(feature = "default-tls")] + Inner::DefaultTls(http, tls) => { + let mut http = http.clone(); + + // Disable Nagle's algorithm for TLS handshake + // + // https://www.openssl.org/docs/man1.1.1/man3/SSL_connect.html#NOTES + if !self.nodelay && 
(dst.scheme() == Some(&Scheme::HTTPS)) { + http.set_nodelay(true); + } + + let tls_connector = tokio_native_tls::TlsConnector::from(tls.clone()); + let mut http = hyper_tls::HttpsConnector::from((http, tls_connector)); + let io = http.call(dst).await?; + + if let hyper_tls::MaybeHttpsStream::Https(stream) = io { + if !self.nodelay { + stream + .inner() + .get_ref() + .get_ref() + .get_ref() + .inner() + .inner() + .set_nodelay(false)?; + } + Ok(Conn { + inner: self.verbose.wrap(NativeTlsConn { inner: stream }), + is_proxy, + tls_info: self.tls_info, + }) + } else { + Ok(Conn { + inner: self.verbose.wrap(io), + is_proxy, + tls_info: false, + }) + } + } + #[cfg(feature = "__rustls")] + Inner::RustlsTls { http, tls, .. } => { + let mut http = http.clone(); + + // Disable Nagle's algorithm for TLS handshake + // + // https://www.openssl.org/docs/man1.1.1/man3/SSL_connect.html#NOTES + if !self.nodelay && (dst.scheme() == Some(&Scheme::HTTPS)) { + http.set_nodelay(true); + } + + let mut http = hyper_rustls::HttpsConnector::from((http, tls.clone())); + let io = http.call(dst).await?; + + if let hyper_rustls::MaybeHttpsStream::Https(stream) = io { + if !self.nodelay { + let (io, _) = stream.inner().get_ref(); + io.inner().inner().set_nodelay(false)?; + } + Ok(Conn { + inner: self.verbose.wrap(RustlsTlsConn { inner: stream }), + is_proxy, + tls_info: self.tls_info, + }) + } else { + Ok(Conn { + inner: self.verbose.wrap(io), + is_proxy, + tls_info: false, + }) + } + } + } + } + + /// Connect over Unix Domain Socket (or Windows?). + #[cfg(unix)] + async fn connect_local_transport(self, dst: Uri) -> Result { + let path = self + .unix_socket + .as_ref() + .expect("connect local must have socket path") + .clone(); + let svc = tower::service_fn(move |_| { + let fut = tokio::net::UnixStream::connect(path.clone()); + async move { + let io = fut.await?; + Ok::<_, std::io::Error>(TokioIo::new(io)) + } + }); + let is_proxy = false; + match self.inner { + #[cfg(not(feature = "__tls"))] + Inner::Http(..) => { + let mut svc = svc; + let io = svc.call(dst).await?; + Ok(Conn { + inner: self.verbose.wrap(io), + is_proxy, + tls_info: false, + }) + } + #[cfg(feature = "default-tls")] + Inner::DefaultTls(_, tls) => { + let tls_connector = tokio_native_tls::TlsConnector::from(tls.clone()); + let mut http = hyper_tls::HttpsConnector::from((svc, tls_connector)); + let io = http.call(dst).await?; + + if let hyper_tls::MaybeHttpsStream::Https(stream) = io { + Ok(Conn { + inner: self.verbose.wrap(NativeTlsConn { inner: stream }), + is_proxy, + tls_info: self.tls_info, + }) + } else { + Ok(Conn { + inner: self.verbose.wrap(io), + is_proxy, + tls_info: false, + }) + } + } + #[cfg(feature = "__rustls")] + Inner::RustlsTls { tls, .. } => { + let mut http = hyper_rustls::HttpsConnector::from((svc, tls.clone())); + let io = http.call(dst).await?; + + if let hyper_rustls::MaybeHttpsStream::Https(stream) = io { + Ok(Conn { + inner: self.verbose.wrap(RustlsTlsConn { inner: stream }), + is_proxy, + tls_info: self.tls_info, + }) + } else { + Ok(Conn { + inner: self.verbose.wrap(io), + is_proxy, + tls_info: false, + }) + } + } + } + } + + async fn connect_via_proxy(self, dst: Uri, proxy: Intercepted) -> Result { + log::debug!("proxy({proxy:?}) intercepts '{dst:?}'"); + + #[cfg(feature = "socks")] + match proxy.uri().scheme_str().ok_or("proxy scheme expected")? 
{ + "socks4" | "socks4a" | "socks5" | "socks5h" => { + return self.connect_socks(dst, proxy).await + } + _ => (), + } + + let proxy_dst = proxy.uri().clone(); + #[cfg(feature = "__tls")] + let auth = proxy.basic_auth().cloned(); + + #[cfg(feature = "__tls")] + let misc = proxy.custom_headers().clone(); + + match &self.inner { + #[cfg(feature = "default-tls")] + Inner::DefaultTls(http, tls) => { + if dst.scheme() == Some(&Scheme::HTTPS) { + log::trace!("tunneling HTTPS over proxy"); + let tls_connector = tokio_native_tls::TlsConnector::from(tls.clone()); + let inner = + hyper_tls::HttpsConnector::from((http.clone(), tls_connector.clone())); + // TODO: we could cache constructing this + let mut tunnel = + hyper_util::client::legacy::connect::proxy::Tunnel::new(proxy_dst, inner); + if let Some(auth) = auth { + tunnel = tunnel.with_auth(auth); + } + if let Some(ua) = self.user_agent { + let mut headers = http::HeaderMap::new(); + headers.insert(http::header::USER_AGENT, ua); + tunnel = tunnel.with_headers(headers); + } + // Note that custom headers may override the user agent header. + if let Some(custom_headers) = misc { + tunnel = tunnel.with_headers(custom_headers.clone()); + } + // We don't wrap this again in an HttpsConnector since that uses Maybe, + // and we know this is definitely HTTPS. + let tunneled = tunnel.call(dst.clone()).await?; + let tls_connector = tokio_native_tls::TlsConnector::from(tls.clone()); + let io = tls_connector + .connect(dst.host().ok_or("no host in url")?, TokioIo::new(tunneled)) + .await?; + return Ok(Conn { + inner: self.verbose.wrap(NativeTlsConn { + inner: TokioIo::new(io), + }), + is_proxy: false, + tls_info: false, + }); + } + } + #[cfg(feature = "__rustls")] + Inner::RustlsTls { + http, + tls, + tls_proxy, + } => { + if dst.scheme() == Some(&Scheme::HTTPS) { + use rustls_pki_types::ServerName; + use std::convert::TryFrom; + use tokio_rustls::TlsConnector as RustlsConnector; + + log::trace!("tunneling HTTPS over proxy"); + let http = http.clone(); + let inner = hyper_rustls::HttpsConnector::from((http, tls_proxy.clone())); + // TODO: we could cache constructing this + let mut tunnel = + hyper_util::client::legacy::connect::proxy::Tunnel::new(proxy_dst, inner); + if let Some(auth) = auth { + tunnel = tunnel.with_auth(auth); + } + if let Some(custom_headers) = misc { + tunnel = tunnel.with_headers(custom_headers.clone()); + } + if let Some(ua) = self.user_agent { + let mut headers = http::HeaderMap::new(); + headers.insert(http::header::USER_AGENT, ua); + tunnel = tunnel.with_headers(headers); + } + // We don't wrap this again in an HttpsConnector since that uses Maybe, + // and we know this is definitely HTTPS. 
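+                    // For orientation, the plaintext preamble such a tunnel sends
+                    // before TLS is layered on top looks roughly like this (a
+                    // sketch, not the exact bytes hyper-util writes):
+                    fn _connect_preamble(host: &str, port: u16, basic_auth: Option<&str>) -> String {
+                        let mut req =
+                            format!("CONNECT {host}:{port} HTTP/1.1\r\nHost: {host}:{port}\r\n");
+                        if let Some(auth) = basic_auth {
+                            // e.g. "Basic dXNlcjpwYXNz"
+                            req.push_str(&format!("Proxy-Authorization: {auth}\r\n"));
+                        }
+                        req.push_str("\r\n");
+                        req
+                    }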
+ let tunneled = tunnel.call(dst.clone()).await?; + let host = dst.host().ok_or("no host in url")?.to_string(); + let server_name = ServerName::try_from(host.as_str().to_owned()) + .map_err(|_| "Invalid Server Name")?; + let io = RustlsConnector::from(tls.clone()) + .connect(server_name, TokioIo::new(tunneled)) + .await?; + + return Ok(Conn { + inner: self.verbose.wrap(RustlsTlsConn { + inner: TokioIo::new(io), + }), + is_proxy: false, + tls_info: false, + }); + } + } + #[cfg(not(feature = "__tls"))] + Inner::Http(_) => (), + } + + self.connect_with_maybe_proxy(proxy_dst, true).await + } +} + +async fn with_timeout(f: F, timeout: Option) -> Result +where + F: Future>, +{ + if let Some(to) = timeout { + match tokio::time::timeout(to, f).await { + Err(_elapsed) => Err(Box::new(crate::error::TimedOut) as BoxError), + Ok(Ok(try_res)) => Ok(try_res), + Ok(Err(e)) => Err(e), + } + } else { + f.await + } +} + +impl Service for ConnectorService { + type Response = Conn; + type Error = BoxError; + type Future = Connecting; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, dst: Uri) -> Self::Future { + log::debug!("starting new connection: {dst:?}"); + let timeout = self.simple_timeout; + + // Local transports (UDS) skip proxies + #[cfg(unix)] + if self.unix_socket.is_some() { + return Box::pin(with_timeout( + self.clone().connect_local_transport(dst), + timeout, + )); + } + + for prox in self.proxies.iter() { + if let Some(intercepted) = prox.intercept(&dst) { + return Box::pin(with_timeout( + self.clone().connect_via_proxy(dst, intercepted), + timeout, + )); + } + } + + Box::pin(with_timeout( + self.clone().connect_with_maybe_proxy(dst, false), + timeout, + )) + } +} + +#[cfg(feature = "__tls")] +trait TlsInfoFactory { + fn tls_info(&self) -> Option; +} + +#[cfg(feature = "__tls")] +impl TlsInfoFactory for TokioIo { + fn tls_info(&self) -> Option { + self.inner().tls_info() + } +} + +// ===== TcpStream ===== + +#[cfg(feature = "__tls")] +impl TlsInfoFactory for tokio::net::TcpStream { + fn tls_info(&self) -> Option { + None + } +} + +#[cfg(feature = "default-tls")] +impl TlsInfoFactory for tokio_native_tls::TlsStream>> { + fn tls_info(&self) -> Option { + let peer_certificate = self + .get_ref() + .peer_certificate() + .ok() + .flatten() + .and_then(|c| c.to_der().ok()); + Some(crate::tls::TlsInfo { peer_certificate }) + } +} + +#[cfg(feature = "default-tls")] +impl TlsInfoFactory + for tokio_native_tls::TlsStream< + TokioIo>>, + > +{ + fn tls_info(&self) -> Option { + let peer_certificate = self + .get_ref() + .peer_certificate() + .ok() + .flatten() + .and_then(|c| c.to_der().ok()); + Some(crate::tls::TlsInfo { peer_certificate }) + } +} + +#[cfg(feature = "default-tls")] +impl TlsInfoFactory for hyper_tls::MaybeHttpsStream> { + fn tls_info(&self) -> Option { + match self { + hyper_tls::MaybeHttpsStream::Https(tls) => tls.tls_info(), + hyper_tls::MaybeHttpsStream::Http(_) => None, + } + } +} + +#[cfg(feature = "__rustls")] +impl TlsInfoFactory for tokio_rustls::client::TlsStream>> { + fn tls_info(&self) -> Option { + let peer_certificate = self + .get_ref() + .1 + .peer_certificates() + .and_then(|certs| certs.first()) + .map(|c| c.to_vec()); + Some(crate::tls::TlsInfo { peer_certificate }) + } +} + +#[cfg(feature = "__rustls")] +impl TlsInfoFactory + for tokio_rustls::client::TlsStream< + TokioIo>>, + > +{ + fn tls_info(&self) -> Option { + let peer_certificate = self + .get_ref() + .1 + .peer_certificates() + .and_then(|certs| 
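+        // The `with_timeout` helper above boils down to this pattern: race the
+        // connect future against a deadline and report the elapsed case as a
+        // boxed error. A minimal sketch:
+        //
+        //     match tokio::time::timeout(std::time::Duration::from_secs(10), fut).await {
+        //         Ok(Ok(conn)) => { /* connected in time */ }
+        //         Ok(Err(e)) => { /* the connect itself failed */ }
+        //         Err(_elapsed) => { /* deadline hit first */ }
+        //     }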
certs.first()) + .map(|c| c.to_vec()); + Some(crate::tls::TlsInfo { peer_certificate }) + } +} + +#[cfg(feature = "__rustls")] +impl TlsInfoFactory for hyper_rustls::MaybeHttpsStream> { + fn tls_info(&self) -> Option { + match self { + hyper_rustls::MaybeHttpsStream::Https(tls) => tls.tls_info(), + hyper_rustls::MaybeHttpsStream::Http(_) => None, + } + } +} + +// ===== UnixStream ===== + +#[cfg(feature = "__tls")] +#[cfg(unix)] +impl TlsInfoFactory for tokio::net::UnixStream { + fn tls_info(&self) -> Option { + None + } +} + +#[cfg(feature = "default-tls")] +#[cfg(unix)] +impl TlsInfoFactory for tokio_native_tls::TlsStream>> { + fn tls_info(&self) -> Option { + let peer_certificate = self + .get_ref() + .peer_certificate() + .ok() + .flatten() + .and_then(|c| c.to_der().ok()); + Some(crate::tls::TlsInfo { peer_certificate }) + } +} + +#[cfg(feature = "default-tls")] +#[cfg(unix)] +impl TlsInfoFactory + for tokio_native_tls::TlsStream< + TokioIo>>, + > +{ + fn tls_info(&self) -> Option { + let peer_certificate = self + .get_ref() + .peer_certificate() + .ok() + .flatten() + .and_then(|c| c.to_der().ok()); + Some(crate::tls::TlsInfo { peer_certificate }) + } +} + +#[cfg(feature = "default-tls")] +#[cfg(unix)] +impl TlsInfoFactory for hyper_tls::MaybeHttpsStream> { + fn tls_info(&self) -> Option { + match self { + hyper_tls::MaybeHttpsStream::Https(tls) => tls.tls_info(), + hyper_tls::MaybeHttpsStream::Http(_) => None, + } + } +} + +#[cfg(feature = "__rustls")] +#[cfg(unix)] +impl TlsInfoFactory for tokio_rustls::client::TlsStream>> { + fn tls_info(&self) -> Option { + let peer_certificate = self + .get_ref() + .1 + .peer_certificates() + .and_then(|certs| certs.first()) + .map(|c| c.to_vec()); + Some(crate::tls::TlsInfo { peer_certificate }) + } +} + +#[cfg(feature = "__rustls")] +#[cfg(unix)] +impl TlsInfoFactory + for tokio_rustls::client::TlsStream< + TokioIo>>, + > +{ + fn tls_info(&self) -> Option { + let peer_certificate = self + .get_ref() + .1 + .peer_certificates() + .and_then(|certs| certs.first()) + .map(|c| c.to_vec()); + Some(crate::tls::TlsInfo { peer_certificate }) + } +} + +#[cfg(feature = "__rustls")] +#[cfg(unix)] +impl TlsInfoFactory for hyper_rustls::MaybeHttpsStream> { + fn tls_info(&self) -> Option { + match self { + hyper_rustls::MaybeHttpsStream::Https(tls) => tls.tls_info(), + hyper_rustls::MaybeHttpsStream::Http(_) => None, + } + } +} + +pub(crate) trait AsyncConn: + Read + Write + Connection + Send + Sync + Unpin + 'static +{ +} + +impl AsyncConn for T {} + +#[cfg(feature = "__tls")] +trait AsyncConnWithInfo: AsyncConn + TlsInfoFactory {} +#[cfg(not(feature = "__tls"))] +trait AsyncConnWithInfo: AsyncConn {} + +#[cfg(feature = "__tls")] +impl AsyncConnWithInfo for T {} +#[cfg(not(feature = "__tls"))] +impl AsyncConnWithInfo for T {} + +type BoxConn = Box; + +pub(crate) mod sealed { + use super::*; + #[derive(Debug)] + pub struct Unnameable(pub(super) Uri); + + pin_project! { + /// Note: the `is_proxy` member means *is plain text HTTP proxy*. + /// This tells hyper whether the URI should be written in + /// * origin-form (`GET /just/a/path HTTP/1.1`), when `is_proxy == false`, or + /// * absolute-form (`GET http://foo.bar/and/a/path HTTP/1.1`), otherwise. + #[allow(missing_debug_implementations)] + pub struct Conn { + #[pin] + pub(super)inner: BoxConn, + pub(super) is_proxy: bool, + // Only needed for __tls, but #[cfg()] on fields breaks pin_project! 
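+            // Where `tls_info` surfaces for users: with `ClientBuilder::tls_info(true)`
+            // the peer's DER-encoded certificate is attached to the response
+            // extensions. A hedged sketch (requires a TLS backend feature):
+            //
+            //     let client = reqwest::Client::builder().tls_info(true).build()?;
+            //     let resp = client.get("https://example.com").send().await?;
+            //     if let Some(info) = resp.extensions().get::<reqwest::tls::TlsInfo>() {
+            //         let der_len = info.peer_certificate().map(|der| der.len());
+            //     }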
+ pub(super) tls_info: bool, + } + } + + impl Connection for Conn { + fn connected(&self) -> Connected { + let connected = self.inner.connected().proxy(self.is_proxy); + #[cfg(feature = "__tls")] + if self.tls_info { + if let Some(tls_info) = self.inner.tls_info() { + connected.extra(tls_info) + } else { + connected + } + } else { + connected + } + #[cfg(not(feature = "__tls"))] + connected + } + } + + impl Read for Conn { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context, + buf: ReadBufCursor<'_>, + ) -> Poll> { + let this = self.project(); + Read::poll_read(this.inner, cx, buf) + } + } + + impl Write for Conn { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context, + buf: &[u8], + ) -> Poll> { + let this = self.project(); + Write::poll_write(this.inner, cx, buf) + } + + fn poll_write_vectored( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + bufs: &[IoSlice<'_>], + ) -> Poll> { + let this = self.project(); + Write::poll_write_vectored(this.inner, cx, bufs) + } + + fn is_write_vectored(&self) -> bool { + self.inner.is_write_vectored() + } + + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let this = self.project(); + Write::poll_flush(this.inner, cx) + } + + fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let this = self.project(); + Write::poll_shutdown(this.inner, cx) + } + } +} + +// Some sealed things for UDS +#[cfg(unix)] +pub(crate) mod uds { + use std::path::Path; + + /// A provider for Unix Domain Socket paths. + /// + /// This trait is sealed. This allows us expand the support in the future + /// by controlling who can implement the trait. + /// + /// It's available in the docs to see what type may be passed in. + #[cfg(unix)] + pub trait UnixSocketProvider { + #[doc(hidden)] + fn reqwest_uds_path(&self, _: Internal) -> &Path; + } + + #[allow(missing_debug_implementations)] + pub struct Internal; + + macro_rules! as_path { + ($($t:ty,)+) => { + $( + impl UnixSocketProvider for $t { + #[doc(hidden)] + fn reqwest_uds_path(&self, _: Internal) -> &Path { + self.as_ref() + } + } + )+ + } + } + + as_path![ + String, + &'_ str, + &'_ Path, + std::path::PathBuf, + std::sync::Arc, + ]; +} + +pub(crate) type Connecting = Pin> + Send>>; + +#[cfg(feature = "default-tls")] +mod native_tls_conn { + use super::TlsInfoFactory; + use hyper::rt::{Read, ReadBufCursor, Write}; + use hyper_tls::MaybeHttpsStream; + use hyper_util::client::legacy::connect::{Connected, Connection}; + use hyper_util::rt::TokioIo; + use pin_project_lite::pin_project; + use std::{ + io::{self, IoSlice}, + pin::Pin, + task::{Context, Poll}, + }; + use tokio::io::{AsyncRead, AsyncWrite}; + use tokio::net::TcpStream; + use tokio_native_tls::TlsStream; + + pin_project! 
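+    // Putting the sealed `uds` module above to use: any path type covered by its
+    // `as_path!` macro can be handed to the client builder. A hedged sketch that
+    // assumes the builder surface is `ClientBuilder::unix_socket`; the socket
+    // path is a placeholder:
+    //
+    //     let client = reqwest::Client::builder()
+    //         .unix_socket("/var/run/docker.sock")
+    //         .build()?;
+    //     // Proxies are skipped for local transports, per `ConnectorService::call`.
+    //     let resp = client.get("http://localhost/version").send().await?;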
{ + pub(super) struct NativeTlsConn { + #[pin] pub(super) inner: TokioIo>, + } + } + + impl Connection for NativeTlsConn>> { + fn connected(&self) -> Connected { + let connected = self + .inner + .inner() + .get_ref() + .get_ref() + .get_ref() + .inner() + .connected(); + #[cfg(feature = "native-tls-alpn")] + match self.inner.inner().get_ref().negotiated_alpn().ok() { + Some(Some(alpn_protocol)) if alpn_protocol == b"h2" => connected.negotiated_h2(), + _ => connected, + } + #[cfg(not(feature = "native-tls-alpn"))] + connected + } + } + + impl Connection for NativeTlsConn>>> { + fn connected(&self) -> Connected { + let connected = self + .inner + .inner() + .get_ref() + .get_ref() + .get_ref() + .inner() + .connected(); + #[cfg(feature = "native-tls-alpn")] + match self.inner.inner().get_ref().negotiated_alpn().ok() { + Some(Some(alpn_protocol)) if alpn_protocol == b"h2" => connected.negotiated_h2(), + _ => connected, + } + #[cfg(not(feature = "native-tls-alpn"))] + connected + } + } + + #[cfg(unix)] + impl Connection for NativeTlsConn>> { + fn connected(&self) -> Connected { + let connected = Connected::new(); + #[cfg(feature = "native-tls-alpn")] + match self.inner.inner().get_ref().negotiated_alpn().ok() { + Some(Some(alpn_protocol)) if alpn_protocol == b"h2" => connected.negotiated_h2(), + _ => connected, + } + #[cfg(not(feature = "native-tls-alpn"))] + connected + } + } + + #[cfg(unix)] + impl Connection for NativeTlsConn>>> { + fn connected(&self) -> Connected { + let connected = Connected::new(); + #[cfg(feature = "native-tls-alpn")] + match self.inner.inner().get_ref().negotiated_alpn().ok() { + Some(Some(alpn_protocol)) if alpn_protocol == b"h2" => connected.negotiated_h2(), + _ => connected, + } + #[cfg(not(feature = "native-tls-alpn"))] + connected + } + } + + impl Read for NativeTlsConn { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context, + buf: ReadBufCursor<'_>, + ) -> Poll> { + let this = self.project(); + Read::poll_read(this.inner, cx, buf) + } + } + + impl Write for NativeTlsConn { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context, + buf: &[u8], + ) -> Poll> { + let this = self.project(); + Write::poll_write(this.inner, cx, buf) + } + + fn poll_write_vectored( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + bufs: &[IoSlice<'_>], + ) -> Poll> { + let this = self.project(); + Write::poll_write_vectored(this.inner, cx, bufs) + } + + fn is_write_vectored(&self) -> bool { + self.inner.is_write_vectored() + } + + fn poll_flush( + self: Pin<&mut Self>, + cx: &mut Context, + ) -> Poll> { + let this = self.project(); + Write::poll_flush(this.inner, cx) + } + + fn poll_shutdown( + self: Pin<&mut Self>, + cx: &mut Context, + ) -> Poll> { + let this = self.project(); + Write::poll_shutdown(this.inner, cx) + } + } + + impl TlsInfoFactory for NativeTlsConn + where + TokioIo>: TlsInfoFactory, + { + fn tls_info(&self) -> Option { + self.inner.tls_info() + } + } +} + +#[cfg(feature = "__rustls")] +mod rustls_tls_conn { + use super::TlsInfoFactory; + use hyper::rt::{Read, ReadBufCursor, Write}; + use hyper_rustls::MaybeHttpsStream; + use hyper_util::client::legacy::connect::{Connected, Connection}; + use hyper_util::rt::TokioIo; + use pin_project_lite::pin_project; + use std::{ + io::{self, IoSlice}, + pin::Pin, + task::{Context, Poll}, + }; + use tokio::io::{AsyncRead, AsyncWrite}; + use tokio::net::TcpStream; + use tokio_rustls::client::TlsStream; + + pin_project! 
{ + pub(super) struct RustlsTlsConn { + #[pin] pub(super) inner: TokioIo>, + } + } + + impl Connection for RustlsTlsConn>> { + fn connected(&self) -> Connected { + if self.inner.inner().get_ref().1.alpn_protocol() == Some(b"h2") { + self.inner + .inner() + .get_ref() + .0 + .inner() + .connected() + .negotiated_h2() + } else { + self.inner.inner().get_ref().0.inner().connected() + } + } + } + impl Connection for RustlsTlsConn>>> { + fn connected(&self) -> Connected { + if self.inner.inner().get_ref().1.alpn_protocol() == Some(b"h2") { + self.inner + .inner() + .get_ref() + .0 + .inner() + .connected() + .negotiated_h2() + } else { + self.inner.inner().get_ref().0.inner().connected() + } + } + } + + #[cfg(unix)] + impl Connection for RustlsTlsConn>> { + fn connected(&self) -> Connected { + if self.inner.inner().get_ref().1.alpn_protocol() == Some(b"h2") { + self.inner + .inner() + .get_ref() + .0 + .inner() + .connected() + .negotiated_h2() + } else { + self.inner.inner().get_ref().0.inner().connected() + } + } + } + + #[cfg(unix)] + impl Connection for RustlsTlsConn>>> { + fn connected(&self) -> Connected { + if self.inner.inner().get_ref().1.alpn_protocol() == Some(b"h2") { + self.inner + .inner() + .get_ref() + .0 + .inner() + .connected() + .negotiated_h2() + } else { + self.inner.inner().get_ref().0.inner().connected() + } + } + } + + impl Read for RustlsTlsConn { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context, + buf: ReadBufCursor<'_>, + ) -> Poll> { + let this = self.project(); + Read::poll_read(this.inner, cx, buf) + } + } + + impl Write for RustlsTlsConn { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context, + buf: &[u8], + ) -> Poll> { + let this = self.project(); + Write::poll_write(this.inner, cx, buf) + } + + fn poll_write_vectored( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + bufs: &[IoSlice<'_>], + ) -> Poll> { + let this = self.project(); + Write::poll_write_vectored(this.inner, cx, bufs) + } + + fn is_write_vectored(&self) -> bool { + self.inner.is_write_vectored() + } + + fn poll_flush( + self: Pin<&mut Self>, + cx: &mut Context, + ) -> Poll> { + let this = self.project(); + Write::poll_flush(this.inner, cx) + } + + fn poll_shutdown( + self: Pin<&mut Self>, + cx: &mut Context, + ) -> Poll> { + let this = self.project(); + Write::poll_shutdown(this.inner, cx) + } + } + impl TlsInfoFactory for RustlsTlsConn + where + TokioIo>: TlsInfoFactory, + { + fn tls_info(&self) -> Option { + self.inner.tls_info() + } + } +} + +#[cfg(feature = "socks")] +mod socks { + use tower_service::Service; + + use http::uri::Scheme; + use http::Uri; + use hyper_util::client::legacy::connect::proxy::{SocksV4, SocksV5}; + use tokio::net::TcpStream; + + use super::BoxError; + use crate::proxy::Intercepted; + + pub(super) enum DnsResolve { + Local, + Proxy, + } + + #[derive(Debug)] + pub(super) enum SocksProxyError { + SocksNoHostInUrl, + SocksLocalResolve(BoxError), + SocksConnect(BoxError), + } + + pub(super) async fn connect( + proxy: Intercepted, + dst: Uri, + dns_mode: DnsResolve, + resolver: &crate::dns::DynResolver, + http_connector: &mut crate::connect::HttpConnector, + ) -> Result { + let https = dst.scheme() == Some(&Scheme::HTTPS); + let original_host = dst.host().ok_or(SocksProxyError::SocksNoHostInUrl)?; + let mut host = original_host.to_owned(); + let port = match dst.port() { + Some(p) => p.as_u16(), + None if https => 443u16, + _ => 80u16, + }; + + if let DnsResolve::Local = dns_mode { + let maybe_new_target = resolver + .http_resolve(&dst) + .await + 
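+        // Note on the TLS side: the `connected()` impls above only report HTTP/2
+        // when ALPN agreed on "h2", which requires the client TLS config to offer
+        // it in the first place, e.g. with plain rustls (root-store setup elided):
+        //
+        //     config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];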
.map_err(SocksProxyError::SocksLocalResolve)? + .next(); + if let Some(new_target) = maybe_new_target { + log::trace!("socks local dns resolved {new_target:?}"); + // If the resolved IP is IPv6, wrap it in brackets for URI formatting + let ip = new_target.ip(); + if ip.is_ipv6() { + host = format!("[{}]", ip); + } else { + host = ip.to_string(); + } + } + } + + let proxy_uri = proxy.uri().clone(); + // Build a Uri for the destination + let dst_uri = format!( + "{}://{}:{}", + if https { "https" } else { "http" }, + host, + port + ) + .parse::() + .map_err(|e| SocksProxyError::SocksConnect(e.into()))?; + + // TODO: can `Scheme::from_static()` be const fn, compare with a SOCKS5 constant? + match proxy.uri().scheme_str() { + Some("socks4") | Some("socks4a") => { + let mut svc = SocksV4::new(proxy_uri, http_connector); + let stream = Service::call(&mut svc, dst_uri) + .await + .map_err(|e| SocksProxyError::SocksConnect(e.into()))?; + Ok(stream.into_inner()) + } + Some("socks5") | Some("socks5h") => { + let mut svc = if let Some((username, password)) = proxy.raw_auth() { + SocksV5::new(proxy_uri, http_connector) + .with_auth(username.to_string(), password.to_string()) + } else { + SocksV5::new(proxy_uri, http_connector) + }; + let stream = Service::call(&mut svc, dst_uri) + .await + .map_err(|e| SocksProxyError::SocksConnect(e.into()))?; + Ok(stream.into_inner()) + } + _ => unreachable!(), + } + } + + impl std::fmt::Display for SocksProxyError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::SocksNoHostInUrl => f.write_str("socks proxy destination has no host"), + Self::SocksLocalResolve(_) => f.write_str("error resolving for socks proxy"), + Self::SocksConnect(_) => f.write_str("error connecting to socks proxy"), + } + } + } + + impl std::error::Error for SocksProxyError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::SocksNoHostInUrl => None, + Self::SocksLocalResolve(ref e) => Some(&**e), + Self::SocksConnect(ref e) => Some(&**e), + } + } + } +} + +mod verbose { + use crate::util::Escape; + use hyper::rt::{Read, ReadBufCursor, Write}; + use hyper_util::client::legacy::connect::{Connected, Connection}; + use std::cmp::min; + use std::fmt; + use std::io::{self, IoSlice}; + use std::pin::Pin; + use std::task::{Context, Poll}; + + pub(super) const OFF: Wrapper = Wrapper(false); + + #[derive(Clone, Copy)] + pub(super) struct Wrapper(pub(super) bool); + + impl Wrapper { + pub(super) fn wrap(&self, conn: T) -> super::BoxConn { + if self.0 && log::log_enabled!(log::Level::Trace) { + Box::new(Verbose { + // truncate is fine + id: crate::util::fast_random() as u32, + inner: conn, + }) + } else { + Box::new(conn) + } + } + } + + struct Verbose { + id: u32, + inner: T, + } + + impl Connection for Verbose { + fn connected(&self) -> Connected { + self.inner.connected() + } + } + + impl Read for Verbose { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context, + mut buf: ReadBufCursor<'_>, + ) -> Poll> { + // TODO: This _does_ forget the `init` len, so it could result in + // re-initializing twice. Needs upstream support, perhaps. + // SAFETY: Passing to a ReadBuf will never de-initialize any bytes. 
+ let mut vbuf = hyper::rt::ReadBuf::uninit(unsafe { buf.as_mut() }); + match Pin::new(&mut self.inner).poll_read(cx, vbuf.unfilled()) { + Poll::Ready(Ok(())) => { + log::trace!("{:08x} read: {:?}", self.id, Escape::new(vbuf.filled())); + let len = vbuf.filled().len(); + // SAFETY: The two cursors were for the same buffer. What was + // filled in one is safe in the other. + unsafe { + buf.advance(len); + } + Poll::Ready(Ok(())) + } + Poll::Ready(Err(e)) => Poll::Ready(Err(e)), + Poll::Pending => Poll::Pending, + } + } + } + + impl Write for Verbose { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context, + buf: &[u8], + ) -> Poll> { + match Pin::new(&mut self.inner).poll_write(cx, buf) { + Poll::Ready(Ok(n)) => { + log::trace!("{:08x} write: {:?}", self.id, Escape::new(&buf[..n])); + Poll::Ready(Ok(n)) + } + Poll::Ready(Err(e)) => Poll::Ready(Err(e)), + Poll::Pending => Poll::Pending, + } + } + + fn poll_write_vectored( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + bufs: &[IoSlice<'_>], + ) -> Poll> { + match Pin::new(&mut self.inner).poll_write_vectored(cx, bufs) { + Poll::Ready(Ok(nwritten)) => { + log::trace!( + "{:08x} write (vectored): {:?}", + self.id, + Vectored { bufs, nwritten } + ); + Poll::Ready(Ok(nwritten)) + } + Poll::Ready(Err(e)) => Poll::Ready(Err(e)), + Poll::Pending => Poll::Pending, + } + } + + fn is_write_vectored(&self) -> bool { + self.inner.is_write_vectored() + } + + fn poll_flush( + mut self: Pin<&mut Self>, + cx: &mut Context, + ) -> Poll> { + Pin::new(&mut self.inner).poll_flush(cx) + } + + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context, + ) -> Poll> { + Pin::new(&mut self.inner).poll_shutdown(cx) + } + } + + #[cfg(feature = "__tls")] + impl super::TlsInfoFactory for Verbose { + fn tls_info(&self) -> Option { + self.inner.tls_info() + } + } + + struct Vectored<'a, 'b> { + bufs: &'a [IoSlice<'b>], + nwritten: usize, + } + + impl fmt::Debug for Vectored<'_, '_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut left = self.nwritten; + for buf in self.bufs.iter() { + if left == 0 { + break; + } + let n = min(left, buf.len()); + Escape::new(&buf[..n]).fmt(f)?; + left -= n; + } + Ok(()) + } + } +} diff --git a/rust/reqwest/src/cookie.rs b/rust/reqwest/src/cookie.rs new file mode 100644 index 0000000000..c3f24c1d72 --- /dev/null +++ b/rust/reqwest/src/cookie.rs @@ -0,0 +1,293 @@ +//! HTTP Cookies + +use crate::header::{HeaderValue, SET_COOKIE}; +use bytes::Bytes; +use std::convert::TryInto; +use std::fmt; +use std::sync::RwLock; +use std::time::SystemTime; + +/// Actions for a persistent cookie store providing session support. +pub trait CookieStore: Send + Sync { + /// Store a set of Set-Cookie header values received from `url` + fn set_cookies(&self, cookie_headers: &mut dyn Iterator, url: &url::Url); + /// Get any Cookie values in the store for `url` + fn cookies(&self, url: &url::Url) -> Option; +} + +/// A single HTTP cookie. +pub struct Cookie<'a>(cookie_crate::Cookie<'a>); + +/// A good default `CookieStore` implementation. +/// +/// This is the implementation used when simply calling `cookie_store(true)`. +/// This type is exposed to allow creating one and filling it with some +/// existing cookies more easily, before creating a `Client`. +/// +/// For more advanced scenarios, such as needing to serialize the store or +/// manipulate it between requests, you may refer to the +/// [reqwest_cookie_store crate](https://crates.io/crates/reqwest_cookie_store). 
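+///
+/// # Example
+///
+/// One way to wire a `Jar` into a client (a sketch; `cookie_provider` requires
+/// the `cookies` feature):
+///
+/// ```
+/// use std::sync::Arc;
+/// use reqwest::cookie::Jar;
+///
+/// let jar = Arc::new(Jar::default());
+/// let client = reqwest::Client::builder()
+///     .cookie_provider(jar.clone())
+///     .build()
+///     .unwrap();
+/// ```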
+#[derive(Debug, Default)] +pub struct Jar(RwLock); + +// ===== impl Cookie ===== + +impl<'a> Cookie<'a> { + fn parse(value: &'a HeaderValue) -> Result, CookieParseError> { + std::str::from_utf8(value.as_bytes()) + .map_err(cookie_crate::ParseError::from) + .and_then(cookie_crate::Cookie::parse) + .map_err(CookieParseError) + .map(Cookie) + } + + /// The name of the cookie. + pub fn name(&self) -> &str { + self.0.name() + } + + /// The value of the cookie. + pub fn value(&self) -> &str { + self.0.value() + } + + /// Returns true if the 'HttpOnly' directive is enabled. + pub fn http_only(&self) -> bool { + self.0.http_only().unwrap_or(false) + } + + /// Returns true if the 'Secure' directive is enabled. + pub fn secure(&self) -> bool { + self.0.secure().unwrap_or(false) + } + + /// Returns true if 'SameSite' directive is 'Lax'. + pub fn same_site_lax(&self) -> bool { + self.0.same_site() == Some(cookie_crate::SameSite::Lax) + } + + /// Returns true if 'SameSite' directive is 'Strict'. + pub fn same_site_strict(&self) -> bool { + self.0.same_site() == Some(cookie_crate::SameSite::Strict) + } + + /// Returns the path directive of the cookie, if set. + pub fn path(&self) -> Option<&str> { + self.0.path() + } + + /// Returns the domain directive of the cookie, if set. + pub fn domain(&self) -> Option<&str> { + self.0.domain() + } + + /// Get the Max-Age information. + pub fn max_age(&self) -> Option { + self.0.max_age().map(|d| { + d.try_into() + .expect("time::Duration into std::time::Duration") + }) + } + + /// The cookie expiration time. + pub fn expires(&self) -> Option { + match self.0.expires() { + Some(cookie_crate::Expiration::DateTime(offset)) => Some(SystemTime::from(offset)), + None | Some(cookie_crate::Expiration::Session) => None, + } + } +} + +impl<'a> fmt::Debug for Cookie<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +pub(crate) fn extract_response_cookie_headers<'a>( + headers: &'a hyper::HeaderMap, +) -> impl Iterator + 'a { + headers.get_all(SET_COOKIE).iter() +} + +pub(crate) fn extract_response_cookies<'a>( + headers: &'a hyper::HeaderMap, +) -> impl Iterator, CookieParseError>> + 'a { + headers + .get_all(SET_COOKIE) + .iter() + .map(|value| Cookie::parse(value)) +} + +/// Error representing a parse failure of a 'Set-Cookie' header. +pub(crate) struct CookieParseError(cookie_crate::ParseError); + +impl<'a> fmt::Debug for CookieParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +impl<'a> fmt::Display for CookieParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +impl std::error::Error for CookieParseError {} + +// ===== impl Jar ===== + +impl Jar { + /// Add a cookie to this jar. + /// + /// # Example + /// + /// ``` + /// use reqwest::{cookie::Jar, Url}; + /// + /// let cookie = "foo=bar; Domain=yolo.local"; + /// let url = "https://yolo.local".parse::().unwrap(); + /// + /// let jar = Jar::default(); + /// jar.add_cookie_str(cookie, &url); + /// + /// // and now add to a `ClientBuilder`? 
+ /// ``` + pub fn add_cookie_str(&self, cookie: &str, url: &url::Url) { + let cookies = cookie_crate::Cookie::parse(cookie) + .ok() + .map(|c| c.into_owned()) + .into_iter(); + self.0.write().unwrap().store_response_cookies(cookies, url); + } +} + +impl CookieStore for Jar { + fn set_cookies(&self, cookie_headers: &mut dyn Iterator, url: &url::Url) { + let iter = + cookie_headers.filter_map(|val| Cookie::parse(val).map(|c| c.0.into_owned()).ok()); + + self.0.write().unwrap().store_response_cookies(iter, url); + } + + fn cookies(&self, url: &url::Url) -> Option { + let s = self + .0 + .read() + .unwrap() + .get_request_values(url) + .map(|(name, value)| format!("{name}={value}")) + .collect::>() + .join("; "); + + if s.is_empty() { + return None; + } + + HeaderValue::from_maybe_shared(Bytes::from(s)).ok() + } +} + +pub(crate) mod service { + use crate::cookie; + use http::{Request, Response}; + use http_body::Body; + use pin_project_lite::pin_project; + use std::future::Future; + use std::pin::Pin; + use std::sync::Arc; + use std::task::ready; + use std::task::Context; + use std::task::Poll; + use tower::Service; + use url::Url; + + /// A [`Service`] that adds cookie support to a lower-level [`Service`]. + #[derive(Clone)] + pub struct CookieService { + inner: S, + cookie_store: Option>, + } + + impl CookieService { + /// Create a new [`CookieService`]. + pub fn new(inner: S, cookie_store: Option>) -> Self { + Self { + inner, + cookie_store, + } + } + } + + impl Service> for CookieService + where + S: Service, Response = Response> + Clone, + ReqBody: Body + Default, + { + type Response = Response; + type Error = S::Error; + type Future = ResponseFuture; + + #[inline] + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, mut req: Request) -> Self::Future { + let clone = self.inner.clone(); + let mut inner = std::mem::replace(&mut self.inner, clone); + let url = Url::parse(req.uri().to_string().as_str()).expect("invalid URL"); + if let Some(cookie_store) = self.cookie_store.as_ref() { + if req.headers().get(crate::header::COOKIE).is_none() { + let headers = req.headers_mut(); + crate::util::add_cookie_header(headers, &**cookie_store, &url); + } + } + + let cookie_store = self.cookie_store.clone(); + ResponseFuture { + future: inner.call(req), + cookie_store, + url, + } + } + } + + pin_project! { + #[allow(missing_debug_implementations)] + #[derive(Clone)] + /// A [`Future`] that adds cookie support to a lower-level [`Future`]. 
+ pub struct ResponseFuture + where + S: Service>, + { + #[pin] + future: S::Future, + cookie_store: Option>, + url: Url, + } + } + + impl Future for ResponseFuture + where + S: Service, Response = Response> + Clone, + ReqBody: Body + Default, + { + type Output = Result, S::Error>; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let cookie_store = self.cookie_store.clone(); + let url = self.url.clone(); + let res = ready!(self.project().future.as_mut().poll(cx)?); + + if let Some(cookie_store) = cookie_store.as_ref() { + let mut cookies = cookie::extract_response_cookie_headers(res.headers()).peekable(); + if cookies.peek().is_some() { + cookie_store.set_cookies(&mut cookies, &url); + } + } + Poll::Ready(Ok(res)) + } + } +} diff --git a/rust/reqwest/src/dns/gai.rs b/rust/reqwest/src/dns/gai.rs new file mode 100644 index 0000000000..be0895a750 --- /dev/null +++ b/rust/reqwest/src/dns/gai.rs @@ -0,0 +1,32 @@ +use hyper_util::client::legacy::connect::dns::GaiResolver as HyperGaiResolver; +use tower_service::Service; + +use crate::dns::{Addrs, Name, Resolve, Resolving}; +use crate::error::BoxError; + +#[derive(Debug)] +pub struct GaiResolver(HyperGaiResolver); + +impl GaiResolver { + pub fn new() -> Self { + Self(HyperGaiResolver::new()) + } +} + +impl Default for GaiResolver { + fn default() -> Self { + GaiResolver::new() + } +} + +impl Resolve for GaiResolver { + fn resolve(&self, name: Name) -> Resolving { + let mut this = self.0.clone(); + Box::pin(async move { + this.call(name.0) + .await + .map(|addrs| Box::new(addrs) as Addrs) + .map_err(|err| Box::new(err) as BoxError) + }) + } +} diff --git a/rust/reqwest/src/dns/hickory.rs b/rust/reqwest/src/dns/hickory.rs new file mode 100644 index 0000000000..f720e3613a --- /dev/null +++ b/rust/reqwest/src/dns/hickory.rs @@ -0,0 +1,73 @@ +//! DNS resolution via the [hickory-resolver](https://github.com/hickory-dns/hickory-dns) crate + +use hickory_resolver::{ + config::LookupIpStrategy, lookup_ip::LookupIpIntoIter, ResolveError, TokioResolver, +}; +use once_cell::sync::OnceCell; + +use std::fmt; +use std::net::SocketAddr; +use std::sync::Arc; + +use super::{Addrs, Name, Resolve, Resolving}; + +/// Wrapper around an `AsyncResolver`, which implements the `Resolve` trait. +#[derive(Debug, Default, Clone)] +pub(crate) struct HickoryDnsResolver { + /// Since we might not have been called in the context of a + /// Tokio Runtime in initialization, so we must delay the actual + /// construction of the resolver. + state: Arc>, +} + +struct SocketAddrs { + iter: LookupIpIntoIter, +} + +#[derive(Debug)] +struct HickoryDnsSystemConfError(ResolveError); + +impl Resolve for HickoryDnsResolver { + fn resolve(&self, name: Name) -> Resolving { + let resolver = self.clone(); + Box::pin(async move { + let resolver = resolver.state.get_or_try_init(new_resolver)?; + + let lookup = resolver.lookup_ip(name.as_str()).await?; + let addrs: Addrs = Box::new(SocketAddrs { + iter: lookup.into_iter(), + }); + Ok(addrs) + }) + } +} + +impl Iterator for SocketAddrs { + type Item = SocketAddr; + + fn next(&mut self) -> Option { + self.iter.next().map(|ip_addr| SocketAddr::new(ip_addr, 0)) + } +} + +/// Create a new resolver with the default configuration, +/// which reads from `/etc/resolve.conf`. The options are +/// overridden to look up for both IPv4 and IPv6 addresses +/// to work with "happy eyeballs" algorithm. 
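+/// A hedged usage sketch for the feature this backs: enabling `hickory-dns`
+/// and turning it on via the builder (method name assumed from the crate's
+/// feature docs):
+///
+/// ```ignore
+/// let client = reqwest::Client::builder().hickory_dns(true).build()?;
+/// ```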
+fn new_resolver() -> Result { + let mut builder = TokioResolver::builder_tokio().map_err(HickoryDnsSystemConfError)?; + builder.options_mut().ip_strategy = LookupIpStrategy::Ipv4AndIpv6; + Ok(builder.build()) +} + +impl fmt::Display for HickoryDnsSystemConfError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("error reading DNS system conf for hickory-dns") + } +} + +impl std::error::Error for HickoryDnsSystemConfError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + Some(&self.0) + } +} diff --git a/rust/reqwest/src/dns/mod.rs b/rust/reqwest/src/dns/mod.rs new file mode 100644 index 0000000000..7d3bd0dae7 --- /dev/null +++ b/rust/reqwest/src/dns/mod.rs @@ -0,0 +1,12 @@ +//! DNS resolution + +pub use resolve::{Addrs, Name, Resolve, Resolving}; +pub(crate) use resolve::{DnsResolverWithOverrides, DynResolver}; + +#[cfg(docsrs)] +pub use resolve::IntoResolve; + +pub(crate) mod gai; +#[cfg(feature = "hickory-dns")] +pub(crate) mod hickory; +pub(crate) mod resolve; diff --git a/rust/reqwest/src/dns/resolve.rs b/rust/reqwest/src/dns/resolve.rs new file mode 100644 index 0000000000..ea25bf8cbf --- /dev/null +++ b/rust/reqwest/src/dns/resolve.rs @@ -0,0 +1,193 @@ +use hyper_util::client::legacy::connect::dns::Name as HyperName; +use tower_service::Service; + +use std::collections::HashMap; +use std::future::Future; +use std::net::SocketAddr; +use std::pin::Pin; +use std::str::FromStr; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use crate::error::BoxError; + +/// Alias for an `Iterator` trait object over `SocketAddr`. +pub type Addrs = Box + Send>; + +/// Alias for the `Future` type returned by a DNS resolver. +pub type Resolving = Pin> + Send>>; + +/// Trait for customizing DNS resolution in reqwest. +pub trait Resolve: Send + Sync { + /// Performs DNS resolution on a `Name`. + /// The return type is a future containing an iterator of `SocketAddr`. + /// + /// It differs from `tower_service::Service` in several ways: + /// * It is assumed that `resolve` will always be ready to poll. + /// * It does not need a mutable reference to `self`. + /// * Since trait objects cannot make use of associated types, it requires + /// wrapping the returned `Future` and its contained `Iterator` with `Box`. + /// + /// Explicitly specified port in the URL will override any port in the resolved `SocketAddr`s. + /// Otherwise, port `0` will be replaced by the conventional port for the given scheme (e.g. 80 for http). + fn resolve(&self, name: Name) -> Resolving; +} + +/// A name that must be resolved to addresses. +#[derive(Debug)] +pub struct Name(pub(super) HyperName); + +/// A more general trait implemented for types implementing `Resolve`. +/// +/// Unnameable, only exported to aid seeing what implements this. +pub trait IntoResolve { + #[doc(hidden)] + fn into_resolve(self) -> Arc; +} + +impl Name { + /// View the name as a string. + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl FromStr for Name { + type Err = sealed::InvalidNameError; + + fn from_str(host: &str) -> Result { + HyperName::from_str(host) + .map(Name) + .map_err(|_| sealed::InvalidNameError { _ext: () }) + } +} + +#[derive(Clone)] +pub(crate) struct DynResolver { + resolver: Arc, +} + +impl DynResolver { + pub(crate) fn new(resolver: Arc) -> Self { + Self { resolver } + } + + #[cfg(feature = "socks")] + pub(crate) fn gai() -> Self { + Self::new(Arc::new(super::gai::GaiResolver::new())) + } + + /// Resolve an HTTP host and port, not just a domain name. 
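+    // Custom resolution plugs in through the public `Resolve` trait above; a
+    // minimal sketch that pins every lookup to one address and hands it to the
+    // builder via `dns_resolver` (the `Static` name is ours, for illustration):
+    //
+    //     struct Static(std::net::SocketAddr);
+    //
+    //     impl reqwest::dns::Resolve for Static {
+    //         fn resolve(&self, _name: reqwest::dns::Name) -> reqwest::dns::Resolving {
+    //             let addr = self.0;
+    //             Box::pin(async move {
+    //                 Ok(Box::new(std::iter::once(addr)) as reqwest::dns::Addrs)
+    //             })
+    //         }
+    //     }
+    //
+    //     let client = reqwest::Client::builder()
+    //         .dns_resolver(std::sync::Arc::new(Static(addr)))
+    //         .build()?;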
+ /// + /// This does the same thing that hyper-util's HttpConnector does, before + /// calling out to its underlying DNS resolver. + #[cfg(feature = "socks")] + pub(crate) async fn http_resolve( + &self, + target: &http::Uri, + ) -> Result, BoxError> { + let host = target.host().ok_or("missing host")?; + let port = target + .port_u16() + .unwrap_or_else(|| match target.scheme_str() { + Some("https") => 443, + Some("socks4") | Some("socks4a") | Some("socks5") | Some("socks5h") => 1080, + _ => 80, + }); + + let explicit_port = target.port().is_some(); + + let addrs = self.resolver.resolve(host.parse()?).await?; + + Ok(addrs.map(move |mut addr| { + if explicit_port || addr.port() == 0 { + addr.set_port(port); + } + addr + })) + } +} + +impl Service for DynResolver { + type Response = Addrs; + type Error = BoxError; + type Future = Resolving; + + fn poll_ready(&mut self, _: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, name: HyperName) -> Self::Future { + self.resolver.resolve(Name(name)) + } +} + +pub(crate) struct DnsResolverWithOverrides { + dns_resolver: Arc, + overrides: Arc>>, +} + +impl DnsResolverWithOverrides { + pub(crate) fn new( + dns_resolver: Arc, + overrides: HashMap>, + ) -> Self { + DnsResolverWithOverrides { + dns_resolver, + overrides: Arc::new(overrides), + } + } +} + +impl Resolve for DnsResolverWithOverrides { + fn resolve(&self, name: Name) -> Resolving { + match self.overrides.get(name.as_str()) { + Some(dest) => { + let addrs: Addrs = Box::new(dest.clone().into_iter()); + Box::pin(std::future::ready(Ok(addrs))) + } + None => self.dns_resolver.resolve(name), + } + } +} + +impl IntoResolve for Arc { + fn into_resolve(self) -> Arc { + self + } +} + +impl IntoResolve for Arc +where + R: Resolve + 'static, +{ + fn into_resolve(self) -> Arc { + self + } +} + +impl IntoResolve for R +where + R: Resolve + 'static, +{ + fn into_resolve(self) -> Arc { + Arc::new(self) + } +} + +mod sealed { + use std::fmt; + + #[derive(Debug)] + pub struct InvalidNameError { + pub(super) _ext: (), + } + + impl fmt::Display for InvalidNameError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("invalid DNS name") + } + } + + impl std::error::Error for InvalidNameError {} +} diff --git a/rust/reqwest/src/error.rs b/rust/reqwest/src/error.rs new file mode 100644 index 0000000000..32d9b40246 --- /dev/null +++ b/rust/reqwest/src/error.rs @@ -0,0 +1,470 @@ +#![cfg_attr(target_arch = "wasm32", allow(unused))] +use std::error::Error as StdError; +use std::fmt; +use std::io; +use tracing::error; + +use crate::util::Escape; +use crate::{StatusCode, Url}; + +/// A `Result` alias where the `Err` case is `reqwest::Error`. +pub type Result = std::result::Result; + +/// The Errors that may occur when processing a `Request`. +/// +/// Note: Errors may include the full URL used to make the `Request`. If the URL +/// contains sensitive information (e.g. an API key as a query parameter), be +/// sure to remove it ([`without_url`](Error::without_url)) +pub struct Error { + inner: Box, +} + +pub(crate) type BoxError = Box; + +struct Inner { + kind: Kind, + source: Option, + url: Option, +} + +impl Error { + pub(crate) fn new(kind: Kind, source: Option) -> Error + where + E: Into, + { + error!("BT: {}", std::backtrace::Backtrace::force_capture()); + Error { + inner: Box::new(Inner { + kind, + source: source.map(Into::into), + url: None, + }), + } + } + + /// Returns a possible URL related to this error. 
+ /// + /// # Examples + /// + /// ``` + /// # async fn run() { + /// // displays last stop of a redirect loop + /// let response = reqwest::get("http://site.with.redirect.loop").await; + /// if let Err(e) = response { + /// if e.is_redirect() { + /// if let Some(final_stop) = e.url() { + /// println!("redirect loop at {final_stop}"); + /// } + /// } + /// } + /// # } + /// ``` + pub fn url(&self) -> Option<&Url> { + self.inner.url.as_ref() + } + + /// Returns a mutable reference to the URL related to this error + /// + /// This is useful if you need to remove sensitive information from the URL + /// (e.g. an API key in the query), but do not want to remove the URL + /// entirely. + pub fn url_mut(&mut self) -> Option<&mut Url> { + self.inner.url.as_mut() + } + + /// Add a url related to this error (overwriting any existing) + pub fn with_url(mut self, url: Url) -> Self { + self.inner.url = Some(url); + self + } + + pub(crate) fn if_no_url(mut self, f: impl FnOnce() -> Url) -> Self { + if self.inner.url.is_none() { + self.inner.url = Some(f()); + } + self + } + + /// Strip the related url from this error (if, for example, it contains + /// sensitive information) + pub fn without_url(mut self) -> Self { + self.inner.url = None; + self + } + + /// Returns true if the error is from a type Builder. + pub fn is_builder(&self) -> bool { + matches!(self.inner.kind, Kind::Builder) + } + + /// Returns true if the error is from a `RedirectPolicy`. + pub fn is_redirect(&self) -> bool { + matches!(self.inner.kind, Kind::Redirect) + } + + /// Returns true if the error is from `Response::error_for_status`. + pub fn is_status(&self) -> bool { + #[cfg(not(target_arch = "wasm32"))] + { + matches!(self.inner.kind, Kind::Status(_, _)) + } + #[cfg(target_arch = "wasm32")] + { + matches!(self.inner.kind, Kind::Status(_)) + } + } + + /// Returns true if the error is related to a timeout. + pub fn is_timeout(&self) -> bool { + let mut source = self.source(); + + while let Some(err) = source { + if err.is::() { + return true; + } + #[cfg(not(target_arch = "wasm32"))] + if let Some(hyper_err) = err.downcast_ref::() { + if hyper_err.is_timeout() { + return true; + } + } + if let Some(io) = err.downcast_ref::() { + if io.kind() == io::ErrorKind::TimedOut { + return true; + } + } + source = err.source(); + } + + false + } + + /// Returns true if the error is related to the request + pub fn is_request(&self) -> bool { + matches!(self.inner.kind, Kind::Request) + } + + #[cfg(not(target_arch = "wasm32"))] + /// Returns true if the error is related to connect + pub fn is_connect(&self) -> bool { + let mut source = self.source(); + + while let Some(err) = source { + if let Some(hyper_err) = err.downcast_ref::() { + if hyper_err.is_connect() { + return true; + } + } + + source = err.source(); + } + + false + } + + /// Returns true if the error is related to the request or response body + pub fn is_body(&self) -> bool { + matches!(self.inner.kind, Kind::Body) + } + + /// Returns true if the error is related to decoding the response's body + pub fn is_decode(&self) -> bool { + matches!(self.inner.kind, Kind::Decode) + } + + /// Returns the status code, if the error was generated from a response. 
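+    ///
+    /// # Example
+    ///
+    /// A sketch using `Response::error_for_status` (placeholder URL):
+    ///
+    /// ```
+    /// # async fn run() -> Result<(), reqwest::Error> {
+    /// let res = reqwest::get("http://httpbin.org/status/404").await?;
+    /// if let Err(e) = res.error_for_status() {
+    ///     assert_eq!(e.status(), Some(reqwest::StatusCode::NOT_FOUND));
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```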
+ pub fn status(&self) -> Option { + match self.inner.kind { + #[cfg(target_arch = "wasm32")] + Kind::Status(code) => Some(code), + #[cfg(not(target_arch = "wasm32"))] + Kind::Status(code, _) => Some(code), + _ => None, + } + } + + // private + + #[allow(unused)] + pub(crate) fn into_io(self) -> io::Error { + io::Error::new(io::ErrorKind::Other, self) + } +} + +/// Converts from external types to reqwest's +/// internal equivalents. +/// +/// Currently only is used for `tower::timeout::error::Elapsed`. +#[cfg(not(target_arch = "wasm32"))] +pub(crate) fn cast_to_internal_error(error: BoxError) -> BoxError { + if error.is::() { + Box::new(crate::error::TimedOut) as BoxError + } else { + error + } +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut builder = f.debug_struct("reqwest::Error"); + + builder.field("kind", &self.inner.kind); + + if let Some(ref url) = self.inner.url { + builder.field("url", &url.as_str()); + } + if let Some(ref source) = self.inner.source { + builder.field("source", source); + } + + builder.finish() + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.inner.kind { + Kind::Builder => f.write_str("builder error")?, + Kind::Request => f.write_str("error sending request")?, + Kind::Body => f.write_str("request or response body error")?, + Kind::Decode => f.write_str("error decoding response body")?, + Kind::Redirect => f.write_str("error following redirect")?, + Kind::Upgrade => f.write_str("error upgrading connection")?, + #[cfg(target_arch = "wasm32")] + Kind::Status(ref code) => { + let prefix = if code.is_client_error() { + "HTTP status client error" + } else { + debug_assert!(code.is_server_error()); + "HTTP status server error" + }; + write!(f, "{prefix} ({code})")?; + } + #[cfg(not(target_arch = "wasm32"))] + Kind::Status(ref code, ref reason) => { + let prefix = if code.is_client_error() { + "HTTP status client error" + } else { + debug_assert!(code.is_server_error()); + "HTTP status server error" + }; + if let Some(reason) = reason { + write!( + f, + "{prefix} ({} {})", + code.as_str(), + Escape::new(reason.as_bytes()) + )?; + } else { + write!(f, "{prefix} ({code})")?; + } + } + }; + + if let Some(url) = &self.inner.url { + write!(f, " for url ({url})")?; + } + + Ok(()) + } +} + +impl StdError for Error { + fn source(&self) -> Option<&(dyn StdError + 'static)> { + self.inner.source.as_ref().map(|e| &**e as _) + } +} + +#[cfg(target_arch = "wasm32")] +impl From for wasm_bindgen::JsValue { + fn from(err: Error) -> wasm_bindgen::JsValue { + js_sys::Error::from(err).into() + } +} + +#[cfg(target_arch = "wasm32")] +impl From for js_sys::Error { + fn from(err: Error) -> js_sys::Error { + js_sys::Error::new(&format!("{err}")) + } +} + +#[derive(Debug)] +pub(crate) enum Kind { + Builder, + Request, + Redirect, + #[cfg(not(target_arch = "wasm32"))] + Status(StatusCode, Option), + #[cfg(target_arch = "wasm32")] + Status(StatusCode), + Body, + Decode, + Upgrade, +} + +// constructors + +pub(crate) fn builder>(e: E) -> Error { + Error::new(Kind::Builder, Some(e)) +} + +pub(crate) fn body>(e: E) -> Error { + Error::new(Kind::Body, Some(e)) +} + +pub(crate) fn decode>(e: E) -> Error { + Error::new(Kind::Decode, Some(e)) +} + +pub(crate) fn request>(e: E) -> Error { + Error::new(Kind::Request, Some(e)) +} + +pub(crate) fn redirect>(e: E, url: Url) -> Error { + Error::new(Kind::Redirect, Some(e)).with_url(url) +} + +pub(crate) fn status_code( + url: Url, + status: 
StatusCode, + #[cfg(not(target_arch = "wasm32"))] reason: Option, +) -> Error { + Error::new( + Kind::Status( + status, + #[cfg(not(target_arch = "wasm32"))] + reason, + ), + None::, + ) + .with_url(url) +} + +pub(crate) fn url_bad_scheme(url: Url) -> Error { + Error::new(Kind::Builder, Some(BadScheme)).with_url(url) +} + +pub(crate) fn url_invalid_uri(url: Url) -> Error { + Error::new(Kind::Builder, Some("Parsed Url is not a valid Uri")).with_url(url) +} + +if_wasm! { + pub(crate) fn wasm(js_val: wasm_bindgen::JsValue) -> BoxError { + format!("{js_val:?}").into() + } +} + +pub(crate) fn upgrade>(e: E) -> Error { + Error::new(Kind::Upgrade, Some(e)) +} + +// io::Error helpers + +#[cfg(any( + feature = "gzip", + feature = "zstd", + feature = "brotli", + feature = "deflate", + feature = "blocking", +))] +pub(crate) fn into_io(e: BoxError) -> io::Error { + io::Error::new(io::ErrorKind::Other, e) +} + +#[allow(unused)] +pub(crate) fn decode_io(e: io::Error) -> Error { + if e.get_ref().map(|r| r.is::()).unwrap_or(false) { + *e.into_inner() + .expect("io::Error::get_ref was Some(_)") + .downcast::() + .expect("StdError::is() was true") + } else { + decode(e) + } +} + +// internal Error "sources" + +#[derive(Debug)] +pub(crate) struct TimedOut; + +impl fmt::Display for TimedOut { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("operation timed out") + } +} + +impl StdError for TimedOut {} + +#[derive(Debug)] +pub(crate) struct BadScheme; + +impl fmt::Display for BadScheme { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("URL scheme is not allowed") + } +} + +impl StdError for BadScheme {} + +#[cfg(test)] +mod tests { + use super::*; + + fn assert_send() {} + fn assert_sync() {} + + #[test] + fn test_source_chain() { + let root = Error::new(Kind::Request, None::); + assert!(root.source().is_none()); + + let link = super::body(root); + assert!(link.source().is_some()); + assert_send::(); + assert_sync::(); + } + + #[test] + fn mem_size_of() { + use std::mem::size_of; + assert_eq!(size_of::(), size_of::()); + } + + #[test] + fn roundtrip_io_error() { + let orig = super::request("orig"); + // Convert reqwest::Error into an io::Error... + let io = orig.into_io(); + // Convert that io::Error back into a reqwest::Error... + let err = super::decode_io(io); + // It should have pulled out the original, not nested it... + match err.inner.kind { + Kind::Request => (), + _ => panic!("{err:?}"), + } + } + + #[test] + fn from_unknown_io_error() { + let orig = io::Error::new(io::ErrorKind::Other, "orly"); + let err = super::decode_io(orig); + match err.inner.kind { + Kind::Decode => (), + _ => panic!("{err:?}"), + } + } + + #[test] + fn is_timeout() { + let err = super::request(super::TimedOut); + assert!(err.is_timeout()); + + // todo: test `hyper::Error::is_timeout` when we can easily construct one + + let io = io::Error::from(io::ErrorKind::TimedOut); + let nested = super::request(io); + assert!(nested.is_timeout()); + } +} diff --git a/rust/reqwest/src/into_url.rs b/rust/reqwest/src/into_url.rs new file mode 100644 index 0000000000..5191181cb8 --- /dev/null +++ b/rust/reqwest/src/into_url.rs @@ -0,0 +1,117 @@ +use url::Url; + +/// A trait to try to convert some type into a `Url`. +/// +/// This trait is "sealed", such that only types within reqwest can +/// implement it. 
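+///
+/// Anything accepted here can be passed straight to request methods; a sketch:
+///
+/// ```
+/// # async fn run() -> Result<(), reqwest::Error> {
+/// // &str, String, Url, and references to them all implement IntoUrl:
+/// let url: reqwest::Url = "https://example.com".parse().unwrap();
+/// let _ = reqwest::get(url).await?;
+/// let _ = reqwest::get("https://example.com").await?;
+/// # Ok(())
+/// # }
+/// ```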
+pub trait IntoUrl: IntoUrlSealed {} + +impl IntoUrl for Url {} +impl IntoUrl for String {} +impl<'a> IntoUrl for &'a str {} +impl<'a> IntoUrl for &'a String {} + +pub trait IntoUrlSealed { + // Besides parsing as a valid `Url`, the `Url` must be a valid + // `http::Uri`, in that it makes sense to use in a network request. + fn into_url(self) -> crate::Result; + + fn as_str(&self) -> &str; +} + +impl IntoUrlSealed for Url { + fn into_url(self) -> crate::Result { + // With blob url the `self.has_host()` check is always false, so we + // remove the `blob:` scheme and check again if the url is valid. + #[cfg(target_arch = "wasm32")] + if self.scheme() == "blob" + && self.path().starts_with("http") // Check if the path starts with http or https to avoid validating a `blob:blob:...` url. + && self.as_str()[5..].into_url().is_ok() + { + return Ok(self); + } + + if self.has_host() { + Ok(self) + } else { + Err(crate::error::url_bad_scheme(self)) + } + } + + fn as_str(&self) -> &str { + self.as_ref() + } +} + +impl<'a> IntoUrlSealed for &'a str { + fn into_url(self) -> crate::Result { + Url::parse(self).map_err(crate::error::builder)?.into_url() + } + + fn as_str(&self) -> &str { + self + } +} + +impl<'a> IntoUrlSealed for &'a String { + fn into_url(self) -> crate::Result { + (&**self).into_url() + } + + fn as_str(&self) -> &str { + self.as_ref() + } +} + +impl IntoUrlSealed for String { + fn into_url(self) -> crate::Result { + (&*self).into_url() + } + + fn as_str(&self) -> &str { + self.as_ref() + } +} + +if_hyper! { + pub(crate) fn try_uri(url: &Url) -> crate::Result { + url.as_str() + .parse() + .map_err(|_| crate::error::url_invalid_uri(url.clone())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::error::Error; + + #[test] + fn into_url_file_scheme() { + let err = "file:///etc/hosts".into_url().unwrap_err(); + assert_eq!( + err.source().unwrap().to_string(), + "URL scheme is not allowed" + ); + } + + #[test] + fn into_url_blob_scheme() { + let err = "blob:https://example.com".into_url().unwrap_err(); + assert_eq!( + err.source().unwrap().to_string(), + "URL scheme is not allowed" + ); + } + + if_wasm! { + use wasm_bindgen_test::*; + + #[wasm_bindgen_test] + fn into_url_blob_scheme_wasm() { + let url = "blob:http://example.com".into_url().unwrap(); + + assert_eq!(url.as_str(), "blob:http://example.com"); + } + } +} diff --git a/rust/reqwest/src/lib.rs b/rust/reqwest/src/lib.rs new file mode 100644 index 0000000000..99a7efc54c --- /dev/null +++ b/rust/reqwest/src/lib.rs @@ -0,0 +1,395 @@ +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![cfg_attr(not(test), warn(unused_crate_dependencies))] +#![cfg_attr(test, deny(warnings))] + +//! # reqwest +//! +//! The `reqwest` crate provides a convenient, higher-level HTTP +//! [`Client`][client]. +//! +//! It handles many of the things that most people just expect an HTTP client +//! to do for them. +//! +//! - Async and [blocking] Clients +//! - Plain bodies, [JSON](#json), [urlencoded](#forms), [multipart] +//! - Customizable [redirect policy](#redirect-policies) +//! - HTTP [Proxies](#proxies) +//! - Uses [TLS](#tls) by default +//! - Cookies +//! +//! The [`reqwest::Client`][client] is asynchronous (requiring Tokio). For +//! applications wishing to only make a few HTTP requests, the +//! [`reqwest::blocking`](blocking) API may be more convenient. +//! +//! Additional learning resources include: +//! +//! 
- [The Rust Cookbook](https://rust-lang-nursery.github.io/rust-cookbook/web/clients.html) +//! - [reqwest Repository Examples](https://github.com/seanmonstar/reqwest/tree/master/examples) +//! +//! ## Commercial Support +//! +//! For private advice, support, reviews, access to the maintainer, and the +//! like, reach out for [commercial support][sponsor]. +//! +//! ## Making a GET request +//! +//! For a single request, you can use the [`get`][get] shortcut method. +//! +//! ```rust +//! # async fn run() -> Result<(), reqwest::Error> { +//! let body = reqwest::get("https://www.rust-lang.org") +//! .await? +//! .text() +//! .await?; +//! +//! println!("body = {body:?}"); +//! # Ok(()) +//! # } +//! ``` +//! +//! **NOTE**: If you plan to perform multiple requests, it is best to create a +//! [`Client`][client] and reuse it, taking advantage of keep-alive connection +//! pooling. +//! +//! ## Making POST requests (or setting request bodies) +//! +//! There are several ways you can set the body of a request. The basic one is +//! by using the `body()` method of a [`RequestBuilder`][builder]. This lets you set the +//! exact raw bytes of what the body should be. It accepts various types, +//! including `String` and `Vec`. If you wish to pass a custom +//! type, you can use the `reqwest::Body` constructors. +//! +//! ```rust +//! # use reqwest::Error; +//! # +//! # async fn run() -> Result<(), Error> { +//! let client = reqwest::Client::new(); +//! let res = client.post("http://httpbin.org/post") +//! .body("the exact body that is sent") +//! .send() +//! .await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! ### Forms +//! +//! It's very common to want to send form data in a request body. This can be +//! done with any type that can be serialized into form data. +//! +//! This can be an array of tuples, or a `HashMap`, or a custom type that +//! implements [`Serialize`][serde]. +//! +//! ```rust +//! # use reqwest::Error; +//! # +//! # async fn run() -> Result<(), Error> { +//! // This will POST a body of `foo=bar&baz=quux` +//! let params = [("foo", "bar"), ("baz", "quux")]; +//! let client = reqwest::Client::new(); +//! let res = client.post("http://httpbin.org/post") +//! .form(¶ms) +//! .send() +//! .await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! ### JSON +//! +//! There is also a `json` method helper on the [`RequestBuilder`][builder] that works in +//! a similar fashion the `form` method. It can take any value that can be +//! serialized into JSON. The feature `json` is required. +//! +//! ```rust +//! # use reqwest::Error; +//! # use std::collections::HashMap; +//! # +//! # #[cfg(feature = "json")] +//! # async fn run() -> Result<(), Error> { +//! // This will POST a body of `{"lang":"rust","body":"json"}` +//! let mut map = HashMap::new(); +//! map.insert("lang", "rust"); +//! map.insert("body", "json"); +//! +//! let client = reqwest::Client::new(); +//! let res = client.post("http://httpbin.org/post") +//! .json(&map) +//! .send() +//! .await?; +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Redirect Policies +//! +//! By default, a `Client` will automatically handle HTTP redirects, having a +//! maximum redirect chain of 10 hops. To customize this behavior, a +//! [`redirect::Policy`][redirect] can be used with a `ClientBuilder`. +//! +//! ## Cookies +//! +//! The automatic storing and sending of session cookies can be enabled with +//! the [`cookie_store`][ClientBuilder::cookie_store] method on `ClientBuilder`. +//! +//! ## Proxies +//! +//! 
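+//! An explicit proxy can be set on the `ClientBuilder` (a sketch; the proxy
+//! address is a placeholder):
+//!
+//! ```rust
+//! # fn run() -> Result<(), reqwest::Error> {
+//! let proxy = reqwest::Proxy::https("http://localhost:8080")?;
+//! let client = reqwest::Client::builder().proxy(proxy).build()?;
+//! # Ok(())
+//! # }
+//! ```
+//!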
**NOTE**: System proxies are enabled by default. +//! +//! System proxies look in environment variables to set HTTP or HTTPS proxies. +//! +//! `HTTP_PROXY` or `http_proxy` provide HTTP proxies for HTTP connections while +//! `HTTPS_PROXY` or `https_proxy` provide HTTPS proxies for HTTPS connections. +//! `ALL_PROXY` or `all_proxy` provide proxies for both HTTP and HTTPS connections. +//! If both the all proxy and HTTP or HTTPS proxy variables are set the more specific +//! HTTP or HTTPS proxies take precedence. +//! +//! These can be overwritten by adding a [`Proxy`] to `ClientBuilder` +//! i.e. `let proxy = reqwest::Proxy::http("https://secure.example")?;` +//! or disabled by calling `ClientBuilder::no_proxy()`. +//! +//! `socks` feature is required if you have configured socks proxy like this: +//! +//! ```bash +//! export https_proxy=socks5://127.0.0.1:1086 +//! ``` +//! +//! ## TLS +//! +//! A `Client` will use transport layer security (TLS) by default to connect to +//! HTTPS destinations. +//! +//! - Additional server certificates can be configured on a `ClientBuilder` +//! with the [`Certificate`] type. +//! - Client certificates can be added to a `ClientBuilder` with the +//! [`Identity`] type. +//! - Various parts of TLS can also be configured or even disabled on the +//! `ClientBuilder`. +//! +//! See more details in the [`tls`] module. +//! +//! ## WASM +//! +//! The Client implementation automatically switches to the WASM one when the target_arch is wasm32, +//! the usage is basically the same as the async api. Some of the features are disabled in wasm +//! : [`tls`], [`cookie`], [`blocking`], as well as various `ClientBuilder` methods such as `timeout()` and `connector_layer()`. +//! +//! TLS and cookies are provided through the browser environment, so reqwest can issue TLS requests with cookies, +//! but has limited configuration. +//! +//! ## Optional Features +//! +//! The following are a list of [Cargo features][cargo-features] that can be +//! enabled or disabled: +//! +//! - **http2** *(enabled by default)*: Enables HTTP/2 support. +//! - **default-tls** *(enabled by default)*: Provides TLS support to connect +//! over HTTPS. +//! - **native-tls**: Enables TLS functionality provided by `native-tls`. +//! - **native-tls-vendored**: Enables the `vendored` feature of `native-tls`. +//! - **native-tls-alpn**: Enables the `alpn` feature of `native-tls`. +//! - **rustls-tls**: Enables TLS functionality provided by `rustls`. +//! Equivalent to `rustls-tls-webpki-roots`. +//! - **rustls-tls-manual-roots**: Enables TLS functionality provided by `rustls`, +//! without setting any root certificates. Roots have to be specified manually. +//! - **rustls-tls-webpki-roots**: Enables TLS functionality provided by `rustls`, +//! while using root certificates from the `webpki-roots` crate. +//! - **rustls-tls-native-roots**: Enables TLS functionality provided by `rustls`, +//! while using root certificates from the `rustls-native-certs` crate. +//! - **blocking**: Provides the [blocking][] client API. +//! - **charset** *(enabled by default)*: Improved support for decoding text. +//! - **cookies**: Provides cookie session support. +//! - **gzip**: Provides response body gzip decompression. +//! - **brotli**: Provides response body brotli decompression. +//! - **zstd**: Provides response body zstd decompression. +//! - **deflate**: Provides response body deflate decompression. +//! - **json**: Provides serialization and deserialization for JSON bodies. +//! 
- **multipart**: Provides functionality for multipart forms. +//! - **stream**: Adds support for `futures::Stream`. +//! - **socks**: Provides SOCKS5 proxy support. +//! - **hickory-dns**: Enables a hickory-dns async resolver instead of default +//! threadpool using `getaddrinfo`. +//! - **system-proxy** *(enabled by default)*: Use Windows and macOS system +//! proxy settings automatically. +//! +//! ## Unstable Features +//! +//! Some feature flags require additional opt-in by the application, by setting +//! a `reqwest_unstable` flag. +//! +//! - **http3** *(unstable)*: Enables support for sending HTTP/3 requests. +//! +//! These features are unstable, and experimental. Details about them may be +//! changed in patch releases. +//! +//! You can pass such a flag to the compiler via `.cargo/config`, or +//! environment variables, such as: +//! +//! ```notrust +//! RUSTFLAGS="--cfg reqwest_unstable" cargo build +//! ``` +//! +//! ## Sponsors +//! +//! Support this project by becoming a [sponsor][]. +//! +//! [hyper]: https://hyper.rs +//! [blocking]: ./blocking/index.html +//! [client]: ./struct.Client.html +//! [response]: ./struct.Response.html +//! [get]: ./fn.get.html +//! [builder]: ./struct.RequestBuilder.html +//! [serde]: http://serde.rs +//! [redirect]: crate::redirect +//! [Proxy]: ./struct.Proxy.html +//! [cargo-features]: https://doc.rust-lang.org/stable/cargo/reference/manifest.html#the-features-section +//! [sponsor]: https://seanmonstar.com/sponsor + +#[cfg(all(feature = "http3", not(reqwest_unstable)))] +compile_error!( + "\ + The `http3` feature is unstable, and requires the \ + `RUSTFLAGS='--cfg reqwest_unstable'` environment variable to be set.\ +" +); + +// Ignore `unused_crate_dependencies` warnings. +// Used in many features that they're not worth making it optional. +use futures_core as _; +use sync_wrapper as _; + +macro_rules! if_wasm { + ($($item:item)*) => {$( + #[cfg(target_arch = "wasm32")] + $item + )*} +} + +macro_rules! if_hyper { + ($($item:item)*) => {$( + #[cfg(not(target_arch = "wasm32"))] + $item + )*} +} + +pub use http::header; +pub use http::Method; +pub use http::{StatusCode, Version}; +pub use url::Url; + +// universal mods +#[macro_use] +mod error; +// TODO: remove `if_hyper` if wasm has been migrated to new config system. +if_hyper! { + mod config; +} +mod into_url; +mod response; + +pub use self::error::{Error, Result}; +pub use self::into_url::IntoUrl; +pub use self::response::ResponseBuilderExt; + +/// Shortcut method to quickly make a `GET` request. +/// +/// See also the methods on the [`reqwest::Response`](./struct.Response.html) +/// type. +/// +/// **NOTE**: This function creates a new internal `Client` on each call, +/// and so should not be used if making many requests. Create a +/// [`Client`](./struct.Client.html) instead. +/// +/// # Examples +/// +/// ```rust +/// # async fn run() -> Result<(), reqwest::Error> { +/// let body = reqwest::get("https://www.rust-lang.org").await? 
+///     .text().await?;
+/// # Ok(())
+/// # }
+/// ```
+///
+/// # Errors
+///
+/// This function fails if:
+///
+/// - native TLS backend cannot be initialized
+/// - supplied `Url` cannot be parsed
+/// - there was an error while sending request
+/// - redirect limit was exhausted
+pub async fn get<T: IntoUrl>(url: T) -> crate::Result<Response> {
+    Client::builder().build()?.get(url).send().await
+}
+
+fn _assert_impls() {
+    fn assert_send<T: Send>() {}
+    fn assert_sync<T: Sync>() {}
+    fn assert_clone<T: Clone>() {}
+
+    assert_send::<Client>();
+    assert_sync::<Client>();
+    assert_clone::<Client>();
+
+    assert_send::<Request>();
+    assert_send::<RequestBuilder>();
+
+    #[cfg(not(target_arch = "wasm32"))]
+    {
+        assert_send::<Response>();
+    }
+
+    assert_send::<Error>();
+    assert_sync::<Error>();
+
+    assert_send::<Body>();
+    assert_sync::<Body>();
+}
+
+if_hyper! {
+    #[cfg(test)]
+    #[macro_use]
+    extern crate doc_comment;
+
+    #[cfg(test)]
+    doctest!("../README.md");
+
+    pub use self::async_impl::{
+        Body, Client, ClientBuilder, Request, RequestBuilder, Response, Upgraded,
+    };
+    pub use self::proxy::{Proxy, NoProxy};
+    #[cfg(feature = "__tls")]
+    // Re-exports, to be removed in a future release
+    pub use tls::{Certificate, Identity};
+    #[cfg(feature = "multipart")]
+    pub use self::async_impl::multipart;
+
+    mod async_impl;
+    #[cfg(feature = "blocking")]
+    pub mod blocking;
+    mod connect;
+    #[cfg(feature = "cookies")]
+    pub mod cookie;
+    pub mod dns;
+    mod proxy;
+    pub mod redirect;
+    pub mod retry;
+    #[cfg(feature = "__tls")]
+    pub mod tls;
+    mod util;
+
+    #[cfg(docsrs)]
+    pub use connect::uds::UnixSocketProvider;
+}
+
+if_wasm! {
+    mod wasm;
+    mod util;
+
+    pub use self::wasm::{Body, Client, ClientBuilder, Request, RequestBuilder, Response};
+    #[cfg(feature = "multipart")]
+    pub use self::wasm::multipart;
+}
diff --git a/rust/reqwest/src/proxy.rs b/rust/reqwest/src/proxy.rs
new file mode 100644
index 0000000000..add809ac68
--- /dev/null
+++ b/rust/reqwest/src/proxy.rs
@@ -0,0 +1,942 @@
+use std::error::Error;
+use std::fmt;
+use std::sync::Arc;
+
+use http::{header::HeaderValue, HeaderMap, Uri};
+use hyper_util::client::proxy::matcher;
+
+use crate::into_url::{IntoUrl, IntoUrlSealed};
+use crate::Url;
+
+// # Internals
+//
+// This module is a couple pieces:
+//
+// - The public builder API
+// - The internal built types that our Connector knows how to use.
+//
+// The user creates a builder (`reqwest::Proxy`), and configures any extras.
+// Once that type is passed to the `ClientBuilder`, we convert it into the
+// built matcher types, making use of `hyper-util`'s matchers.
+
+/// Configuration of a proxy that a `Client` should pass requests to.
+///
+/// A `Proxy` has a couple pieces to it:
+///
+/// - a URL of how to talk to the proxy
+/// - rules on what `Client` requests should be directed to the proxy
+///
+/// For instance, let's look at `Proxy::http`:
+///
+/// ```rust
+/// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+/// let proxy = reqwest::Proxy::http("https://secure.example")?;
+/// # Ok(())
+/// # }
+/// ```
+///
+/// This proxy will intercept all HTTP requests, and make use of the proxy
+/// at `https://secure.example`. A request to `http://hyper.rs` will talk
+/// to your proxy. A request to `https://hyper.rs` will not.
+///
+/// Multiple `Proxy` rules can be configured for a `Client`. The `Client` will
+/// check each `Proxy` in the order it was added. This could mean that a
+/// `Proxy` added first with eager intercept rules, such as `Proxy::all`,
+/// would prevent a `Proxy` later in the list from ever working, so take care.
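+///
+/// For example, in this sketch (the proxy URLs are placeholders), the `all`
+/// rule is checked first, so the `https` rule below it never gets a chance
+/// to intercept anything:
+///
+/// ```rust
+/// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+/// let client = reqwest::Client::builder()
+///     // Matches every request, regardless of scheme.
+///     .proxy(reqwest::Proxy::all("http://first.prox")?)
+///     // Never consulted: the rule above already matched.
+///     .proxy(reqwest::Proxy::https("http://second.prox")?)
+///     .build()?;
+/// # Ok(())
+/// # }
+/// ```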
+///
+/// By enabling the `"socks"` feature it is possible to use a socks proxy:
+/// ```rust
+/// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+/// let proxy = reqwest::Proxy::http("socks5://192.168.1.1:9000")?;
+/// # Ok(())
+/// # }
+/// ```
+#[derive(Clone)]
+pub struct Proxy {
+    extra: Extra,
+    intercept: Intercept,
+    no_proxy: Option<NoProxy>,
+}
+
+/// A configuration for filtering out requests that shouldn't be proxied
+#[derive(Clone, Debug, Default)]
+pub struct NoProxy {
+    inner: String,
+}
+
+#[derive(Clone)]
+struct Extra {
+    auth: Option<HeaderValue>,
+    misc: Option<HeaderMap>,
+}
+
+// ===== Internal =====
+
+pub(crate) struct Matcher {
+    inner: Matcher_,
+    extra: Extra,
+    maybe_has_http_auth: bool,
+    maybe_has_http_custom_headers: bool,
+}
+
+enum Matcher_ {
+    Util(matcher::Matcher),
+    Custom(Custom),
+}
+
+/// Our own type, wrapping an `Intercept`, since we may have a few additional
+/// pieces attached thanks to `reqwest`'s extra proxy configuration.
+pub(crate) struct Intercepted {
+    inner: matcher::Intercept,
+    /// This is because of `reqwest::Proxy`'s design which allows configuring
+    /// an explicit auth, besides what might have been in the URL (or Custom).
+    extra: Extra,
+}
+
+/*
+impl ProxyScheme {
+    fn maybe_http_auth(&self) -> Option<&HeaderValue> {
+        match self {
+            ProxyScheme::Http { auth, .. } | ProxyScheme::Https { auth, .. } => auth.as_ref(),
+            #[cfg(feature = "socks")]
+            _ => None,
+        }
+    }
+
+    fn maybe_http_custom_headers(&self) -> Option<&HeaderMap> {
+        match self {
+            ProxyScheme::Http { misc, .. } | ProxyScheme::Https { misc, .. } => misc.as_ref(),
+            #[cfg(feature = "socks")]
+            _ => None,
+        }
+    }
+}
+*/
+
+/// Trait used for converting into a proxy scheme. This trait supports
+/// parsing from a URL-like type, whilst also supporting proxy schemes
+/// built directly using the factory methods.
+pub trait IntoProxy {
+    fn into_proxy(self) -> crate::Result<Url>;
+}
+
+impl<S: IntoUrl> IntoProxy for S {
+    fn into_proxy(self) -> crate::Result<Url> {
+        match self.as_str().into_url() {
+            Ok(mut url) => {
+                // If the scheme is a SOCKS protocol and no port is specified, set the default
+                if url.port().is_none()
+                    && matches!(url.scheme(), "socks4" | "socks4a" | "socks5" | "socks5h")
+                {
+                    let _ = url.set_port(Some(1080));
+                }
+                Ok(url)
+            }
+            Err(e) => {
+                let mut presumed_to_have_scheme = true;
+                let mut source = e.source();
+                while let Some(err) = source {
+                    if let Some(parse_error) = err.downcast_ref::<url::ParseError>() {
+                        if *parse_error == url::ParseError::RelativeUrlWithoutBase {
+                            presumed_to_have_scheme = false;
+                            break;
+                        }
+                    } else if err.downcast_ref::<crate::error::BadScheme>().is_some() {
+                        presumed_to_have_scheme = false;
+                        break;
+                    }
+                    source = err.source();
+                }
+                if presumed_to_have_scheme {
+                    return Err(crate::error::builder(e));
+                }
+                // the issue could have been caused by a missing scheme, so we try adding http://
+                let try_this = format!("http://{}", self.as_str());
+                try_this.into_url().map_err(|_| {
+                    // return the original error
+                    crate::error::builder(e)
+                })
+            }
+        }
+    }
+}
+
+// These bounds are accidentally leaked by the blanket impl of IntoProxy
+// for all types that implement IntoUrl. So, this function exists to detect
+// if we were to break those bounds for a user.
+fn _implied_bounds() {
+    fn prox<T: IntoProxy>(_t: T) {}
+
+    fn url<T: IntoUrl>(t: T) {
+        prox(t);
+    }
+}
+
+impl Proxy {
+    /// Proxy all HTTP traffic to the passed URL.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # extern crate reqwest;
+    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+    /// let client = reqwest::Client::builder()
+    ///     .proxy(reqwest::Proxy::http("https://my.prox")?)
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// # fn main() {}
+    /// ```
+    pub fn http<U: IntoProxy>(proxy_scheme: U) -> crate::Result<Proxy> {
+        Ok(Proxy::new(Intercept::Http(proxy_scheme.into_proxy()?)))
+    }
+
+    /// Proxy all HTTPS traffic to the passed URL.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # extern crate reqwest;
+    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+    /// let client = reqwest::Client::builder()
+    ///     .proxy(reqwest::Proxy::https("https://example.prox:4545")?)
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// # fn main() {}
+    /// ```
+    pub fn https<U: IntoProxy>(proxy_scheme: U) -> crate::Result<Proxy> {
+        Ok(Proxy::new(Intercept::Https(proxy_scheme.into_proxy()?)))
+    }
+
+    /// Proxy **all** traffic to the passed URL.
+    ///
+    /// "All" refers to `https` and `http` URLs. Other schemes are not
+    /// recognized by reqwest.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # extern crate reqwest;
+    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+    /// let client = reqwest::Client::builder()
+    ///     .proxy(reqwest::Proxy::all("http://pro.xy")?)
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// # fn main() {}
+    /// ```
+    pub fn all<U: IntoProxy>(proxy_scheme: U) -> crate::Result<Proxy> {
+        Ok(Proxy::new(Intercept::All(proxy_scheme.into_proxy()?)))
+    }
+
+    /// Provide a custom function to determine what traffic to proxy to where.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # extern crate reqwest;
+    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+    /// let target = reqwest::Url::parse("https://my.prox")?;
+    /// let client = reqwest::Client::builder()
+    ///     .proxy(reqwest::Proxy::custom(move |url| {
+    ///         if url.host_str() == Some("hyper.rs") {
+    ///             Some(target.clone())
+    ///         } else {
+    ///             None
+    ///         }
+    ///     }))
+    ///     .build()?;
+    /// # Ok(())
+    /// # }
+    /// # fn main() {}
+    /// ```
+    pub fn custom<F>(fun: F) -> Proxy
+    where
+        F: Fn(&Url) -> Option<Url> + Send + Sync + 'static,
+    {
+        Proxy::new(Intercept::Custom(Custom {
+            func: Arc::new(move |url| fun(url).map(IntoProxy::into_proxy)),
+            no_proxy: None,
+        }))
+    }
+
+    fn new(intercept: Intercept) -> Proxy {
+        Proxy {
+            extra: Extra {
+                auth: None,
+                misc: None,
+            },
+            intercept,
+            no_proxy: None,
+        }
+    }
+
+    /// Set the `Proxy-Authorization` header using Basic auth.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # extern crate reqwest;
+    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+    /// let proxy = reqwest::Proxy::https("http://localhost:1234")?
+    ///     .basic_auth("Aladdin", "open sesame");
+    /// # Ok(())
+    /// # }
+    /// # fn main() {}
+    /// ```
+    pub fn basic_auth(mut self, username: &str, password: &str) -> Proxy {
+        match self.intercept {
+            Intercept::All(ref mut s)
+            | Intercept::Http(ref mut s)
+            | Intercept::Https(ref mut s) => url_auth(s, username, password),
+            Intercept::Custom(_) => {
+                let header = encode_basic_auth(username, password);
+                self.extra.auth = Some(header);
+            }
+        }
+
+        self
+    }
+
+    /// Set the `Proxy-Authorization` header to a specified value.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # extern crate reqwest;
+    /// # use reqwest::header::*;
+    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+    /// let proxy = reqwest::Proxy::https("http://localhost:1234")?
+    ///     .custom_http_auth(HeaderValue::from_static("justletmeinalreadyplease"));
+    /// # Ok(())
+    /// # }
+    /// # fn main() {}
+    /// ```
+    pub fn custom_http_auth(mut self, header_value: HeaderValue) -> Proxy {
+        self.extra.auth = Some(header_value);
+        self
+    }
+
+    /// Adds custom headers to this Proxy
+    ///
+    /// # Example
+    /// ```
+    /// # extern crate reqwest;
+    /// # use reqwest::header::*;
+    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+    /// let mut headers = HeaderMap::new();
+    /// headers.insert(USER_AGENT, "reqwest".parse().unwrap());
+    /// let proxy = reqwest::Proxy::https("http://localhost:1234")?
+    ///     .headers(headers);
+    /// # Ok(())
+    /// # }
+    /// # fn main() {}
+    /// ```
+    pub fn headers(mut self, headers: HeaderMap) -> Proxy {
+        match self.intercept {
+            Intercept::All(_) | Intercept::Http(_) | Intercept::Https(_) | Intercept::Custom(_) => {
+                self.extra.misc = Some(headers);
+            }
+        }
+
+        self
+    }
+
+    /// Adds a `No Proxy` exclusion list to this Proxy
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # extern crate reqwest;
+    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
+    /// let proxy = reqwest::Proxy::https("http://localhost:1234")?
+    ///     .no_proxy(reqwest::NoProxy::from_string("direct.tld, sub.direct2.tld"));
+    /// # Ok(())
+    /// # }
+    /// # fn main() {}
+    /// ```
+    pub fn no_proxy(mut self, no_proxy: Option<NoProxy>) -> Proxy {
+        self.no_proxy = no_proxy;
+        self
+    }
+
+    pub(crate) fn into_matcher(self) -> Matcher {
+        let Proxy {
+            intercept,
+            extra,
+            no_proxy,
+        } = self;
+
+        let maybe_has_http_auth;
+        let maybe_has_http_custom_headers;
+
+        let inner = match intercept {
+            Intercept::All(url) => {
+                maybe_has_http_auth = cache_maybe_has_http_auth(&url, &extra.auth);
+                maybe_has_http_custom_headers =
+                    cache_maybe_has_http_custom_headers(&url, &extra.misc);
+                Matcher_::Util(
+                    matcher::Matcher::builder()
+                        .all(String::from(url))
+                        .no(no_proxy.as_ref().map(|n| n.inner.as_ref()).unwrap_or(""))
+                        .build(),
+                )
+            }
+            Intercept::Http(url) => {
+                maybe_has_http_auth = cache_maybe_has_http_auth(&url, &extra.auth);
+                maybe_has_http_custom_headers =
+                    cache_maybe_has_http_custom_headers(&url, &extra.misc);
+                Matcher_::Util(
+                    matcher::Matcher::builder()
+                        .http(String::from(url))
+                        .no(no_proxy.as_ref().map(|n| n.inner.as_ref()).unwrap_or(""))
+                        .build(),
+                )
+            }
+            Intercept::Https(url) => {
+                maybe_has_http_auth = cache_maybe_has_http_auth(&url, &extra.auth);
+                maybe_has_http_custom_headers =
+                    cache_maybe_has_http_custom_headers(&url, &extra.misc);
+                Matcher_::Util(
+                    matcher::Matcher::builder()
+                        .https(String::from(url))
+                        .no(no_proxy.as_ref().map(|n| n.inner.as_ref()).unwrap_or(""))
+                        .build(),
+                )
+            }
+            Intercept::Custom(mut custom) => {
+                maybe_has_http_auth = true; // never know
+                maybe_has_http_custom_headers = true;
+                custom.no_proxy = no_proxy;
+                Matcher_::Custom(custom)
+            }
+        };
+
+        Matcher {
+            inner,
+            extra,
+            maybe_has_http_auth,
+            maybe_has_http_custom_headers,
+        }
+    }
+
+    /*
+    pub(crate) fn maybe_has_http_auth(&self) -> bool {
+        match &self.intercept {
+            Intercept::All(p) | Intercept::Http(p) => p.maybe_http_auth().is_some(),
+            // Custom *may* match 'http', so assume so.
+ Intercept::Custom(_) => true, + Intercept::System(system) => system + .get("http") + .and_then(|s| s.maybe_http_auth()) + .is_some(), + Intercept::Https(_) => false, + } + } + + pub(crate) fn http_basic_auth(&self, uri: &D) -> Option { + match &self.intercept { + Intercept::All(p) | Intercept::Http(p) => p.maybe_http_auth().cloned(), + Intercept::System(system) => system + .get("http") + .and_then(|s| s.maybe_http_auth().cloned()), + Intercept::Custom(custom) => { + custom.call(uri).and_then(|s| s.maybe_http_auth().cloned()) + } + Intercept::Https(_) => None, + } + } + */ +} + +fn cache_maybe_has_http_auth(url: &Url, extra: &Option) -> bool { + url.scheme() == "http" && (url.password().is_some() || extra.is_some()) +} + +fn cache_maybe_has_http_custom_headers(url: &Url, extra: &Option) -> bool { + url.scheme() == "http" && extra.is_some() +} + +impl fmt::Debug for Proxy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_tuple("Proxy") + .field(&self.intercept) + .field(&self.no_proxy) + .finish() + } +} + +impl NoProxy { + /// Returns a new no-proxy configuration based on environment variables (or `None` if no variables are set) + /// see [self::NoProxy::from_string()] for the string format + pub fn from_env() -> Option { + let raw = std::env::var("NO_PROXY") + .or_else(|_| std::env::var("no_proxy")) + .ok()?; + + // Per the docs, this returns `None` if no environment variable is set. We can only reach + // here if an env var is set, so we return `Some(NoProxy::default)` if `from_string` + // returns None, which occurs with an empty string. + Some(Self::from_string(&raw).unwrap_or_default()) + } + + /// Returns a new no-proxy configuration based on a `no_proxy` string (or `None` if no variables + /// are set) + /// The rules are as follows: + /// * The environment variable `NO_PROXY` is checked, if it is not set, `no_proxy` is checked + /// * If neither environment variable is set, `None` is returned + /// * Entries are expected to be comma-separated (whitespace between entries is ignored) + /// * IP addresses (both IPv4 and IPv6) are allowed, as are optional subnet masks (by adding /size, + /// for example "`192.168.1.0/24`"). + /// * An entry "`*`" matches all hostnames (this is the only wildcard allowed) + /// * Any other entry is considered a domain name (and may contain a leading dot, for example `google.com` + /// and `.google.com` are equivalent) and would match both that domain AND all subdomains. + /// + /// For example, if `"NO_PROXY=google.com, 192.168.1.0/24"` was set, all the following would match + /// (and therefore would bypass the proxy): + /// * `http://google.com/` + /// * `http://www.google.com/` + /// * `http://192.168.1.42/` + /// + /// The URL `http://notgoogle.com/` would not match. + pub fn from_string(no_proxy_list: &str) -> Option { + // lazy parsed, to not make the type public in hyper-util + Some(NoProxy { + inner: no_proxy_list.into(), + }) + } +} + +impl Matcher { + pub(crate) fn system() -> Self { + Self { + inner: Matcher_::Util(matcher::Matcher::from_system()), + extra: Extra { + auth: None, + misc: None, + }, + // maybe env vars have auth! 
+ maybe_has_http_auth: true, + maybe_has_http_custom_headers: true, + } + } + + pub(crate) fn intercept(&self, dst: &Uri) -> Option { + let inner = match self.inner { + Matcher_::Util(ref m) => m.intercept(dst), + Matcher_::Custom(ref c) => c.call(dst), + }; + + inner.map(|inner| Intercepted { + inner, + extra: self.extra.clone(), + }) + } + + /// Return whether this matcher might provide HTTP (not s) auth. + /// + /// This is very specific. If this proxy needs auth to be part of a Forward + /// request (instead of a tunnel), this should return true. + /// + /// If it's not sure, this should return true. + /// + /// This is meant as a hint to allow skipping a more expensive check + /// (calling `intercept()`) if it will never need auth when Forwarding. + pub(crate) fn maybe_has_http_auth(&self) -> bool { + self.maybe_has_http_auth + } + + pub(crate) fn http_non_tunnel_basic_auth(&self, dst: &Uri) -> Option { + if let Some(proxy) = self.intercept(dst) { + if proxy.uri().scheme_str() == Some("http") { + return proxy.basic_auth().cloned(); + } + } + + None + } + + pub(crate) fn maybe_has_http_custom_headers(&self) -> bool { + self.maybe_has_http_custom_headers + } + + pub(crate) fn http_non_tunnel_custom_headers(&self, dst: &Uri) -> Option { + if let Some(proxy) = self.intercept(dst) { + if proxy.uri().scheme_str() == Some("http") { + return proxy.custom_headers().cloned(); + } + } + + None + } +} + +impl fmt::Debug for Matcher { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.inner { + Matcher_::Util(ref m) => m.fmt(f), + Matcher_::Custom(ref m) => m.fmt(f), + } + } +} + +impl Intercepted { + pub(crate) fn uri(&self) -> &http::Uri { + self.inner.uri() + } + + pub(crate) fn basic_auth(&self) -> Option<&HeaderValue> { + if let Some(ref val) = self.extra.auth { + return Some(val); + } + self.inner.basic_auth() + } + + pub(crate) fn custom_headers(&self) -> Option<&HeaderMap> { + if let Some(ref val) = self.extra.misc { + return Some(val); + } + None + } + + #[cfg(feature = "socks")] + pub(crate) fn raw_auth(&self) -> Option<(&str, &str)> { + self.inner.raw_auth() + } +} + +impl fmt::Debug for Intercepted { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.inner.uri().fmt(f) + } +} + +/* +impl ProxyScheme { + /// Use a username and password when connecting to the proxy server + fn with_basic_auth, U: Into>( + mut self, + username: T, + password: U, + ) -> Self { + self.set_basic_auth(username, password); + self + } + + fn set_basic_auth, U: Into>(&mut self, username: T, password: U) { + match *self { + ProxyScheme::Http { ref mut auth, .. } => { + let header = encode_basic_auth(&username.into(), &password.into()); + *auth = Some(header); + } + ProxyScheme::Https { ref mut auth, .. } => { + let header = encode_basic_auth(&username.into(), &password.into()); + *auth = Some(header); + } + #[cfg(feature = "socks")] + ProxyScheme::Socks4 { .. } => { + panic!("Socks4 is not supported for this method") + } + #[cfg(feature = "socks")] + ProxyScheme::Socks5 { ref mut auth, .. } => { + *auth = Some((username.into(), password.into())); + } + } + } + + fn set_custom_http_auth(&mut self, header_value: HeaderValue) { + match *self { + ProxyScheme::Http { ref mut auth, .. } => { + *auth = Some(header_value); + } + ProxyScheme::Https { ref mut auth, .. } => { + *auth = Some(header_value); + } + #[cfg(feature = "socks")] + ProxyScheme::Socks4 { .. } => { + panic!("Socks4 is not supported for this method") + } + #[cfg(feature = "socks")] + ProxyScheme::Socks5 { .. 
} => { + panic!("Socks5 is not supported for this method") + } + } + } + + fn set_custom_headers(&mut self, headers: HeaderMap) { + match *self { + ProxyScheme::Http { ref mut misc, .. } => { + misc.get_or_insert_with(HeaderMap::new).extend(headers) + } + ProxyScheme::Https { ref mut misc, .. } => { + misc.get_or_insert_with(HeaderMap::new).extend(headers) + } + #[cfg(feature = "socks")] + ProxyScheme::Socks4 { .. } => { + panic!("Socks4 is not supported for this method") + } + #[cfg(feature = "socks")] + ProxyScheme::Socks5 { .. } => { + panic!("Socks5 is not supported for this method") + } + } + } + + fn if_no_auth(mut self, update: &Option) -> Self { + match self { + ProxyScheme::Http { ref mut auth, .. } => { + if auth.is_none() { + *auth = update.clone(); + } + } + ProxyScheme::Https { ref mut auth, .. } => { + if auth.is_none() { + *auth = update.clone(); + } + } + #[cfg(feature = "socks")] + ProxyScheme::Socks4 { .. } => {} + #[cfg(feature = "socks")] + ProxyScheme::Socks5 { .. } => {} + } + + self + } + + /// Convert a URL into a proxy scheme + /// + /// Supported schemes: HTTP, HTTPS, (SOCKS4, SOCKS5, SOCKS5H if `socks` feature is enabled). + // Private for now... + fn parse(url: Url) -> crate::Result { + use url::Position; + + // Resolve URL to a host and port + #[cfg(feature = "socks")] + let to_addr = || { + let addrs = url + .socket_addrs(|| match url.scheme() { + "socks4" | "socks4a" | "socks5" | "socks5h" => Some(1080), + _ => None, + }) + .map_err(crate::error::builder)?; + addrs + .into_iter() + .next() + .ok_or_else(|| crate::error::builder("unknown proxy scheme")) + }; + + let mut scheme = match url.scheme() { + "http" => Self::http(&url[Position::BeforeHost..Position::AfterPort])?, + "https" => Self::https(&url[Position::BeforeHost..Position::AfterPort])?, + #[cfg(feature = "socks")] + "socks4" => Self::socks4(to_addr()?)?, + #[cfg(feature = "socks")] + "socks4a" => Self::socks4a(to_addr()?)?, + #[cfg(feature = "socks")] + "socks5" => Self::socks5(to_addr()?)?, + #[cfg(feature = "socks")] + "socks5h" => Self::socks5h(to_addr()?)?, + _ => return Err(crate::error::builder("unknown proxy scheme")), + }; + + if let Some(pwd) = url.password() { + let decoded_username = percent_decode(url.username().as_bytes()).decode_utf8_lossy(); + let decoded_password = percent_decode(pwd.as_bytes()).decode_utf8_lossy(); + scheme = scheme.with_basic_auth(decoded_username, decoded_password); + } + + Ok(scheme) + } +} +*/ + +#[derive(Clone, Debug)] +enum Intercept { + All(Url), + Http(Url), + Https(Url), + Custom(Custom), +} + +fn url_auth(url: &mut Url, username: &str, password: &str) { + url.set_username(username).expect("is a base"); + url.set_password(Some(password)).expect("is a base"); +} + +#[derive(Clone)] +struct Custom { + func: Arc Option> + Send + Sync + 'static>, + no_proxy: Option, +} + +impl Custom { + fn call(&self, uri: &http::Uri) -> Option { + let url = format!( + "{}://{}{}{}", + uri.scheme()?, + uri.host()?, + uri.port().map_or("", |_| ":"), + uri.port().map_or(String::new(), |p| p.to_string()) + ) + .parse() + .expect("should be valid Url"); + + (self.func)(&url) + .and_then(|result| result.ok()) + .and_then(|target| { + let m = matcher::Matcher::builder() + .all(String::from(target)) + .build(); + + m.intercept(uri) + }) + //.map(|scheme| scheme.if_no_auth(&self.auth)) + } +} + +impl fmt::Debug for Custom { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("_") + } +} + +pub(crate) fn encode_basic_auth(username: &str, password: &str) -> HeaderValue 
{ + crate::util::basic_auth(username, Some(password)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn url(s: &str) -> http::Uri { + s.parse().unwrap() + } + + fn intercepted_uri(p: &Matcher, s: &str) -> Uri { + p.intercept(&s.parse().unwrap()).unwrap().uri().clone() + } + + #[test] + fn test_http() { + let target = "http://example.domain/"; + let p = Proxy::http(target).unwrap().into_matcher(); + + let http = "http://hyper.rs"; + let other = "https://hyper.rs"; + + assert_eq!(intercepted_uri(&p, http), target); + assert!(p.intercept(&url(other)).is_none()); + } + + #[test] + fn test_https() { + let target = "http://example.domain/"; + let p = Proxy::https(target).unwrap().into_matcher(); + + let http = "http://hyper.rs"; + let other = "https://hyper.rs"; + + assert!(p.intercept(&url(http)).is_none()); + assert_eq!(intercepted_uri(&p, other), target); + } + + #[test] + fn test_all() { + let target = "http://example.domain/"; + let p = Proxy::all(target).unwrap().into_matcher(); + + let http = "http://hyper.rs"; + let https = "https://hyper.rs"; + // no longer supported + //let other = "x-youve-never-heard-of-me-mr-proxy://hyper.rs"; + + assert_eq!(intercepted_uri(&p, http), target); + assert_eq!(intercepted_uri(&p, https), target); + //assert_eq!(intercepted_uri(&p, other), target); + } + + #[test] + fn test_custom() { + let target1 = "http://example.domain/"; + let target2 = "https://example.domain/"; + let p = Proxy::custom(move |url| { + if url.host_str() == Some("hyper.rs") { + target1.parse().ok() + } else if url.scheme() == "http" { + target2.parse().ok() + } else { + None:: + } + }) + .into_matcher(); + + let http = "http://seanmonstar.com"; + let https = "https://hyper.rs"; + let other = "x-youve-never-heard-of-me-mr-proxy://seanmonstar.com"; + + assert_eq!(intercepted_uri(&p, http), target2); + assert_eq!(intercepted_uri(&p, https), target1); + assert!(p.intercept(&url(other)).is_none()); + } + + #[test] + fn test_standard_with_custom_auth_header() { + let target = "http://example.domain/"; + let p = Proxy::all(target) + .unwrap() + .custom_http_auth(http::HeaderValue::from_static("testme")) + .into_matcher(); + + let got = p.intercept(&url("http://anywhere.local")).unwrap(); + let auth = got.basic_auth().unwrap(); + assert_eq!(auth, "testme"); + } + + #[test] + fn test_custom_with_custom_auth_header() { + let target = "http://example.domain/"; + let p = Proxy::custom(move |_| target.parse::().ok()) + .custom_http_auth(http::HeaderValue::from_static("testme")) + .into_matcher(); + + let got = p.intercept(&url("http://anywhere.local")).unwrap(); + let auth = got.basic_auth().unwrap(); + assert_eq!(auth, "testme"); + } + + #[test] + fn test_maybe_has_http_auth() { + let m = Proxy::all("https://letme:in@yo.local") + .unwrap() + .into_matcher(); + assert!(!m.maybe_has_http_auth(), "https always tunnels"); + + let m = Proxy::all("http://letme:in@yo.local") + .unwrap() + .into_matcher(); + assert!(m.maybe_has_http_auth(), "http forwards"); + } + + #[test] + fn test_socks_proxy_default_port() { + { + let m = Proxy::all("socks5://example.com").unwrap().into_matcher(); + + let http = "http://hyper.rs"; + let https = "https://hyper.rs"; + + assert_eq!(intercepted_uri(&m, http).port_u16(), Some(1080)); + assert_eq!(intercepted_uri(&m, https).port_u16(), Some(1080)); + + // custom port + let m = Proxy::all("socks5://example.com:1234") + .unwrap() + .into_matcher(); + + assert_eq!(intercepted_uri(&m, http).port_u16(), Some(1234)); + assert_eq!(intercepted_uri(&m, https).port_u16(), 
Some(1234)); + } + } +} diff --git a/rust/reqwest/src/redirect.rs b/rust/reqwest/src/redirect.rs new file mode 100644 index 0000000000..fb164f5a6f --- /dev/null +++ b/rust/reqwest/src/redirect.rs @@ -0,0 +1,433 @@ +//! Redirect Handling +//! +//! By default, a `Client` will automatically handle HTTP redirects, having a +//! maximum redirect chain of 10 hops. To customize this behavior, a +//! `redirect::Policy` can be used with a `ClientBuilder`. + +use std::fmt; +use std::{error::Error as StdError, sync::Arc}; + +use crate::header::{AUTHORIZATION, COOKIE, PROXY_AUTHORIZATION, REFERER, WWW_AUTHENTICATE}; +use http::{HeaderMap, HeaderValue}; +use hyper::StatusCode; + +use crate::{async_impl, Url}; +use tower_http::follow_redirect::policy::{ + Action as TowerAction, Attempt as TowerAttempt, Policy as TowerPolicy, +}; + +/// A type that controls the policy on how to handle the following of redirects. +/// +/// The default value will catch redirect loops, and has a maximum of 10 +/// redirects it will follow in a chain before returning an error. +/// +/// - `limited` can be used have the same as the default behavior, but adjust +/// the allowed maximum redirect hops in a chain. +/// - `none` can be used to disable all redirect behavior. +/// - `custom` can be used to create a customized policy. +pub struct Policy { + inner: PolicyKind, +} + +/// A type that holds information on the next request and previous requests +/// in redirect chain. +#[derive(Debug)] +pub struct Attempt<'a> { + status: StatusCode, + next: &'a Url, + previous: &'a [Url], +} + +/// An action to perform when a redirect status code is found. +#[derive(Debug)] +pub struct Action { + inner: ActionKind, +} + +impl Policy { + /// Create a `Policy` with a maximum number of redirects. + /// + /// An `Error` will be returned if the max is reached. + pub fn limited(max: usize) -> Self { + Self { + inner: PolicyKind::Limit(max), + } + } + + /// Create a `Policy` that does not follow any redirect. + pub fn none() -> Self { + Self { + inner: PolicyKind::None, + } + } + + /// Create a custom `Policy` using the passed function. + /// + /// # Note + /// + /// The default `Policy` handles a maximum loop + /// chain, but the custom variant does not do that for you automatically. + /// The custom policy should have some way of handling those. + /// + /// Information on the next request and previous requests can be found + /// on the [`Attempt`] argument passed to the closure. + /// + /// Actions can be conveniently created from methods on the + /// [`Attempt`]. + /// + /// # Example + /// + /// ```rust + /// # use reqwest::{Error, redirect}; + /// # + /// # fn run() -> Result<(), Error> { + /// let custom = redirect::Policy::custom(|attempt| { + /// if attempt.previous().len() > 5 { + /// attempt.error("too many redirects") + /// } else if attempt.url().host_str() == Some("example.domain") { + /// // prevent redirects to 'example.domain' + /// attempt.stop() + /// } else { + /// attempt.follow() + /// } + /// }); + /// let client = reqwest::Client::builder() + /// .redirect(custom) + /// .build()?; + /// # Ok(()) + /// # } + /// ``` + /// + /// [`Attempt`]: struct.Attempt.html + pub fn custom(policy: T) -> Self + where + T: Fn(Attempt) -> Action + Send + Sync + 'static, + { + Self { + inner: PolicyKind::Custom(Box::new(policy)), + } + } + + /// Apply this policy to a given [`Attempt`] to produce a [`Action`]. 
+    ///
+    /// # Note
+    ///
+    /// This method can be used together with `Policy::custom()`
+    /// to construct one `Policy` that wraps another.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use reqwest::{Error, redirect};
+    /// #
+    /// # fn run() -> Result<(), Error> {
+    /// let custom = redirect::Policy::custom(|attempt| {
+    ///     eprintln!("{}, Location: {:?}", attempt.status(), attempt.url());
+    ///     redirect::Policy::default().redirect(attempt)
+    /// });
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn redirect(&self, attempt: Attempt) -> Action {
+        match self.inner {
+            PolicyKind::Custom(ref custom) => custom(attempt),
+            PolicyKind::Limit(max) => {
+                // The first URL in the previous is the initial URL and not a redirection. It needs to be excluded.
+                if attempt.previous.len() > max {
+                    attempt.error(TooManyRedirects)
+                } else {
+                    attempt.follow()
+                }
+            }
+            PolicyKind::None => attempt.stop(),
+        }
+    }
+
+    pub(crate) fn check(&self, status: StatusCode, next: &Url, previous: &[Url]) -> ActionKind {
+        self.redirect(Attempt {
+            status,
+            next,
+            previous,
+        })
+        .inner
+    }
+
+    pub(crate) fn is_default(&self) -> bool {
+        matches!(self.inner, PolicyKind::Limit(10))
+    }
+}
+
+impl Default for Policy {
+    fn default() -> Policy {
+        // Keep `is_default` in sync
+        Policy::limited(10)
+    }
+}
+
+impl<'a> Attempt<'a> {
+    /// Get the type of redirect.
+    pub fn status(&self) -> StatusCode {
+        self.status
+    }
+
+    /// Get the next URL to redirect to.
+    pub fn url(&self) -> &Url {
+        self.next
+    }
+
+    /// Get the list of previous URLs that have already been requested in this chain.
+    pub fn previous(&self) -> &[Url] {
+        self.previous
+    }
+
+    /// Returns an action meaning reqwest should follow the next URL.
+    pub fn follow(self) -> Action {
+        Action {
+            inner: ActionKind::Follow,
+        }
+    }
+
+    /// Returns an action meaning reqwest should not follow the next URL.
+    ///
+    /// The 30x response will be returned as the `Ok` result.
+    pub fn stop(self) -> Action {
+        Action {
+            inner: ActionKind::Stop,
+        }
+    }
+
+    /// Returns an action failing the redirect with an error.
+    ///
+    /// The `Error` will be returned for the result of the sent request.
+    pub fn error<E: Into<Box<dyn StdError + Send + Sync>>>(self, error: E) -> Action {
+        Action {
+            inner: ActionKind::Error(error.into()),
+        }
+    }
+}
+
+enum PolicyKind {
+    Custom(Box<dyn Fn(Attempt) -> Action + Send + Sync + 'static>),
+    Limit(usize),
+    None,
+}
+
+impl fmt::Debug for Policy {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_tuple("Policy").field(&self.inner).finish()
+    }
+}
+
+impl fmt::Debug for PolicyKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            PolicyKind::Custom(..)
=> f.pad("Custom"), + PolicyKind::Limit(max) => f.debug_tuple("Limit").field(&max).finish(), + PolicyKind::None => f.pad("None"), + } + } +} + +// pub(crate) + +#[derive(Debug)] +pub(crate) enum ActionKind { + Follow, + Stop, + Error(Box), +} + +pub(crate) fn remove_sensitive_headers(headers: &mut HeaderMap, next: &Url, previous: &[Url]) { + if let Some(previous) = previous.last() { + let cross_host = next.host_str() != previous.host_str() + || next.port_or_known_default() != previous.port_or_known_default(); + if cross_host { + headers.remove(AUTHORIZATION); + headers.remove(COOKIE); + headers.remove("cookie2"); + headers.remove(PROXY_AUTHORIZATION); + headers.remove(WWW_AUTHENTICATE); + } + } +} + +#[derive(Debug)] +struct TooManyRedirects; + +impl fmt::Display for TooManyRedirects { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("too many redirects") + } +} + +impl StdError for TooManyRedirects {} + +#[derive(Clone)] +pub(crate) struct TowerRedirectPolicy { + policy: Arc, + referer: bool, + urls: Vec, + https_only: bool, +} + +impl TowerRedirectPolicy { + pub(crate) fn new(policy: Policy) -> Self { + Self { + policy: Arc::new(policy), + referer: false, + urls: Vec::new(), + https_only: false, + } + } + + pub(crate) fn with_referer(&mut self, referer: bool) -> &mut Self { + self.referer = referer; + self + } + + pub(crate) fn with_https_only(&mut self, https_only: bool) -> &mut Self { + self.https_only = https_only; + self + } +} + +fn make_referer(next: &Url, previous: &Url) -> Option { + if next.scheme() == "http" && previous.scheme() == "https" { + return None; + } + + let mut referer = previous.clone(); + let _ = referer.set_username(""); + let _ = referer.set_password(None); + referer.set_fragment(None); + referer.as_str().parse().ok() +} + +impl TowerPolicy for TowerRedirectPolicy { + fn redirect(&mut self, attempt: &TowerAttempt<'_>) -> Result { + let previous_url = + Url::parse(&attempt.previous().to_string()).expect("Previous URL must be valid"); + + let next_url = match Url::parse(&attempt.location().to_string()) { + Ok(url) => url, + Err(e) => return Err(crate::error::builder(e)), + }; + + self.urls.push(previous_url.clone()); + + match self.policy.check(attempt.status(), &next_url, &self.urls) { + ActionKind::Follow => { + if next_url.scheme() != "http" && next_url.scheme() != "https" { + return Err(crate::error::url_bad_scheme(next_url)); + } + + if self.https_only && next_url.scheme() != "https" { + return Err(crate::error::redirect( + crate::error::url_bad_scheme(next_url.clone()), + next_url, + )); + } + Ok(TowerAction::Follow) + } + ActionKind::Stop => Ok(TowerAction::Stop), + ActionKind::Error(e) => Err(crate::error::redirect(e, previous_url)), + } + } + + fn on_request(&mut self, req: &mut http::Request) { + if let Ok(next_url) = Url::parse(&req.uri().to_string()) { + remove_sensitive_headers(req.headers_mut(), &next_url, &self.urls); + if self.referer { + if let Some(previous_url) = self.urls.last() { + if let Some(v) = make_referer(&next_url, previous_url) { + req.headers_mut().insert(REFERER, v); + } + } + } + }; + } + + // This must be implemented to make 307 and 308 redirects work + fn clone_body(&self, body: &async_impl::body::Body) -> Option { + body.try_clone() + } +} + +#[test] +fn test_redirect_policy_limit() { + let policy = Policy::default(); + let next = Url::parse("http://x.y/z").unwrap(); + let mut previous = (0..=9) + .map(|i| Url::parse(&format!("http://a.b/c/{i}")).unwrap()) + .collect::>(); + + match 
policy.check(StatusCode::FOUND, &next, &previous) { + ActionKind::Follow => (), + other => panic!("unexpected {other:?}"), + } + + previous.push(Url::parse("http://a.b.d/e/33").unwrap()); + + match policy.check(StatusCode::FOUND, &next, &previous) { + ActionKind::Error(err) if err.is::() => (), + other => panic!("unexpected {other:?}"), + } +} + +#[test] +fn test_redirect_policy_limit_to_0() { + let policy = Policy::limited(0); + let next = Url::parse("http://x.y/z").unwrap(); + let previous = vec![Url::parse("http://a.b/c").unwrap()]; + + match policy.check(StatusCode::FOUND, &next, &previous) { + ActionKind::Error(err) if err.is::() => (), + other => panic!("unexpected {other:?}"), + } +} + +#[test] +fn test_redirect_policy_custom() { + let policy = Policy::custom(|attempt| { + if attempt.url().host_str() == Some("foo") { + attempt.stop() + } else { + attempt.follow() + } + }); + + let next = Url::parse("http://bar/baz").unwrap(); + match policy.check(StatusCode::FOUND, &next, &[]) { + ActionKind::Follow => (), + other => panic!("unexpected {other:?}"), + } + + let next = Url::parse("http://foo/baz").unwrap(); + match policy.check(StatusCode::FOUND, &next, &[]) { + ActionKind::Stop => (), + other => panic!("unexpected {other:?}"), + } +} + +#[test] +fn test_remove_sensitive_headers() { + use hyper::header::{HeaderValue, ACCEPT, AUTHORIZATION, COOKIE}; + + let mut headers = HeaderMap::new(); + headers.insert(ACCEPT, HeaderValue::from_static("*/*")); + headers.insert(AUTHORIZATION, HeaderValue::from_static("let me in")); + headers.insert(COOKIE, HeaderValue::from_static("foo=bar")); + + let next = Url::parse("http://initial-domain.com/path").unwrap(); + let mut prev = vec![Url::parse("http://initial-domain.com/new_path").unwrap()]; + let mut filtered_headers = headers.clone(); + + remove_sensitive_headers(&mut headers, &next, &prev); + assert_eq!(headers, filtered_headers); + + prev.push(Url::parse("http://new-domain.com/path").unwrap()); + filtered_headers.remove(AUTHORIZATION); + filtered_headers.remove(COOKIE); + + remove_sensitive_headers(&mut headers, &next, &prev); + assert_eq!(headers, filtered_headers); +} diff --git a/rust/reqwest/src/response.rs b/rust/reqwest/src/response.rs new file mode 100644 index 0000000000..9c92cba533 --- /dev/null +++ b/rust/reqwest/src/response.rs @@ -0,0 +1,41 @@ +use url::Url; + +#[derive(Debug, Clone, PartialEq)] +pub(crate) struct ResponseUrl(pub Url); + +/// Extension trait for http::response::Builder objects +/// +/// Allows the user to add a `Url` to the http::Response +pub trait ResponseBuilderExt { + /// A builder method for the `http::response::Builder` type that allows the user to add a `Url` + /// to the `http::Response` + fn url(self, url: Url) -> Self; +} + +impl ResponseBuilderExt for http::response::Builder { + fn url(self, url: Url) -> Self { + self.extension(ResponseUrl(url)) + } +} + +#[cfg(test)] +mod tests { + use super::{ResponseBuilderExt, ResponseUrl}; + use http::response::Builder; + use url::Url; + + #[test] + fn test_response_builder_ext() { + let url = Url::parse("http://example.com").unwrap(); + let response = Builder::new() + .status(200) + .url(url.clone()) + .body(()) + .unwrap(); + + assert_eq!( + response.extensions().get::(), + Some(&ResponseUrl(url)) + ); + } +} diff --git a/rust/reqwest/src/retry.rs b/rust/reqwest/src/retry.rs new file mode 100644 index 0000000000..a3992e8182 --- /dev/null +++ b/rust/reqwest/src/retry.rs @@ -0,0 +1,477 @@ +//! Retry requests +//! +//! 
A `Client` has the ability to retry requests, by sending additional copies
+//! to the server if a response is considered retryable.
+//!
+//! The [`Builder`] makes it easier to configure what requests to retry, along
+//! with including best practices by default, such as a retry budget.
+//!
+//! # Defaults
+//!
+//! The default retry behavior of a `Client` is to only retry requests where an
+//! error or low-level protocol NACK is encountered that is known to be safe to
+//! retry. Note however that providing a specific retry policy will override
+//! the default, and you will need to explicitly include that behavior.
+//!
+//! All policies default to including a retry budget that permits 20% extra
+//! requests to be sent.
+//!
+//! # Scoped
+//!
+//! A client's retry policy is scoped. That means that the policy doesn't
+//! apply to all requests, but only those within a user-defined scope.
+//!
+//! Since all policies include a budget by default, it doesn't make sense to
+//! apply it on _all_ requests. Rather, the retry history applied by a budget
+//! should likely only be applied to the same host.
+//!
+//! # Classifiers
+//!
+//! A retry policy needs to be configured with a classifier that determines
+//! if a request should be retried. Knowledge of the destination server's
+//! behavior is required to make a safe classifier. **Requests should not be
+//! retried** if the server cannot safely handle the same request twice, or if
+//! it causes side effects.
+//!
+//! Some common properties to check include if the request method is
+//! idempotent, or if the response status code indicates a transient error.
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use tower::retry::budget::{Budget as _, TpsBudget as Budget};
+
+/// Builder to configure retries
+///
+/// Construct with [`for_host()`].
+#[derive(Debug)]
+pub struct Builder {
+    //backoff: Backoff,
+    budget: Option<f32>,
+    classifier: classify::Classifier,
+    max_retries_per_request: u32,
+    scope: scope::Scoped,
+}
+
+/// The internal type that we convert the builder into, that implements
+/// tower::retry::Policy privately.
+#[derive(Clone, Debug)]
+pub(crate) struct Policy {
+    budget: Option<Arc<Budget>>,
+    classifier: classify::Classifier,
+    max_retries_per_request: u32,
+    retry_cnt: u32,
+    scope: scope::Scoped,
+}
+
+//#[derive(Debug)]
+//struct Backoff;
+
+/// Create a retry builder with a request scope.
+///
+/// To provide a scope that isn't a closure, use the more general
+/// [`Builder::scoped()`].
+pub fn for_host<S>(host: S) -> Builder
+where
+    S: for<'a> PartialEq<&'a str> + Send + Sync + 'static,
+{
+    scoped(move |req| host == req.uri().host().unwrap_or(""))
+}
+
+/// Create a retry policy that will never retry any request.
+///
+/// This is useful for disabling the `Client`'s default behavior of retrying
+/// protocol nacks.
+pub fn never() -> Builder {
+    scoped(|_| false).no_budget()
+}
+
+fn scoped<F>(func: F) -> Builder
+where
+    F: Fn(&Req) -> bool + Send + Sync + 'static,
+{
+    Builder::scoped(scope::ScopeFn(func))
+}
+
+// ===== impl Builder =====
+
+impl Builder {
+    /// Create a scoped retry policy.
+    ///
+    /// For a more convenient constructor, see [`for_host()`].
+    pub fn scoped(scope: impl scope::Scope) -> Self {
+        Self {
+            budget: Some(0.2),
+            classifier: classify::Classifier::Never,
+            max_retries_per_request: 2, // on top of the original
+            scope: scope::Scoped::Dyn(Arc::new(scope)),
+        }
+    }
+
+    /// Set no retry budget.
+    ///
+    /// Sets that no budget will be enforced.
This could also be considered + /// to be an infinite budget. + /// + /// This is NOT recommended. Disabling the budget can make your system more + /// susceptible to retry storms. + pub fn no_budget(mut self) -> Self { + self.budget = None; + self + } + + /// Sets the max extra load the budget will allow. + /// + /// Think of the amount of requests your client generates, and how much + /// load that puts on the server. This option configures as a percentage + /// how much extra load is allowed via retries. + /// + /// For example, if you send 1,000 requests per second, setting a maximum + /// extra load value of `0.3` would allow 300 more requests per second + /// in retries. A value of `2.5` would allow 2,500 more requests. + /// + /// # Panics + /// + /// The `extra_percent` value must be within reasonable values for a + /// percentage. This method will panic if it is less than `0.0`, or greater + /// than `1000.0`. + pub fn max_extra_load(mut self, extra_percent: f32) -> Self { + assert!(extra_percent >= 0.0); + assert!(extra_percent <= 1000.0); + self.budget = Some(extra_percent); + self + } + + // pub fn max_replay_body + + /// Set the max retries allowed per request. + /// + /// For each logical (initial) request, only retry up to `max` times. + /// + /// This value is used in combination with a token budget that is applied + /// to all requests. Even if the budget would allow more requests, this + /// limit will prevent. Likewise, the budget may prevent retrying up to + /// `max` times. This setting prevents a single request from consuming + /// the entire budget. + /// + /// Default is currently 2 retries. + pub fn max_retries_per_request(mut self, max: u32) -> Self { + self.max_retries_per_request = max; + self + } + + /// Provide a classifier to determine if a request should be retried. + /// + /// # Example + /// + /// ```rust + /// # fn with_builder(builder: reqwest::retry::Builder) -> reqwest::retry::Builder { + /// builder.classify_fn(|req_rep| { + /// match (req_rep.method(), req_rep.status()) { + /// (&http::Method::GET, Some(http::StatusCode::SERVICE_UNAVAILABLE)) => { + /// req_rep.retryable() + /// }, + /// _ => req_rep.success() + /// } + /// }) + /// # } + /// ``` + pub fn classify_fn(self, func: F) -> Self + where + F: Fn(classify::ReqRep<'_>) -> classify::Action + Send + Sync + 'static, + { + self.classify(classify::ClassifyFn(func)) + } + + /// Provide a classifier to determine if a request should be retried. + pub fn classify(mut self, classifier: impl classify::Classify) -> Self { + self.classifier = classify::Classifier::Dyn(Arc::new(classifier)); + self + } + + pub(crate) fn default() -> Builder { + Self { + // unscoped protocols nacks doesn't need a budget + budget: None, + classifier: classify::Classifier::ProtocolNacks, + max_retries_per_request: 2, // on top of the original + scope: scope::Scoped::Unscoped, + } + } + + pub(crate) fn into_policy(self) -> Policy { + let budget = self + .budget + .map(|p| Arc::new(Budget::new(Duration::from_secs(10), 10, p))); + Policy { + budget, + classifier: self.classifier, + max_retries_per_request: self.max_retries_per_request, + retry_cnt: 0, + scope: self.scope, + } + } +} + +// ===== internal ====== + +type Req = http::Request; + +impl tower::retry::Policy, crate::Error> for Policy { + // TODO? backoff futures... 
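+    // For context: `tower::retry::Policy` drives the retry loop. `retry()`
+    // inspects each result and returns `Some(future)` to schedule another
+    // attempt once that future resolves (or `None` to stop), while
+    // `clone_request()` must produce the copy to resend; returning `None`
+    // there (e.g. for a body that can't be cloned) opts the request out of
+    // retries entirely.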
+ type Future = std::future::Ready<()>; + + fn retry( + &mut self, + req: &mut Req, + result: &mut crate::Result>, + ) -> Option { + match self.classifier.classify(req, result) { + classify::Action::Success => { + log::trace!("shouldn't retry!"); + if let Some(ref budget) = self.budget { + budget.deposit(); + } + None + } + classify::Action::Retryable => { + log::trace!("could retry!"); + if self.budget.as_ref().map(|b| b.withdraw()).unwrap_or(true) { + self.retry_cnt += 1; + Some(std::future::ready(())) + } else { + log::debug!("retryable but could not withdraw from budget"); + None + } + } + } + } + + fn clone_request(&mut self, req: &Req) -> Option { + if self.retry_cnt > 0 && !self.scope.applies_to(req) { + return None; + } + if self.retry_cnt >= self.max_retries_per_request { + log::trace!("max_retries_per_request hit"); + return None; + } + let body = req.body().try_clone()?; + let mut new = http::Request::new(body); + *new.method_mut() = req.method().clone(); + *new.uri_mut() = req.uri().clone(); + *new.version_mut() = req.version(); + *new.headers_mut() = req.headers().clone(); + *new.extensions_mut() = req.extensions().clone(); + + Some(new) + } +} + +fn is_retryable_error(err: &crate::Error) -> bool { + use std::error::Error as _; + + // pop the reqwest::Error + let err = if let Some(err) = err.source() { + err + } else { + return false; + }; + // pop the legacy::Error + let err = if let Some(err) = err.source() { + err + } else { + return false; + }; + + #[cfg(not(any(feature = "http3", feature = "http2")))] + let _err = err; + + #[cfg(feature = "http3")] + if let Some(cause) = err.source() { + if let Some(err) = cause.downcast_ref::() { + log::trace!("determining if HTTP/3 error {err} can be retried"); + // TODO: Does h3 provide an API for checking the error? + return err.to_string().as_str() == "timeout"; + } + } + + #[cfg(feature = "http2")] + if let Some(cause) = err.source() { + if let Some(err) = cause.downcast_ref::() { + // They sent us a graceful shutdown, try with a new connection! + if err.is_go_away() && err.is_remote() && err.reason() == Some(h2::Reason::NO_ERROR) { + return true; + } + + // REFUSED_STREAM was sent from the server, which is safe to retry. + // https://www.rfc-editor.org/rfc/rfc9113.html#section-8.7-3.2 + if err.is_reset() && err.is_remote() && err.reason() == Some(h2::Reason::REFUSED_STREAM) + { + return true; + } + } + } + false +} + +// sealed types and traits on purpose while exploring design space +mod scope { + pub trait Scope: Send + Sync + 'static { + fn applies_to(&self, req: &super::Req) -> bool; + } + + // I think scopes likely make the most sense being to hosts. + // If that's the case, then it should probably be easiest to check for + // the host. Perhaps also considering the ability to add more things + // to scope off in the future... + + // For Future Whoever: making a blanket impl for any closure sounds nice, + // but it causes inference issues at the call site. Every closure would + // need to include `: ReqRep` in the arguments. + // + // An alternative is to make things like `ScopeFn`. Slightly more annoying, + // but also more forwards-compatible. 
:shrug: + + pub struct ScopeFn(pub(super) F); + + impl Scope for ScopeFn + where + F: Fn(&super::Req) -> bool + Send + Sync + 'static, + { + fn applies_to(&self, req: &super::Req) -> bool { + (self.0)(req) + } + } + + #[derive(Clone)] + pub(super) enum Scoped { + Unscoped, + Dyn(std::sync::Arc), + } + + impl Scoped { + pub(super) fn applies_to(&self, req: &super::Req) -> bool { + let ret = match self { + Self::Unscoped => true, + Self::Dyn(s) => s.applies_to(req), + }; + log::trace!("retry in scope: {ret}"); + ret + } + } + + impl std::fmt::Debug for Scoped { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Unscoped => f.write_str("Unscoped"), + Self::Dyn(_) => f.write_str("Scoped"), + } + } + } +} + +// sealed types and traits on purpose while exploring design space +mod classify { + pub trait Classify: Send + Sync + 'static { + fn classify(&self, req_rep: ReqRep<'_>) -> Action; + } + + // For Future Whoever: making a blanket impl for any closure sounds nice, + // but it causes inference issues at the call site. Every closure would + // need to include `: ReqRep` in the arguments. + // + // An alternative is to make things like `ClassifyFn`. Slightly more + // annoying, but also more forwards-compatible. :shrug: + pub struct ClassifyFn(pub(super) F); + + impl Classify for ClassifyFn + where + F: Fn(ReqRep<'_>) -> Action + Send + Sync + 'static, + { + fn classify(&self, req_rep: ReqRep<'_>) -> Action { + (self.0)(req_rep) + } + } + + #[derive(Debug)] + pub struct ReqRep<'a>(&'a super::Req, Result); + + impl ReqRep<'_> { + pub fn method(&self) -> &http::Method { + self.0.method() + } + + pub fn uri(&self) -> &http::Uri { + self.0.uri() + } + + pub fn status(&self) -> Option { + self.1.ok() + } + + pub fn error(&self) -> Option<&(dyn std::error::Error + 'static)> { + self.1.as_ref().err().map(|e| &**e as _) + } + + pub fn retryable(self) -> Action { + Action::Retryable + } + + pub fn success(self) -> Action { + Action::Success + } + + fn is_protocol_nack(&self) -> bool { + self.1 + .as_ref() + .err() + .map(|&e| super::is_retryable_error(e)) + .unwrap_or(false) + } + } + + #[must_use] + #[derive(Debug)] + pub enum Action { + Success, + Retryable, + } + + #[derive(Clone)] + pub(super) enum Classifier { + Never, + ProtocolNacks, + Dyn(std::sync::Arc), + } + + impl Classifier { + pub(super) fn classify( + &self, + req: &super::Req, + res: &Result, crate::Error>, + ) -> Action { + let req_rep = ReqRep(req, res.as_ref().map(|r| r.status())); + match self { + Self::Never => Action::Success, + Self::ProtocolNacks => { + if req_rep.is_protocol_nack() { + Action::Retryable + } else { + Action::Success + } + } + Self::Dyn(c) => c.classify(req_rep), + } + } + } + + impl std::fmt::Debug for Classifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Never => f.write_str("Never"), + Self::ProtocolNacks => f.write_str("ProtocolNacks"), + Self::Dyn(_) => f.write_str("Classifier"), + } + } + } +} diff --git a/rust/reqwest/src/tls.rs b/rust/reqwest/src/tls.rs new file mode 100644 index 0000000000..e14d310081 --- /dev/null +++ b/rust/reqwest/src/tls.rs @@ -0,0 +1,843 @@ +//! TLS configuration and types +//! +//! A `Client` will use transport layer security (TLS) by default to connect to +//! HTTPS destinations. +//! +//! # Backends +//! +//! reqwest supports several TLS backends, enabled with Cargo features. +//! +//! ## default-tls +//! +//! reqwest will pick a TLS backend by default. This is true when the +//! 
`default-tls` feature is enabled. +//! +//! While it currently uses `native-tls`, the feature set is designed to only +//! enable configuration that is shared among available backends. This allows +//! reqwest to change the default to `rustls` (or another) at some point in the +//! future. +//! +//!

This feature is enabled by default, and takes +//! precedence if any other crate enables it. This is true even if you declare +//! `features = []`. You must set `default-features = false` instead.
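+//!
+//! As a runtime alternative, a sketch (assuming one of the `rustls-tls`
+//! features is enabled) that pins the backend on the builder:
+//!
+//! ```no_run
+//! // Ignore whichever backend feature resolution picked as the default.
+//! let client = reqwest::Client::builder()
+//!     .use_rustls_tls()
+//!     .build()
+//!     .expect("client");
+//! # drop(client);
+//! ```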
+//! +//! Since Cargo features are additive, other crates in your dependency tree can +//! cause the default backend to be enabled. If you wish to ensure your +//! `Client` uses a specific backend, call the appropriate builder methods +//! (such as [`use_rustls_tls()`][]). +//! +//! [`use_rustls_tls()`]: crate::ClientBuilder::use_rustls_tls() +//! +//! ## native-tls +//! +//! This backend uses the [native-tls][] crate. That will try to use the system +//! TLS on Windows and Mac, and OpenSSL on Linux targets. +//! +//! Enabling the feature explicitly allows for `native-tls`-specific +//! configuration options. +//! +//! [native-tls]: https://crates.io/crates/native-tls +//! +//! ## rustls-tls +//! +//! This backend uses the [rustls][] crate, a TLS library written in Rust. +//! +//! [rustls]: https://crates.io/crates/rustls + +#[cfg(feature = "__rustls")] +use rustls::{ + client::danger::HandshakeSignatureValid, client::danger::ServerCertVerified, + client::danger::ServerCertVerifier, crypto::WebPkiSupportedAlgorithms, + server::ParsedCertificate, DigitallySignedStruct, Error as TLSError, RootCertStore, + SignatureScheme, +}; +use rustls_pki_types::pem::PemObject; +#[cfg(feature = "__rustls")] +use rustls_pki_types::{ServerName, UnixTime}; +use std::{ + fmt, + io::{BufRead, BufReader}, +}; + +/// Represents a X509 certificate revocation list. +#[cfg(feature = "__rustls")] +pub struct CertificateRevocationList { + #[cfg(feature = "__rustls")] + inner: rustls_pki_types::CertificateRevocationListDer<'static>, +} + +/// Represents a server X509 certificate. +#[derive(Clone)] +pub struct Certificate { + #[cfg(feature = "default-tls")] + native: native_tls_crate::Certificate, + #[cfg(feature = "__rustls")] + original: Cert, +} + +#[cfg(feature = "__rustls")] +#[derive(Clone)] +enum Cert { + Der(Vec), + Pem(Vec), +} + +/// Represents a private key and X509 cert as a client certificate. +#[derive(Clone)] +pub struct Identity { + #[cfg_attr(not(any(feature = "native-tls", feature = "__rustls")), allow(unused))] + inner: ClientCert, +} + +enum ClientCert { + #[cfg(feature = "native-tls")] + Pkcs12(native_tls_crate::Identity), + #[cfg(feature = "native-tls")] + Pkcs8(native_tls_crate::Identity), + #[cfg(feature = "__rustls")] + Pem { + key: rustls_pki_types::PrivateKeyDer<'static>, + certs: Vec>, + }, +} + +impl Clone for ClientCert { + fn clone(&self) -> Self { + match self { + #[cfg(feature = "native-tls")] + Self::Pkcs8(i) => Self::Pkcs8(i.clone()), + #[cfg(feature = "native-tls")] + Self::Pkcs12(i) => Self::Pkcs12(i.clone()), + #[cfg(feature = "__rustls")] + ClientCert::Pem { key, certs } => ClientCert::Pem { + key: key.clone_key(), + certs: certs.clone(), + }, + #[cfg_attr( + any(feature = "native-tls", feature = "__rustls"), + allow(unreachable_patterns) + )] + _ => unreachable!(), + } + } +} + +impl Certificate { + /// Create a `Certificate` from a binary DER encoded certificate + /// + /// # Examples + /// + /// ``` + /// # use std::fs::File; + /// # use std::io::Read; + /// # fn cert() -> Result<(), Box> { + /// let mut buf = Vec::new(); + /// File::open("my_cert.der")? 
+ /// .read_to_end(&mut buf)?; + /// let cert = reqwest::Certificate::from_der(&buf)?; + /// # drop(cert); + /// # Ok(()) + /// # } + /// ``` + pub fn from_der(der: &[u8]) -> crate::Result { + Ok(Certificate { + #[cfg(feature = "default-tls")] + native: native_tls_crate::Certificate::from_der(der).map_err(crate::error::builder)?, + #[cfg(feature = "__rustls")] + original: Cert::Der(der.to_owned()), + }) + } + + /// Create a `Certificate` from a PEM encoded certificate + /// + /// # Examples + /// + /// ``` + /// # use std::fs::File; + /// # use std::io::Read; + /// # fn cert() -> Result<(), Box> { + /// let mut buf = Vec::new(); + /// File::open("my_cert.pem")? + /// .read_to_end(&mut buf)?; + /// let cert = reqwest::Certificate::from_pem(&buf)?; + /// # drop(cert); + /// # Ok(()) + /// # } + /// ``` + pub fn from_pem(pem: &[u8]) -> crate::Result { + Ok(Certificate { + #[cfg(feature = "default-tls")] + native: native_tls_crate::Certificate::from_pem(pem).map_err(crate::error::builder)?, + #[cfg(feature = "__rustls")] + original: Cert::Pem(pem.to_owned()), + }) + } + + /// Create a collection of `Certificate`s from a PEM encoded certificate bundle. + /// Example byte sources may be `.crt`, `.cer` or `.pem` files. + /// + /// # Examples + /// + /// ``` + /// # use std::fs::File; + /// # use std::io::Read; + /// # fn cert() -> Result<(), Box> { + /// let mut buf = Vec::new(); + /// File::open("ca-bundle.crt")? + /// .read_to_end(&mut buf)?; + /// let certs = reqwest::Certificate::from_pem_bundle(&buf)?; + /// # drop(certs); + /// # Ok(()) + /// # } + /// ``` + pub fn from_pem_bundle(pem_bundle: &[u8]) -> crate::Result> { + let mut reader = BufReader::new(pem_bundle); + + Self::read_pem_certs(&mut reader)? + .iter() + .map(|cert_vec| Certificate::from_der(cert_vec)) + .collect::>>() + } + + #[cfg(feature = "default-tls")] + pub(crate) fn add_to_native_tls(self, tls: &mut native_tls_crate::TlsConnectorBuilder) { + tls.add_root_certificate(self.native); + } + + #[cfg(feature = "__rustls")] + pub(crate) fn add_to_rustls( + self, + root_cert_store: &mut rustls::RootCertStore, + ) -> crate::Result<()> { + use std::io::Cursor; + + match self.original { + Cert::Der(buf) => root_cert_store + .add(buf.into()) + .map_err(crate::error::builder)?, + Cert::Pem(buf) => { + let mut reader = Cursor::new(buf); + let certs = Self::read_pem_certs(&mut reader)?; + for c in certs { + root_cert_store + .add(c.into()) + .map_err(crate::error::builder)?; + } + } + } + Ok(()) + } + + fn read_pem_certs(reader: &mut impl BufRead) -> crate::Result>> { + rustls_pki_types::CertificateDer::pem_reader_iter(reader) + .map(|result| match result { + Ok(cert) => Ok(cert.as_ref().to_vec()), + Err(_) => Err(crate::error::builder("invalid certificate encoding")), + }) + .collect() + } +} + +impl Identity { + /// Parses a DER-formatted PKCS #12 archive, using the specified password to decrypt the key. + /// + /// The archive should contain a leaf certificate and its private key, as well any intermediate + /// certificates that allow clients to build a chain to a trusted root. + /// The chain certificates should be in order from the leaf certificate towards the root. 
+ /// + /// PKCS #12 archives typically have the file extension `.p12` or `.pfx`, and can be created + /// with the OpenSSL `pkcs12` tool: + /// + /// ```bash + /// openssl pkcs12 -export -out identity.pfx -inkey key.pem -in cert.pem -certfile chain_certs.pem + /// ``` + /// + /// # Examples + /// + /// ``` + /// # use std::fs::File; + /// # use std::io::Read; + /// # fn pkcs12() -> Result<(), Box> { + /// let mut buf = Vec::new(); + /// File::open("my-ident.pfx")? + /// .read_to_end(&mut buf)?; + /// let pkcs12 = reqwest::Identity::from_pkcs12_der(&buf, "my-privkey-password")?; + /// # drop(pkcs12); + /// # Ok(()) + /// # } + /// ``` + /// + /// # Optional + /// + /// This requires the `native-tls` Cargo feature enabled. + #[cfg(feature = "native-tls")] + pub fn from_pkcs12_der(der: &[u8], password: &str) -> crate::Result { + Ok(Identity { + inner: ClientCert::Pkcs12( + native_tls_crate::Identity::from_pkcs12(der, password) + .map_err(crate::error::builder)?, + ), + }) + } + + /// Parses a chain of PEM encoded X509 certificates, with the leaf certificate first. + /// `key` is a PEM encoded PKCS #8 formatted private key for the leaf certificate. + /// + /// The certificate chain should contain any intermediate certificates that should be sent to + /// clients to allow them to build a chain to a trusted root. + /// + /// A certificate chain here means a series of PEM encoded certificates concatenated together. + /// + /// # Examples + /// + /// ``` + /// # use std::fs; + /// # fn pkcs8() -> Result<(), Box> { + /// let cert = fs::read("client.pem")?; + /// let key = fs::read("key.pem")?; + /// let pkcs8 = reqwest::Identity::from_pkcs8_pem(&cert, &key)?; + /// # drop(pkcs8); + /// # Ok(()) + /// # } + /// ``` + /// + /// # Optional + /// + /// This requires the `native-tls` Cargo feature enabled. + #[cfg(feature = "native-tls")] + pub fn from_pkcs8_pem(pem: &[u8], key: &[u8]) -> crate::Result { + Ok(Identity { + inner: ClientCert::Pkcs8( + native_tls_crate::Identity::from_pkcs8(pem, key).map_err(crate::error::builder)?, + ), + }) + } + + /// Parses PEM encoded private key and certificate. + /// + /// The input should contain a PEM encoded private key + /// and at least one PEM encoded certificate. + /// + /// Note: The private key must be in RSA, SEC1 Elliptic Curve or PKCS#8 format. + /// + /// # Examples + /// + /// ``` + /// # use std::fs::File; + /// # use std::io::Read; + /// # fn pem() -> Result<(), Box> { + /// let mut buf = Vec::new(); + /// File::open("my-ident.pem")? + /// .read_to_end(&mut buf)?; + /// let id = reqwest::Identity::from_pem(&buf)?; + /// # drop(id); + /// # Ok(()) + /// # } + /// ``` + /// + /// # Optional + /// + /// This requires the `rustls-tls(-...)` Cargo feature enabled. + #[cfg(feature = "__rustls")] + pub fn from_pem(buf: &[u8]) -> crate::Result { + use rustls_pki_types::{pem::SectionKind, PrivateKeyDer}; + use std::io::Cursor; + + let (key, certs) = { + let mut pem = Cursor::new(buf); + let mut sk = Vec::::new(); + let mut certs = Vec::::new(); + + while let Some((kind, data)) = + rustls_pki_types::pem::from_buf(&mut pem).map_err(|_| { + crate::error::builder(TLSError::General(String::from( + "Invalid identity PEM file", + ))) + })? 
+ { + match kind { + SectionKind::Certificate => certs.push(data.into()), + SectionKind::PrivateKey => sk.push(PrivateKeyDer::Pkcs8(data.into())), + SectionKind::RsaPrivateKey => sk.push(PrivateKeyDer::Pkcs1(data.into())), + SectionKind::EcPrivateKey => sk.push(PrivateKeyDer::Sec1(data.into())), + _ => { + return Err(crate::error::builder(TLSError::General(String::from( + "No valid certificate was found", + )))) + } + } + } + + if let (Some(sk), false) = (sk.pop(), certs.is_empty()) { + (sk, certs) + } else { + return Err(crate::error::builder(TLSError::General(String::from( + "private key or certificate not found", + )))); + } + }; + + Ok(Identity { + inner: ClientCert::Pem { key, certs }, + }) + } + + #[cfg(feature = "native-tls")] + pub(crate) fn add_to_native_tls( + self, + tls: &mut native_tls_crate::TlsConnectorBuilder, + ) -> crate::Result<()> { + match self.inner { + ClientCert::Pkcs12(id) | ClientCert::Pkcs8(id) => { + tls.identity(id); + Ok(()) + } + #[cfg(feature = "__rustls")] + ClientCert::Pem { .. } => Err(crate::error::builder("incompatible TLS identity type")), + } + } + + #[cfg(feature = "__rustls")] + pub(crate) fn add_to_rustls( + self, + config_builder: rustls::ConfigBuilder< + rustls::ClientConfig, + // Not sure here + rustls::client::WantsClientCert, + >, + ) -> crate::Result { + match self.inner { + ClientCert::Pem { key, certs } => config_builder + .with_client_auth_cert(certs, key) + .map_err(crate::error::builder), + #[cfg(feature = "native-tls")] + ClientCert::Pkcs12(..) | ClientCert::Pkcs8(..) => { + Err(crate::error::builder("incompatible TLS identity type")) + } + } + } +} + +#[cfg(feature = "__rustls")] +impl CertificateRevocationList { + /// Parses a PEM encoded CRL. + /// + /// # Examples + /// + /// ``` + /// # use std::fs::File; + /// # use std::io::Read; + /// # fn crl() -> Result<(), Box> { + /// let mut buf = Vec::new(); + /// File::open("my_crl.pem")? + /// .read_to_end(&mut buf)?; + /// let crl = reqwest::tls::CertificateRevocationList::from_pem(&buf)?; + /// # drop(crl); + /// # Ok(()) + /// # } + /// ``` + /// + /// # Optional + /// + /// This requires the `rustls-tls(-...)` Cargo feature enabled. + #[cfg(feature = "__rustls")] + pub fn from_pem(pem: &[u8]) -> crate::Result { + Ok(CertificateRevocationList { + #[cfg(feature = "__rustls")] + inner: rustls_pki_types::CertificateRevocationListDer::from(pem.to_vec()), + }) + } + + /// Creates a collection of `CertificateRevocationList`s from a PEM encoded CRL bundle. + /// Example byte sources may be `.crl` or `.pem` files. + /// + /// # Examples + /// + /// ``` + /// # use std::fs::File; + /// # use std::io::Read; + /// # fn crls() -> Result<(), Box> { + /// let mut buf = Vec::new(); + /// File::open("crl-bundle.crl")? + /// .read_to_end(&mut buf)?; + /// let crls = reqwest::tls::CertificateRevocationList::from_pem_bundle(&buf)?; + /// # drop(crls); + /// # Ok(()) + /// # } + /// ``` + /// + /// # Optional + /// + /// This requires the `rustls-tls(-...)` Cargo feature enabled. 
+ #[cfg(feature = "__rustls")] + pub fn from_pem_bundle(pem_bundle: &[u8]) -> crate::Result> { + rustls_pki_types::CertificateRevocationListDer::pem_slice_iter(pem_bundle) + .map(|result| match result { + Ok(crl) => Ok(CertificateRevocationList { inner: crl }), + Err(_) => Err(crate::error::builder("invalid crl encoding")), + }) + .collect::>>() + } + + #[cfg(feature = "__rustls")] + pub(crate) fn as_rustls_crl<'a>(&self) -> rustls_pki_types::CertificateRevocationListDer<'a> { + self.inner.clone() + } +} + +impl fmt::Debug for Certificate { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Certificate").finish() + } +} + +impl fmt::Debug for Identity { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Identity").finish() + } +} + +#[cfg(feature = "__rustls")] +impl fmt::Debug for CertificateRevocationList { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("CertificateRevocationList").finish() + } +} + +/// A TLS protocol version. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Version(InnerVersion); + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[non_exhaustive] +enum InnerVersion { + Tls1_0, + Tls1_1, + Tls1_2, + Tls1_3, +} + +// These could perhaps be From/TryFrom implementations, but those would be +// part of the public API so let's be careful +impl Version { + /// Version 1.0 of the TLS protocol. + pub const TLS_1_0: Version = Version(InnerVersion::Tls1_0); + /// Version 1.1 of the TLS protocol. + pub const TLS_1_1: Version = Version(InnerVersion::Tls1_1); + /// Version 1.2 of the TLS protocol. + pub const TLS_1_2: Version = Version(InnerVersion::Tls1_2); + /// Version 1.3 of the TLS protocol. + pub const TLS_1_3: Version = Version(InnerVersion::Tls1_3); + + #[cfg(feature = "default-tls")] + pub(crate) fn to_native_tls(self) -> Option { + match self.0 { + InnerVersion::Tls1_0 => Some(native_tls_crate::Protocol::Tlsv10), + InnerVersion::Tls1_1 => Some(native_tls_crate::Protocol::Tlsv11), + InnerVersion::Tls1_2 => Some(native_tls_crate::Protocol::Tlsv12), + InnerVersion::Tls1_3 => None, + } + } + + #[cfg(feature = "__rustls")] + pub(crate) fn from_rustls(version: rustls::ProtocolVersion) -> Option { + match version { + rustls::ProtocolVersion::SSLv2 => None, + rustls::ProtocolVersion::SSLv3 => None, + rustls::ProtocolVersion::TLSv1_0 => Some(Self(InnerVersion::Tls1_0)), + rustls::ProtocolVersion::TLSv1_1 => Some(Self(InnerVersion::Tls1_1)), + rustls::ProtocolVersion::TLSv1_2 => Some(Self(InnerVersion::Tls1_2)), + rustls::ProtocolVersion::TLSv1_3 => Some(Self(InnerVersion::Tls1_3)), + _ => None, + } + } +} + +pub(crate) enum TlsBackend { + // This is the default and HTTP/3 feature does not use it so suppress it. 
+ #[allow(dead_code)] + #[cfg(feature = "default-tls")] + Default, + #[cfg(feature = "native-tls")] + BuiltNativeTls(native_tls_crate::TlsConnector), + #[cfg(feature = "__rustls")] + Rustls, + #[cfg(feature = "__rustls")] + BuiltRustls(rustls::ClientConfig), + #[cfg(any(feature = "native-tls", feature = "__rustls",))] + UnknownPreconfigured, +} + +impl fmt::Debug for TlsBackend { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + #[cfg(feature = "default-tls")] + TlsBackend::Default => write!(f, "Default"), + #[cfg(feature = "native-tls")] + TlsBackend::BuiltNativeTls(_) => write!(f, "BuiltNativeTls"), + #[cfg(feature = "__rustls")] + TlsBackend::Rustls => write!(f, "Rustls"), + #[cfg(feature = "__rustls")] + TlsBackend::BuiltRustls(_) => write!(f, "BuiltRustls"), + #[cfg(any(feature = "native-tls", feature = "__rustls",))] + TlsBackend::UnknownPreconfigured => write!(f, "UnknownPreconfigured"), + } + } +} + +#[allow(clippy::derivable_impls)] +impl Default for TlsBackend { + fn default() -> TlsBackend { + #[cfg(all(feature = "default-tls", not(feature = "http3")))] + { + TlsBackend::Default + } + + #[cfg(any( + all(feature = "__rustls", not(feature = "default-tls")), + feature = "http3" + ))] + { + TlsBackend::Rustls + } + } +} + +#[cfg(feature = "__rustls")] +#[derive(Debug)] +pub(crate) struct NoVerifier; + +#[cfg(feature = "__rustls")] +impl ServerCertVerifier for NoVerifier { + fn verify_server_cert( + &self, + _end_entity: &rustls_pki_types::CertificateDer, + _intermediates: &[rustls_pki_types::CertificateDer], + _server_name: &ServerName, + _ocsp_response: &[u8], + _now: UnixTime, + ) -> Result { + Ok(ServerCertVerified::assertion()) + } + + fn verify_tls12_signature( + &self, + _message: &[u8], + _cert: &rustls_pki_types::CertificateDer, + _dss: &DigitallySignedStruct, + ) -> Result { + Ok(HandshakeSignatureValid::assertion()) + } + + fn verify_tls13_signature( + &self, + _message: &[u8], + _cert: &rustls_pki_types::CertificateDer, + _dss: &DigitallySignedStruct, + ) -> Result { + Ok(HandshakeSignatureValid::assertion()) + } + + fn supported_verify_schemes(&self) -> Vec { + vec![ + SignatureScheme::RSA_PKCS1_SHA1, + SignatureScheme::ECDSA_SHA1_Legacy, + SignatureScheme::RSA_PKCS1_SHA256, + SignatureScheme::ECDSA_NISTP256_SHA256, + SignatureScheme::RSA_PKCS1_SHA384, + SignatureScheme::ECDSA_NISTP384_SHA384, + SignatureScheme::RSA_PKCS1_SHA512, + SignatureScheme::ECDSA_NISTP521_SHA512, + SignatureScheme::RSA_PSS_SHA256, + SignatureScheme::RSA_PSS_SHA384, + SignatureScheme::RSA_PSS_SHA512, + SignatureScheme::ED25519, + SignatureScheme::ED448, + ] + } +} + +#[cfg(feature = "__rustls")] +#[derive(Debug)] +pub(crate) struct IgnoreHostname { + roots: RootCertStore, + signature_algorithms: WebPkiSupportedAlgorithms, +} + +#[cfg(feature = "__rustls")] +impl IgnoreHostname { + pub(crate) fn new( + roots: RootCertStore, + signature_algorithms: WebPkiSupportedAlgorithms, + ) -> Self { + Self { + roots, + signature_algorithms, + } + } +} + +#[cfg(feature = "__rustls")] +impl ServerCertVerifier for IgnoreHostname { + fn verify_server_cert( + &self, + end_entity: &rustls_pki_types::CertificateDer<'_>, + intermediates: &[rustls_pki_types::CertificateDer<'_>], + _server_name: &ServerName<'_>, + _ocsp_response: &[u8], + now: UnixTime, + ) -> Result { + let cert = ParsedCertificate::try_from(end_entity)?; + + rustls::client::verify_server_cert_signed_by_trust_anchor( + &cert, + &self.roots, + intermediates, + now, + self.signature_algorithms.all, + )?; + 
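+        // The chain and signature checks above have passed; hostname
+        // verification is intentionally skipped, which is this verifier's
+        // entire purpose.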
Ok(ServerCertVerified::assertion()) + } + + fn verify_tls12_signature( + &self, + message: &[u8], + cert: &rustls_pki_types::CertificateDer<'_>, + dss: &DigitallySignedStruct, + ) -> Result { + rustls::crypto::verify_tls12_signature(message, cert, dss, &self.signature_algorithms) + } + + fn verify_tls13_signature( + &self, + message: &[u8], + cert: &rustls_pki_types::CertificateDer<'_>, + dss: &DigitallySignedStruct, + ) -> Result { + rustls::crypto::verify_tls13_signature(message, cert, dss, &self.signature_algorithms) + } + + fn supported_verify_schemes(&self) -> Vec { + self.signature_algorithms.supported_schemes() + } +} + +/// Hyper extension carrying extra TLS layer information. +/// Made available to clients on responses when `tls_info` is set. +#[derive(Clone)] +pub struct TlsInfo { + pub(crate) peer_certificate: Option>, +} + +impl TlsInfo { + /// Get the DER encoded leaf certificate of the peer. + pub fn peer_certificate(&self) -> Option<&[u8]> { + self.peer_certificate.as_ref().map(|der| &der[..]) + } +} + +impl std::fmt::Debug for TlsInfo { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("TlsInfo").finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(feature = "default-tls")] + #[test] + fn certificate_from_der_invalid() { + Certificate::from_der(b"not der").unwrap_err(); + } + + #[cfg(feature = "default-tls")] + #[test] + fn certificate_from_pem_invalid() { + Certificate::from_pem(b"not pem").unwrap_err(); + } + + #[cfg(feature = "native-tls")] + #[test] + fn identity_from_pkcs12_der_invalid() { + Identity::from_pkcs12_der(b"not der", "nope").unwrap_err(); + } + + #[cfg(feature = "native-tls")] + #[test] + fn identity_from_pkcs8_pem_invalid() { + Identity::from_pkcs8_pem(b"not pem", b"not key").unwrap_err(); + } + + #[cfg(feature = "__rustls")] + #[test] + fn identity_from_pem_invalid() { + Identity::from_pem(b"not pem").unwrap_err(); + } + + #[cfg(feature = "__rustls")] + #[test] + fn identity_from_pem_pkcs1_key() { + let pem = b"-----BEGIN CERTIFICATE-----\n\ + -----END CERTIFICATE-----\n\ + -----BEGIN RSA PRIVATE KEY-----\n\ + -----END RSA PRIVATE KEY-----\n"; + + Identity::from_pem(pem).unwrap(); + } + + #[test] + fn certificates_from_pem_bundle() { + const PEM_BUNDLE: &[u8] = b" + -----BEGIN CERTIFICATE----- + MIIBtjCCAVugAwIBAgITBmyf1XSXNmY/Owua2eiedgPySjAKBggqhkjOPQQDAjA5 + MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g + Um9vdCBDQSAzMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG + A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg + Q0EgMzBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABCmXp8ZBf8ANm+gBG1bG8lKl + ui2yEujSLtf6ycXYqm0fc4E7O5hrOXwzpcVOho6AF2hiRVd9RFgdszflZwjrZt6j + QjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSr + ttvXBp43rDCGB5Fwx5zEGbF4wDAKBggqhkjOPQQDAgNJADBGAiEA4IWSoxe3jfkr + BqWTrBqYaGFy+uGh0PsceGCmQ5nFuMQCIQCcAu/xlJyzlvnrxir4tiz+OpAUFteM + YyRIHN8wfdVoOw== + -----END CERTIFICATE----- + + -----BEGIN CERTIFICATE----- + MIIB8jCCAXigAwIBAgITBmyf18G7EEwpQ+Vxe3ssyBrBDjAKBggqhkjOPQQDAzA5 + MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g + Um9vdCBDQSA0MB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG + A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg + Q0EgNDB2MBAGByqGSM49AgEGBSuBBAAiA2IABNKrijdPo1MN/sGKe0uoe0ZLY7Bi + 9i0b2whxIdIA6GO9mif78DluXeo9pcmBqqNbIJhFXRbb/egQbeOc4OO9X4Ri83Bk + M6DLJC9wuoihKqB1+IGuYgbEgds5bimwHvouXKNCMEAwDwYDVR0TAQH/BAUwAwEB + /zAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0OBBYEFNPsxzplbszh2naaVvuc84ZtV+WB + 
MAoGCCqGSM49BAMDA2gAMGUCMDqLIfG9fhGt0O9Yli/W651+kI0rz2ZVwyzjKKlw + CkcO8DdZEv8tmZQoTipPNU0zWgIxAOp1AE47xDqUEpHJWEadIRNyp4iciuRMStuW + 1KyLa2tJElMzrdfkviT8tQp21KW8EA== + -----END CERTIFICATE----- + "; + + assert!(Certificate::from_pem_bundle(PEM_BUNDLE).is_ok()) + } + + #[cfg(feature = "__rustls")] + #[test] + fn crl_from_pem() { + let pem = b"-----BEGIN X509 CRL-----\n-----END X509 CRL-----\n"; + + CertificateRevocationList::from_pem(pem).unwrap(); + } + + #[cfg(feature = "__rustls")] + #[test] + fn crl_from_pem_bundle() { + let pem_bundle = std::fs::read("tests/support/crl.pem").unwrap(); + + let result = CertificateRevocationList::from_pem_bundle(&pem_bundle); + + assert!(result.is_ok()); + let result = result.unwrap(); + assert_eq!(result.len(), 1); + } +} diff --git a/rust/reqwest/src/util.rs b/rust/reqwest/src/util.rs new file mode 100644 index 0000000000..02873f7d76 --- /dev/null +++ b/rust/reqwest/src/util.rs @@ -0,0 +1,130 @@ +use crate::header::{Entry, HeaderMap, HeaderValue, OccupiedEntry}; +use std::fmt; + +pub fn basic_auth(username: U, password: Option
<P>
) -> HeaderValue +where + U: std::fmt::Display, + P: std::fmt::Display, +{ + use base64::prelude::BASE64_STANDARD; + use base64::write::EncoderWriter; + use std::io::Write; + + let mut buf = b"Basic ".to_vec(); + { + let mut encoder = EncoderWriter::new(&mut buf, &BASE64_STANDARD); + let _ = write!(encoder, "{username}:"); + if let Some(password) = password { + let _ = write!(encoder, "{password}"); + } + } + let mut header = HeaderValue::from_maybe_shared(bytes::Bytes::from(buf)) + .expect("base64 is always valid HeaderValue"); + header.set_sensitive(true); + header +} + +#[cfg(not(target_arch = "wasm32"))] +pub(crate) fn fast_random() -> u64 { + use std::cell::Cell; + use std::collections::hash_map::RandomState; + use std::hash::{BuildHasher, Hasher}; + + thread_local! { + static KEY: RandomState = RandomState::new(); + static COUNTER: Cell = Cell::new(0); + } + + KEY.with(|key| { + COUNTER.with(|ctr| { + let n = ctr.get().wrapping_add(1); + ctr.set(n); + + let mut h = key.build_hasher(); + h.write_u64(n); + h.finish() + }) + }) +} + +pub(crate) fn replace_headers(dst: &mut HeaderMap, src: HeaderMap) { + // IntoIter of HeaderMap yields (Option, HeaderValue). + // The first time a name is yielded, it will be Some(name), and if + // there are more values with the same name, the next yield will be + // None. + + let mut prev_entry: Option> = None; + for (key, value) in src { + match key { + Some(key) => match dst.entry(key) { + Entry::Occupied(mut e) => { + e.insert(value); + prev_entry = Some(e); + } + Entry::Vacant(e) => { + let e = e.insert_entry(value); + prev_entry = Some(e); + } + }, + None => match prev_entry { + Some(ref mut entry) => { + entry.append(value); + } + None => unreachable!("HeaderMap::into_iter yielded None first"), + }, + } + } +} + +#[cfg(feature = "cookies")] +#[cfg(not(target_arch = "wasm32"))] +pub(crate) fn add_cookie_header( + headers: &mut HeaderMap, + cookie_store: &dyn crate::cookie::CookieStore, + url: &url::Url, +) { + if let Some(header) = cookie_store.cookies(url) { + headers.insert(crate::header::COOKIE, header); + } +} + +pub(crate) struct Escape<'a>(&'a [u8]); + +#[cfg(not(target_arch = "wasm32"))] +impl<'a> Escape<'a> { + pub(crate) fn new(bytes: &'a [u8]) -> Self { + Escape(bytes) + } +} + +impl fmt::Debug for Escape<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "b\"{}\"", self)?; + Ok(()) + } +} + +impl fmt::Display for Escape<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for &c in self.0 { + // https://doc.rust-lang.org/reference.html#byte-escapes + if c == b'\n' { + write!(f, "\\n")?; + } else if c == b'\r' { + write!(f, "\\r")?; + } else if c == b'\t' { + write!(f, "\\t")?; + } else if c == b'\\' || c == b'"' { + write!(f, "\\{}", c as char)?; + } else if c == b'\0' { + write!(f, "\\0")?; + // ASCII printable + } else if c >= 0x20 && c < 0x7f { + write!(f, "{}", c as char)?; + } else { + write!(f, "\\x{c:02x}")?; + } + } + Ok(()) + } +} diff --git a/rust/reqwest/src/wasm/body.rs b/rust/reqwest/src/wasm/body.rs new file mode 100644 index 0000000000..8d40c503f5 --- /dev/null +++ b/rust/reqwest/src/wasm/body.rs @@ -0,0 +1,320 @@ +#[cfg(feature = "multipart")] +use super::multipart::Form; +/// dox +use bytes::Bytes; +use js_sys::Uint8Array; +use std::{borrow::Cow, fmt}; +use wasm_bindgen::JsValue; + +/// The body of a `Request`. 
+/// +/// In most cases, this is not needed directly, as the +/// [`RequestBuilder.body`][builder] method uses `Into`, which allows +/// passing many things (like a string or vector of bytes). +/// +/// [builder]: ./struct.RequestBuilder.html#method.body +pub struct Body { + inner: Inner, +} + +enum Inner { + Single(Single), + /// MultipartForm holds a multipart/form-data body. + #[cfg(feature = "multipart")] + MultipartForm(Form), +} + +#[derive(Clone)] +pub(crate) enum Single { + Bytes(Bytes), + Text(Cow<'static, str>), +} + +impl Single { + fn as_bytes(&self) -> &[u8] { + match self { + Single::Bytes(bytes) => bytes.as_ref(), + Single::Text(text) => text.as_bytes(), + } + } + + pub(crate) fn to_js_value(&self) -> JsValue { + match self { + Single::Bytes(bytes) => { + let body_bytes: &[u8] = bytes.as_ref(); + let body_uint8_array: Uint8Array = body_bytes.into(); + let js_value: &JsValue = body_uint8_array.as_ref(); + js_value.to_owned() + } + Single::Text(text) => JsValue::from_str(text), + } + } + + fn is_empty(&self) -> bool { + match self { + Single::Bytes(bytes) => bytes.is_empty(), + Single::Text(text) => text.is_empty(), + } + } +} + +impl Body { + /// Returns a reference to the internal data of the `Body`. + /// + /// `None` is returned, if the underlying data is a multipart form. + #[inline] + pub fn as_bytes(&self) -> Option<&[u8]> { + match &self.inner { + Inner::Single(single) => Some(single.as_bytes()), + #[cfg(feature = "multipart")] + Inner::MultipartForm(_) => None, + } + } + + pub(crate) fn to_js_value(&self) -> crate::Result { + match &self.inner { + Inner::Single(single) => Ok(single.to_js_value()), + #[cfg(feature = "multipart")] + Inner::MultipartForm(form) => { + let form_data = form.to_form_data()?; + let js_value: &JsValue = form_data.as_ref(); + Ok(js_value.to_owned()) + } + } + } + + #[cfg(feature = "multipart")] + pub(crate) fn as_single(&self) -> Option<&Single> { + match &self.inner { + Inner::Single(single) => Some(single), + Inner::MultipartForm(_) => None, + } + } + + #[inline] + #[cfg(feature = "multipart")] + pub(crate) fn from_form(f: Form) -> Body { + Self { + inner: Inner::MultipartForm(f), + } + } + + /// into_part turns a regular body into the body of a multipart/form-data part. 
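+    /// In this wasm implementation it is a pass-through: both variants are
+    /// reconstructed unchanged.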
+ #[cfg(feature = "multipart")] + pub(crate) fn into_part(self) -> Body { + match self.inner { + Inner::Single(single) => Self { + inner: Inner::Single(single), + }, + Inner::MultipartForm(form) => Self { + inner: Inner::MultipartForm(form), + }, + } + } + + pub(crate) fn is_empty(&self) -> bool { + match &self.inner { + Inner::Single(single) => single.is_empty(), + #[cfg(feature = "multipart")] + Inner::MultipartForm(form) => form.is_empty(), + } + } + + pub(crate) fn try_clone(&self) -> Option { + match &self.inner { + Inner::Single(single) => Some(Self { + inner: Inner::Single(single.clone()), + }), + #[cfg(feature = "multipart")] + Inner::MultipartForm(_) => None, + } + } +} + +impl From for Body { + #[inline] + fn from(bytes: Bytes) -> Body { + Body { + inner: Inner::Single(Single::Bytes(bytes)), + } + } +} + +impl From> for Body { + #[inline] + fn from(vec: Vec) -> Body { + Body { + inner: Inner::Single(Single::Bytes(vec.into())), + } + } +} + +impl From<&'static [u8]> for Body { + #[inline] + fn from(s: &'static [u8]) -> Body { + Body { + inner: Inner::Single(Single::Bytes(Bytes::from_static(s))), + } + } +} + +impl From for Body { + #[inline] + fn from(s: String) -> Body { + Body { + inner: Inner::Single(Single::Text(s.into())), + } + } +} + +impl From<&'static str> for Body { + #[inline] + fn from(s: &'static str) -> Body { + Body { + inner: Inner::Single(Single::Text(s.into())), + } + } +} + +impl fmt::Debug for Body { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Body").finish() + } +} + +impl Default for Body { + fn default() -> Body { + Body { + inner: Inner::Single(Single::Bytes(Bytes::new())), + } + } +} + +// Can use new methods in web-sys when requiring v0.2.93. +// > `init.method(m)` to `init.set_method(m)` +// For now, ignore their deprecation. 
+#[allow(deprecated)] +#[cfg(test)] +mod tests { + use crate::Body; + use js_sys::Uint8Array; + use wasm_bindgen::prelude::*; + use wasm_bindgen_test::*; + + wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); + + #[wasm_bindgen] + extern "C" { + // Use `js_namespace` here to bind `console.log(..)` instead of just + // `log(..)` + #[wasm_bindgen(js_namespace = console)] + fn log(s: String); + } + + #[wasm_bindgen_test] + async fn test_body() { + let body = Body::from("TEST"); + assert_eq!([84, 69, 83, 84], body.as_bytes().unwrap()); + } + + #[wasm_bindgen_test] + async fn test_body_js_static_str() { + let body_value = "TEST"; + let body = Body::from(body_value); + + let mut init = web_sys::RequestInit::new(); + init.method("POST"); + init.body(Some( + body.to_js_value() + .expect("could not convert body to JsValue") + .as_ref(), + )); + + let js_req = web_sys::Request::new_with_str_and_init("", &init) + .expect("could not create JS request"); + let text_promise = js_req.text().expect("could not get text promise"); + let text = crate::wasm::promise::(text_promise) + .await + .expect("could not get request body as text"); + + assert_eq!(text.as_string().expect("text is not a string"), body_value); + } + #[wasm_bindgen_test] + async fn test_body_js_string() { + let body_value = "TEST".to_string(); + let body = Body::from(body_value.clone()); + + let mut init = web_sys::RequestInit::new(); + init.method("POST"); + init.body(Some( + body.to_js_value() + .expect("could not convert body to JsValue") + .as_ref(), + )); + + let js_req = web_sys::Request::new_with_str_and_init("", &init) + .expect("could not create JS request"); + let text_promise = js_req.text().expect("could not get text promise"); + let text = crate::wasm::promise::(text_promise) + .await + .expect("could not get request body as text"); + + assert_eq!(text.as_string().expect("text is not a string"), body_value); + } + + #[wasm_bindgen_test] + async fn test_body_js_static_u8_slice() { + let body_value: &'static [u8] = b"\x00\x42"; + let body = Body::from(body_value); + + let mut init = web_sys::RequestInit::new(); + init.method("POST"); + init.body(Some( + body.to_js_value() + .expect("could not convert body to JsValue") + .as_ref(), + )); + + let js_req = web_sys::Request::new_with_str_and_init("", &init) + .expect("could not create JS request"); + + let array_buffer_promise = js_req + .array_buffer() + .expect("could not get array_buffer promise"); + let array_buffer = crate::wasm::promise::(array_buffer_promise) + .await + .expect("could not get request body as array buffer"); + + let v = Uint8Array::new(&array_buffer).to_vec(); + + assert_eq!(v, body_value); + } + + #[wasm_bindgen_test] + async fn test_body_js_vec_u8() { + let body_value = vec![0u8, 42]; + let body = Body::from(body_value.clone()); + + let mut init = web_sys::RequestInit::new(); + init.method("POST"); + init.body(Some( + body.to_js_value() + .expect("could not convert body to JsValue") + .as_ref(), + )); + + let js_req = web_sys::Request::new_with_str_and_init("", &init) + .expect("could not create JS request"); + + let array_buffer_promise = js_req + .array_buffer() + .expect("could not get array_buffer promise"); + let array_buffer = crate::wasm::promise::(array_buffer_promise) + .await + .expect("could not get request body as array buffer"); + + let v = Uint8Array::new(&array_buffer).to_vec(); + + assert_eq!(v, body_value); + } +} diff --git a/rust/reqwest/src/wasm/client.rs b/rust/reqwest/src/wasm/client.rs new file mode 100644 index 
0000000000..c594b9868d --- /dev/null +++ b/rust/reqwest/src/wasm/client.rs @@ -0,0 +1,473 @@ +use http::header::USER_AGENT; +use http::{HeaderMap, HeaderValue, Method}; +use js_sys::{Promise, JSON}; +use std::convert::TryInto; +use std::{fmt, future::Future, sync::Arc}; +use url::Url; +use wasm_bindgen::prelude::{wasm_bindgen, UnwrapThrowExt as _}; + +use super::{AbortGuard, Request, RequestBuilder, Response}; +use crate::IntoUrl; + +#[wasm_bindgen] +extern "C" { + #[wasm_bindgen(js_name = fetch)] + fn fetch_with_request(input: &web_sys::Request) -> Promise; +} + +fn js_fetch(req: &web_sys::Request) -> Promise { + use wasm_bindgen::{JsCast, JsValue}; + let global = js_sys::global(); + + if let Ok(true) = js_sys::Reflect::has(&global, &JsValue::from_str("ServiceWorkerGlobalScope")) + { + global + .unchecked_into::() + .fetch_with_request(req) + } else { + // browser + fetch_with_request(req) + } +} + +/// An HTTP Client for WebAssembly. +/// +/// Uses the browser's Fetch API to make requests. The `Client` holds +/// configuration that applies to all requests. To configure a `Client`, +/// use `Client::builder()`. +#[derive(Clone)] +pub struct Client { + config: Arc, +} + +/// A `ClientBuilder` can be used to create a `Client` with custom configuration. +pub struct ClientBuilder { + config: Config, +} + +impl Client { + /// Constructs a new `Client`. + pub fn new() -> Self { + Client::builder().build().unwrap_throw() + } + + /// Creates a `ClientBuilder` to configure a `Client`. + /// + /// This is the same as `ClientBuilder::new()`. + pub fn builder() -> ClientBuilder { + ClientBuilder::new() + } + + /// Convenience method to make a `GET` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn get(&self, url: U) -> RequestBuilder { + self.request(Method::GET, url) + } + + /// Convenience method to make a `POST` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn post(&self, url: U) -> RequestBuilder { + self.request(Method::POST, url) + } + + /// Convenience method to make a `PUT` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn put(&self, url: U) -> RequestBuilder { + self.request(Method::PUT, url) + } + + /// Convenience method to make a `PATCH` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn patch(&self, url: U) -> RequestBuilder { + self.request(Method::PATCH, url) + } + + /// Convenience method to make a `DELETE` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn delete(&self, url: U) -> RequestBuilder { + self.request(Method::DELETE, url) + } + + /// Convenience method to make a `HEAD` request to a URL. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. + pub fn head(&self, url: U) -> RequestBuilder { + self.request(Method::HEAD, url) + } + + /// Start building a `Request` with the `Method` and `Url`. + /// + /// Returns a `RequestBuilder`, which will allow setting headers and + /// request body before sending. + /// + /// # Errors + /// + /// This method fails whenever supplied `Url` cannot be parsed. 
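+    ///
+    /// # Example
+    ///
+    /// A sketch of the builder flow:
+    ///
+    /// ```no_run
+    /// # async fn run() -> Result<(), reqwest::Error> {
+    /// let client = reqwest::Client::new();
+    /// let resp = client
+    ///     .request(reqwest::Method::GET, "https://example.com")
+    ///     .send()
+    ///     .await?;
+    /// # drop(resp);
+    /// # Ok(())
+    /// # }
+    /// ```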
+ pub fn request(&self, method: Method, url: U) -> RequestBuilder { + let req = url.into_url().map(move |url| Request::new(method, url)); + RequestBuilder::new(self.clone(), req) + } + + /// Executes a `Request`. + /// + /// A `Request` can be built manually with `Request::new()` or obtained + /// from a RequestBuilder with `RequestBuilder::build()`. + /// + /// You should prefer to use the `RequestBuilder` and + /// `RequestBuilder::send()`. + /// + /// # Errors + /// + /// This method fails if there was an error while sending request, + /// redirect loop was detected or redirect limit was exhausted. + pub fn execute( + &self, + request: Request, + ) -> impl Future> { + self.execute_request(request) + } + + // merge request headers with Client default_headers, prior to external http fetch + fn merge_headers(&self, req: &mut Request) { + use http::header::Entry; + let headers: &mut HeaderMap = req.headers_mut(); + // insert default headers in the request headers + // without overwriting already appended headers. + for (key, value) in self.config.headers.iter() { + if let Entry::Vacant(entry) = headers.entry(key) { + entry.insert(value.clone()); + } + } + } + + pub(super) fn execute_request( + &self, + mut req: Request, + ) -> impl Future> { + self.merge_headers(&mut req); + fetch(req) + } +} + +impl Default for Client { + fn default() -> Self { + Self::new() + } +} + +impl fmt::Debug for Client { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut builder = f.debug_struct("Client"); + self.config.fmt_fields(&mut builder); + builder.finish() + } +} + +impl fmt::Debug for ClientBuilder { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut builder = f.debug_struct("ClientBuilder"); + self.config.fmt_fields(&mut builder); + builder.finish() + } +} + +// Can use new methods in web-sys when requiring v0.2.93. +// > `init.method(m)` to `init.set_method(m)` +// For now, ignore their deprecation. 
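+// Usage sketch (illustrative only; nothing in this module calls it): a
+// hand-built `Request` goes through `Client::execute`, which merges the
+// client's default headers and then hands off to `fetch` below.
+#[allow(dead_code)]
+async fn execute_sketch(client: &Client) -> crate::Result<()> {
+    let url = Url::parse("https://example.com").expect("static url parses");
+    let req = Request::new(Method::GET, url);
+    let resp = client.execute(req).await?;
+    let _status = resp.status();
+    Ok(())
+}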
+#[allow(deprecated)] +async fn fetch(req: Request) -> crate::Result { + // Build the js Request + let mut init = web_sys::RequestInit::new(); + init.method(req.method().as_str()); + + // convert HeaderMap to Headers + let js_headers = web_sys::Headers::new() + .map_err(crate::error::wasm) + .map_err(crate::error::builder)?; + + for (name, value) in req.headers() { + js_headers + .append( + name.as_str(), + value.to_str().map_err(crate::error::builder)?, + ) + .map_err(crate::error::wasm) + .map_err(crate::error::builder)?; + } + init.headers(&js_headers.into()); + + // When req.cors is true, do nothing because the default mode is 'cors' + if !req.cors { + init.mode(web_sys::RequestMode::NoCors); + } + + if let Some(creds) = req.credentials { + init.credentials(creds); + } + + if let Some(cache) = req.cache { + init.set_cache(cache); + } + + if let Some(body) = req.body() { + if !body.is_empty() { + init.body(Some(body.to_js_value()?.as_ref())); + } + } + + let mut abort = AbortGuard::new()?; + if let Some(timeout) = req.timeout() { + abort.timeout(*timeout); + } + init.signal(Some(&abort.signal())); + + let js_req = web_sys::Request::new_with_str_and_init(req.url().as_str(), &init) + .map_err(crate::error::wasm) + .map_err(crate::error::builder)?; + + // Await the fetch() promise + let p = js_fetch(&js_req); + let js_resp = super::promise::(p) + .await + .map_err(|error| { + if error.to_string() == "JsValue(\"reqwest::errors::TimedOut\")" { + crate::error::TimedOut.into() + } else { + error + } + }) + .map_err(crate::error::request)?; + + // Convert from the js Response + let mut resp = http::Response::builder().status(js_resp.status()); + + let url = Url::parse(&js_resp.url()).expect_throw("url parse"); + + let js_headers = js_resp.headers(); + let js_iter = js_sys::try_iter(&js_headers) + .expect_throw("headers try_iter") + .expect_throw("headers have an iterator"); + + for item in js_iter { + let item = item.expect_throw("headers iterator doesn't throw"); + let serialized_headers: String = JSON::stringify(&item) + .expect_throw("serialized headers") + .into(); + let [name, value]: [String; 2] = serde_json::from_str(&serialized_headers) + .expect_throw("deserializable serialized headers"); + resp = resp.header(&name, &value); + } + + resp.body(js_resp) + .map(|resp| Response::new(resp, url, abort)) + .map_err(crate::error::request) +} + +// ===== impl ClientBuilder ===== + +impl ClientBuilder { + /// Constructs a new `ClientBuilder`. + /// + /// This is the same as `Client::builder()`. + pub fn new() -> Self { + ClientBuilder { + config: Config::default(), + } + } + + /// Returns a 'Client' that uses this ClientBuilder configuration + pub fn build(mut self) -> Result { + if let Some(err) = self.config.error { + return Err(err); + } + + let config = std::mem::take(&mut self.config); + Ok(Client { + config: Arc::new(config), + }) + } + + /// Sets the `User-Agent` header to be used by this client. 
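+    ///
+    /// # Example
+    ///
+    /// A sketch:
+    ///
+    /// ```no_run
+    /// let client = reqwest::Client::builder()
+    ///     .user_agent("my-app/0.1.0")
+    ///     .build()
+    ///     .expect("client");
+    /// # drop(client);
+    /// ```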
+ pub fn user_agent(mut self, value: V) -> ClientBuilder + where + V: TryInto, + V::Error: Into, + { + match value.try_into() { + Ok(value) => { + self.config.headers.insert(USER_AGENT, value); + } + Err(e) => { + self.config.error = Some(crate::error::builder(e.into())); + } + } + self + } + + /// Sets the default headers for every request + pub fn default_headers(mut self, headers: HeaderMap) -> ClientBuilder { + for (key, value) in headers.iter() { + self.config.headers.insert(key, value.clone()); + } + self + } +} + +impl Default for ClientBuilder { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug)] +struct Config { + headers: HeaderMap, + error: Option, +} + +impl Default for Config { + fn default() -> Config { + Config { + headers: HeaderMap::new(), + error: None, + } + } +} + +impl Config { + fn fmt_fields(&self, f: &mut fmt::DebugStruct<'_, '_>) { + f.field("default_headers", &self.headers); + } +} + +#[cfg(test)] +mod tests { + use wasm_bindgen_test::*; + + wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); + + #[wasm_bindgen_test] + async fn default_headers() { + use crate::header::{HeaderMap, HeaderValue, CONTENT_TYPE}; + + let mut headers = HeaderMap::new(); + headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); + headers.insert("x-custom", HeaderValue::from_static("flibbertigibbet")); + let client = crate::Client::builder() + .default_headers(headers) + .build() + .expect("client"); + let mut req = client + .get("https://www.example.com") + .build() + .expect("request"); + // merge headers as if client were about to issue fetch + client.merge_headers(&mut req); + + let test_headers = req.headers(); + assert!(test_headers.get(CONTENT_TYPE).is_some(), "content-type"); + assert!(test_headers.get("x-custom").is_some(), "custom header"); + assert!(test_headers.get("accept").is_none(), "no accept header"); + } + + #[wasm_bindgen_test] + async fn default_headers_clone() { + use crate::header::{HeaderMap, HeaderValue, CONTENT_TYPE}; + + let mut headers = HeaderMap::new(); + headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); + headers.insert("x-custom", HeaderValue::from_static("flibbertigibbet")); + let client = crate::Client::builder() + .default_headers(headers) + .build() + .expect("client"); + + let mut req = client + .get("https://www.example.com") + .header(CONTENT_TYPE, "text/plain") + .build() + .expect("request"); + client.merge_headers(&mut req); + let headers1 = req.headers(); + + // confirm that request headers override defaults + assert_eq!( + headers1.get(CONTENT_TYPE).unwrap(), + "text/plain", + "request headers override defaults" + ); + + // confirm that request headers don't change client defaults + let mut req2 = client + .get("https://www.example.com/x") + .build() + .expect("req 2"); + client.merge_headers(&mut req2); + let headers2 = req2.headers(); + assert_eq!( + headers2.get(CONTENT_TYPE).unwrap(), + "application/json", + "request headers don't change client defaults" + ); + } + + #[wasm_bindgen_test] + fn user_agent_header() { + use crate::header::USER_AGENT; + + let client = crate::Client::builder() + .user_agent("FooBar/1.2.3") + .build() + .expect("client"); + + let mut req = client + .get("https://www.example.com") + .build() + .expect("request"); + + // Merge the client headers with the request's one. 
+ client.merge_headers(&mut req); + let headers1 = req.headers(); + + // Confirm that we have the `User-Agent` header set + assert_eq!( + headers1.get(USER_AGENT).unwrap(), + "FooBar/1.2.3", + "The user-agent header was not set: {req:#?}" + ); + + // Now we try to overwrite the `User-Agent` value + + let mut req2 = client + .get("https://www.example.com") + .header(USER_AGENT, "Another-User-Agent/42") + .build() + .expect("request 2"); + + client.merge_headers(&mut req2); + let headers2 = req2.headers(); + + assert_eq!( + headers2.get(USER_AGENT).expect("headers2 user agent"), + "Another-User-Agent/42", + "Was not able to overwrite the User-Agent value on the request-builder" + ); + } +} diff --git a/rust/reqwest/src/wasm/mod.rs b/rust/reqwest/src/wasm/mod.rs new file mode 100644 index 0000000000..ea4d1ced5f --- /dev/null +++ b/rust/reqwest/src/wasm/mod.rs @@ -0,0 +1,85 @@ +use std::convert::TryInto; +use std::time::Duration; + +use js_sys::Function; +use wasm_bindgen::prelude::{wasm_bindgen, Closure}; +use wasm_bindgen::{JsCast, JsValue}; +use web_sys::{AbortController, AbortSignal}; + +mod body; +mod client; +/// TODO +#[cfg(feature = "multipart")] +pub mod multipart; +mod request; +mod response; + +pub use self::body::Body; +pub use self::client::{Client, ClientBuilder}; +pub use self::request::{Request, RequestBuilder}; +pub use self::response::Response; + +#[wasm_bindgen] +extern "C" { + #[wasm_bindgen(js_name = "setTimeout")] + fn set_timeout(handler: &Function, timeout: i32) -> JsValue; + + #[wasm_bindgen(js_name = "clearTimeout")] + fn clear_timeout(handle: JsValue) -> JsValue; +} + +async fn promise(promise: js_sys::Promise) -> Result +where + T: JsCast, +{ + use wasm_bindgen_futures::JsFuture; + + let js_val = JsFuture::from(promise).await.map_err(crate::error::wasm)?; + + js_val + .dyn_into::() + .map_err(|_js_val| "promise resolved to unexpected type".into()) +} + +/// A guard that cancels a fetch request when dropped. +struct AbortGuard { + ctrl: AbortController, + timeout: Option<(JsValue, Closure)>, +} + +impl AbortGuard { + fn new() -> crate::Result { + Ok(AbortGuard { + ctrl: AbortController::new() + .map_err(crate::error::wasm) + .map_err(crate::error::builder)?, + timeout: None, + }) + } + + fn signal(&self) -> AbortSignal { + self.ctrl.signal() + } + + fn timeout(&mut self, timeout: Duration) { + let ctrl = self.ctrl.clone(); + let abort = + Closure::once(move || ctrl.abort_with_reason(&"reqwest::errors::TimedOut".into())); + let timeout = set_timeout( + abort.as_ref().unchecked_ref::(), + timeout.as_millis().try_into().expect("timeout"), + ); + if let Some((id, _)) = self.timeout.replace((timeout, abort)) { + clear_timeout(id); + } + } +} + +impl Drop for AbortGuard { + fn drop(&mut self) { + self.ctrl.abort(); + if let Some((id, _)) = self.timeout.take() { + clear_timeout(id); + } + } +} diff --git a/rust/reqwest/src/wasm/multipart.rs b/rust/reqwest/src/wasm/multipart.rs new file mode 100644 index 0000000000..9b5b4c951d --- /dev/null +++ b/rust/reqwest/src/wasm/multipart.rs @@ -0,0 +1,419 @@ +//! multipart/form-data +use std::borrow::Cow; +use std::fmt; + +use http::HeaderMap; +use mime_guess::Mime; +use web_sys::FormData; + +use super::Body; + +/// An async multipart/form-data request. +pub struct Form { + inner: FormParts, +} + +impl Form { + pub(crate) fn is_empty(&self) -> bool { + self.inner.fields.is_empty() + } +} + +/// A field in a multipart form. +pub struct Part { + meta: PartMetadata, + value: Body, +} + +pub(crate) struct FormParts
<P>
{ + pub(crate) fields: Vec<(Cow<'static, str>, P)>, +} + +pub(crate) struct PartMetadata { + mime: Option, + file_name: Option>, + pub(crate) headers: HeaderMap, +} + +pub(crate) trait PartProps { + fn metadata(&self) -> &PartMetadata; +} + +// ===== impl Form ===== + +impl Default for Form { + fn default() -> Self { + Self::new() + } +} + +impl Form { + /// Creates a new async Form without any content. + pub fn new() -> Form { + Form { + inner: FormParts::new(), + } + } + + /// Add a data field with supplied name and value. + /// + /// # Examples + /// + /// ``` + /// let form = reqwest::multipart::Form::new() + /// .text("username", "seanmonstar") + /// .text("password", "secret"); + /// ``` + pub fn text(self, name: T, value: U) -> Form + where + T: Into>, + U: Into>, + { + self.part(name, Part::text(value)) + } + + /// Adds a customized Part. + pub fn part(self, name: T, part: Part) -> Form + where + T: Into>, + { + self.with_inner(move |inner| inner.part(name, part)) + } + + fn with_inner(self, func: F) -> Self + where + F: FnOnce(FormParts) -> FormParts, + { + Form { + inner: func(self.inner), + } + } + + pub(crate) fn to_form_data(&self) -> crate::Result { + let form = FormData::new() + .map_err(crate::error::wasm) + .map_err(crate::error::builder)?; + + for (name, part) in self.inner.fields.iter() { + part.append_to_form(name, &form) + .map_err(crate::error::wasm) + .map_err(crate::error::builder)?; + } + Ok(form) + } +} + +impl fmt::Debug for Form { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.inner.fmt_fields("Form", f) + } +} + +// ===== impl Part ===== + +impl Part { + /// Makes a text parameter. + pub fn text(value: T) -> Part + where + T: Into>, + { + let body = match value.into() { + Cow::Borrowed(slice) => Body::from(slice), + Cow::Owned(string) => Body::from(string), + }; + Part::new(body) + } + + /// Makes a new parameter from arbitrary bytes. + pub fn bytes(value: T) -> Part + where + T: Into>, + { + let body = match value.into() { + Cow::Borrowed(slice) => Body::from(slice), + Cow::Owned(vec) => Body::from(vec), + }; + Part::new(body) + } + + /// Makes a new parameter from an arbitrary stream. + pub fn stream>(value: T) -> Part { + Part::new(value.into()) + } + + fn new(value: Body) -> Part { + Part { + meta: PartMetadata::new(), + value: value.into_part(), + } + } + + /// Tries to set the mime of this part. + pub fn mime_str(self, mime: &str) -> crate::Result { + Ok(self.mime(mime.parse().map_err(crate::error::builder)?)) + } + + // Re-export when mime 0.4 is available, with split MediaType/MediaRange. + fn mime(self, mime: Mime) -> Part { + self.with_inner(move |inner| inner.mime(mime)) + } + + /// Sets the filename, builder style. + pub fn file_name(self, filename: T) -> Part + where + T: Into>, + { + self.with_inner(move |inner| inner.file_name(filename)) + } + + /// Sets custom headers for the part. + pub fn headers(self, headers: HeaderMap) -> Part { + self.with_inner(move |inner| inner.headers(headers)) + } + + fn with_inner(self, func: F) -> Self + where + F: FnOnce(PartMetadata) -> PartMetadata, + { + Part { + meta: func(self.meta), + value: self.value, + } + } + + fn append_to_form( + &self, + name: &str, + form: &web_sys::FormData, + ) -> Result<(), wasm_bindgen::JsValue> { + let single = self + .value + .as_single() + .expect("A part's body can't be multipart itself"); + + let mut mime_type = self.metadata().mime.as_ref(); + + // The JS fetch API doesn't support file names and mime types for strings. 
So we do our best + // effort to use `append_with_str` and fallback to `append_with_blob_*` if that's not + // possible. + if let super::body::Single::Text(text) = single { + if mime_type.is_none() || mime_type == Some(&mime_guess::mime::TEXT_PLAIN) { + if self.metadata().file_name.is_none() { + return form.append_with_str(name, text); + } + } else { + mime_type = Some(&mime_guess::mime::TEXT_PLAIN); + } + } + + let blob = self.blob(mime_type)?; + + if let Some(file_name) = &self.metadata().file_name { + form.append_with_blob_and_filename(name, &blob, file_name) + } else { + form.append_with_blob(name, &blob) + } + } + + fn blob(&self, mime_type: Option<&Mime>) -> crate::Result { + use web_sys::Blob; + use web_sys::BlobPropertyBag; + let mut properties = BlobPropertyBag::new(); + if let Some(mime) = mime_type { + properties.type_(mime.as_ref()); + } + + let js_value = self + .value + .as_single() + .expect("A part's body can't be set to a multipart body") + .to_js_value(); + + let body_array = js_sys::Array::new(); + body_array.push(&js_value); + + Blob::new_with_u8_array_sequence_and_options(body_array.as_ref(), &properties) + .map_err(crate::error::wasm) + .map_err(crate::error::builder) + } +} + +impl fmt::Debug for Part { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut dbg = f.debug_struct("Part"); + dbg.field("value", &self.value); + self.meta.fmt_fields(&mut dbg); + dbg.finish() + } +} + +impl PartProps for Part { + fn metadata(&self) -> &PartMetadata { + &self.meta + } +} + +// ===== impl FormParts ===== + +impl FormParts
<P>
{ + pub(crate) fn new() -> Self { + FormParts { fields: Vec::new() } + } + + /// Adds a customized Part. + pub(crate) fn part(mut self, name: T, part: P) -> Self + where + T: Into>, + { + self.fields.push((name.into(), part)); + self + } +} + +impl FormParts
<P>
{ + pub(crate) fn fmt_fields(&self, ty_name: &'static str, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct(ty_name) + .field("parts", &self.fields) + .finish() + } +} + +// ===== impl PartMetadata ===== + +impl PartMetadata { + pub(crate) fn new() -> Self { + PartMetadata { + mime: None, + file_name: None, + headers: HeaderMap::default(), + } + } + + pub(crate) fn mime(mut self, mime: Mime) -> Self { + self.mime = Some(mime); + self + } + + pub(crate) fn file_name(mut self, filename: T) -> Self + where + T: Into>, + { + self.file_name = Some(filename.into()); + self + } + + pub(crate) fn headers(mut self, headers: T) -> Self + where + T: Into, + { + self.headers = headers.into(); + self + } +} + +impl PartMetadata { + pub(crate) fn fmt_fields<'f, 'fa, 'fb>( + &self, + debug_struct: &'f mut fmt::DebugStruct<'fa, 'fb>, + ) -> &'f mut fmt::DebugStruct<'fa, 'fb> { + debug_struct + .field("mime", &self.mime) + .field("file_name", &self.file_name) + .field("headers", &self.headers) + } +} + +#[cfg(test)] +mod tests { + + use wasm_bindgen_test::*; + + wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); + + #[wasm_bindgen_test] + async fn test_multipart_js() { + use super::{Form, Part}; + use js_sys::Uint8Array; + use wasm_bindgen::JsValue; + use web_sys::{File, FormData}; + + let text_file_name = "test.txt"; + let text_file_type = "text/plain"; + let text_content = "TEST"; + let text_part = Part::text(text_content) + .file_name(text_file_name) + .mime_str(text_file_type) + .expect("invalid mime type"); + + let binary_file_name = "binary.bin"; + let binary_file_type = "application/octet-stream"; + let binary_content = vec![0u8, 42]; + let binary_part = Part::bytes(binary_content.clone()) + .file_name(binary_file_name) + .mime_str(binary_file_type) + .expect("invalid mime type"); + + let string_name = "string"; + let string_content = "CONTENT"; + let string_part = Part::text(string_content); + + let text_name = "text part"; + let binary_name = "binary part"; + let form = Form::new() + .part(text_name, text_part) + .part(binary_name, binary_part) + .part(string_name, string_part); + + let mut init = web_sys::RequestInit::new(); + init.method("POST"); + init.body(Some( + form.to_form_data() + .expect("could not convert to FormData") + .as_ref(), + )); + + let js_req = web_sys::Request::new_with_str_and_init("", &init) + .expect("could not create JS request"); + + let form_data_promise = js_req.form_data().expect("could not get form_data promise"); + + let form_data = crate::wasm::promise::(form_data_promise) + .await + .expect("could not get body as form data"); + + // check text part + let text_file = File::from(form_data.get(text_name)); + assert_eq!(text_file.name(), text_file_name); + assert_eq!(text_file.type_(), text_file_type); + + let text_promise = text_file.text(); + let text = crate::wasm::promise::(text_promise) + .await + .expect("could not get text body as text"); + assert_eq!( + text.as_string().expect("text is not a string"), + text_content + ); + + // check binary part + let binary_file = File::from(form_data.get(binary_name)); + assert_eq!(binary_file.name(), binary_file_name); + assert_eq!(binary_file.type_(), binary_file_type); + + // check string part + let string = form_data + .get(string_name) + .as_string() + .expect("content is not a string"); + assert_eq!(string, string_content); + + let binary_array_buffer_promise = binary_file.array_buffer(); + let array_buffer = crate::wasm::promise::(binary_array_buffer_promise) + .await + .expect("could not get 
request body as array buffer"); + + let binary = Uint8Array::new(&array_buffer).to_vec(); + + assert_eq!(binary, binary_content); + } +} diff --git a/rust/reqwest/src/wasm/request.rs b/rust/reqwest/src/wasm/request.rs new file mode 100644 index 0000000000..3e2bb8ed89 --- /dev/null +++ b/rust/reqwest/src/wasm/request.rs @@ -0,0 +1,621 @@ +use std::convert::TryFrom; +use std::fmt; +use std::time::Duration; + +use bytes::Bytes; +use http::{request::Parts, Method, Request as HttpRequest}; +use serde::Serialize; +#[cfg(feature = "json")] +use serde_json; +use url::Url; +use web_sys::{RequestCache, RequestCredentials}; + +use super::{Body, Client, Response}; +use crate::header::{HeaderMap, HeaderName, HeaderValue, CONTENT_TYPE}; + +/// A request which can be executed with `Client::execute()`. +pub struct Request { + method: Method, + url: Url, + headers: HeaderMap, + body: Option, + timeout: Option, + pub(super) cors: bool, + pub(super) credentials: Option, + pub(super) cache: Option, +} + +/// A builder to construct the properties of a `Request`. +pub struct RequestBuilder { + client: Client, + request: crate::Result, +} + +impl Request { + /// Constructs a new request. + #[inline] + pub fn new(method: Method, url: Url) -> Self { + Request { + method, + url, + headers: HeaderMap::new(), + body: None, + timeout: None, + cors: true, + credentials: None, + cache: None, + } + } + + /// Get the method. + #[inline] + pub fn method(&self) -> &Method { + &self.method + } + + /// Get a mutable reference to the method. + #[inline] + pub fn method_mut(&mut self) -> &mut Method { + &mut self.method + } + + /// Get the url. + #[inline] + pub fn url(&self) -> &Url { + &self.url + } + + /// Get a mutable reference to the url. + #[inline] + pub fn url_mut(&mut self) -> &mut Url { + &mut self.url + } + + /// Get the headers. + #[inline] + pub fn headers(&self) -> &HeaderMap { + &self.headers + } + + /// Get a mutable reference to the headers. + #[inline] + pub fn headers_mut(&mut self) -> &mut HeaderMap { + &mut self.headers + } + + /// Get the body. + #[inline] + pub fn body(&self) -> Option<&Body> { + self.body.as_ref() + } + + /// Get a mutable reference to the body. + #[inline] + pub fn body_mut(&mut self) -> &mut Option { + &mut self.body + } + + /// Get the timeout. + #[inline] + pub fn timeout(&self) -> Option<&Duration> { + self.timeout.as_ref() + } + + /// Get a mutable reference to the timeout. + #[inline] + pub fn timeout_mut(&mut self) -> &mut Option { + &mut self.timeout + } + + /// Attempts to clone the `Request`. + /// + /// None is returned if a body is which can not be cloned. + pub fn try_clone(&self) -> Option { + let body = match self.body.as_ref() { + Some(body) => Some(body.try_clone()?), + None => None, + }; + + Some(Self { + method: self.method.clone(), + url: self.url.clone(), + headers: self.headers.clone(), + body, + timeout: self.timeout, + cors: self.cors, + credentials: self.credentials, + cache: self.cache, + }) + } +} + +impl RequestBuilder { + pub(super) fn new(client: Client, request: crate::Result) -> RequestBuilder { + RequestBuilder { client, request } + } + + /// Assemble a builder starting from an existing `Client` and a `Request`. + pub fn from_parts(client: crate::Client, request: crate::Request) -> crate::RequestBuilder { + crate::RequestBuilder { + client, + request: crate::Result::Ok(request), + } + } + + /// Modify the query string of the URL. + /// + /// Modifies the URL of this request, adding the parameters provided. 
+    /// This method appends and does not overwrite. This means that it can
+    /// be called multiple times and that existing query parameters are not
+    /// overwritten if the same key is used. The key will simply show up
+    /// twice in the query string.
+    /// Calling `.query([("foo", "a"), ("foo", "b")])` gives `"foo=a&foo=b"`.
+    ///
+    /// # Note
+    /// This method does not support serializing a single key-value
+    /// pair. Instead of using `.query(("key", "val"))`, use a sequence, such
+    /// as `.query(&[("key", "val")])`. It's also possible to serialize structs
+    /// and maps into a key-value pair.
+    ///
+    /// # Errors
+    /// This method will fail if the object you provide cannot be serialized
+    /// into a query string.
+    pub fn query<T: Serialize + ?Sized>(mut self, query: &T) -> RequestBuilder {
+        let mut error = None;
+        if let Ok(ref mut req) = self.request {
+            let url = req.url_mut();
+            let mut pairs = url.query_pairs_mut();
+            let serializer = serde_urlencoded::Serializer::new(&mut pairs);
+
+            if let Err(err) = query.serialize(serializer) {
+                error = Some(crate::error::builder(err));
+            }
+        }
+        if let Ok(ref mut req) = self.request {
+            if let Some("") = req.url().query() {
+                req.url_mut().set_query(None);
+            }
+        }
+        if let Some(err) = error {
+            self.request = Err(err);
+        }
+        self
+    }
+
+    /// Send a form body.
+    ///
+    /// Sets the body to the url encoded serialization of the passed value,
+    /// and also sets the `Content-Type: application/x-www-form-urlencoded`
+    /// header.
+    ///
+    /// # Errors
+    ///
+    /// This method fails if the passed value cannot be serialized into
+    /// url encoded format
+    pub fn form<T: Serialize + ?Sized>(mut self, form: &T) -> RequestBuilder {
+        let mut error = None;
+        if let Ok(ref mut req) = self.request {
+            match serde_urlencoded::to_string(form) {
+                Ok(body) => {
+                    req.headers_mut().insert(
+                        CONTENT_TYPE,
+                        HeaderValue::from_static("application/x-www-form-urlencoded"),
+                    );
+                    *req.body_mut() = Some(body.into());
+                }
+                Err(err) => error = Some(crate::error::builder(err)),
+            }
+        }
+        if let Some(err) = error {
+            self.request = Err(err);
+        }
+        self
+    }
+
+    #[cfg(feature = "json")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "json")))]
+    /// Set the request json
+    pub fn json<T: Serialize + ?Sized>(mut self, json: &T) -> RequestBuilder {
+        let mut error = None;
+        if let Ok(ref mut req) = self.request {
+            match serde_json::to_vec(json) {
+                Ok(body) => {
+                    req.headers_mut()
+                        .insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
+                    *req.body_mut() = Some(body.into());
+                }
+                Err(err) => error = Some(crate::error::builder(err)),
+            }
+        }
+        if let Some(err) = error {
+            self.request = Err(err);
+        }
+        self
+    }
+
+    /// Enable HTTP basic authentication.
+    pub fn basic_auth<U, P>(self, username: U, password: Option<P>) -> RequestBuilder
+    where
+        U: fmt::Display,
+        P: fmt::Display,
+    {
+        let header_value = crate::util::basic_auth(username, password);
+        self.header(crate::header::AUTHORIZATION, header_value)
+    }
+
+    /// Enable HTTP bearer authentication.
+    pub fn bearer_auth<T>(self, token: T) -> RequestBuilder
+    where
+        T: fmt::Display,
+    {
+        let header_value = format!("Bearer {token}");
+        self.header(crate::header::AUTHORIZATION, header_value)
+    }
+
+    /// Set the request body.
+    pub fn body<T: Into<Body>>(mut self, body: T) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.body = Some(body.into());
+        }
+        self
+    }
+
+    /// Enables a request timeout.
+    pub fn timeout(mut self, timeout: Duration) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            *req.timeout_mut() = Some(timeout);
+        }
+        self
+    }
+
+    /// TODO
+    #[cfg(feature = "multipart")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "multipart")))]
+    pub fn multipart(mut self, multipart: super::multipart::Form) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            *req.body_mut() = Some(Body::from_form(multipart))
+        }
+        self
+    }
+
+    /// Add a `Header` to this Request.
+    pub fn header<K, V>(mut self, key: K, value: V) -> RequestBuilder
+    where
+        HeaderName: TryFrom<K>,
+        <HeaderName as TryFrom<K>>::Error: Into<http::Error>,
+        HeaderValue: TryFrom<V>,
+        <HeaderValue as TryFrom<V>>::Error: Into<http::Error>,
+    {
+        let mut error = None;
+        if let Ok(ref mut req) = self.request {
+            match <HeaderName as TryFrom<K>>::try_from(key) {
+                Ok(key) => match <HeaderValue as TryFrom<V>>::try_from(value) {
+                    Ok(value) => {
+                        req.headers_mut().append(key, value);
+                    }
+                    Err(e) => error = Some(crate::error::builder(e.into())),
+                },
+                Err(e) => error = Some(crate::error::builder(e.into())),
+            };
+        }
+        if let Some(err) = error {
+            self.request = Err(err);
+        }
+        self
+    }
+
+    /// Add a set of Headers to the existing ones on this Request.
+    ///
+    /// The headers will be merged in to any already set.
+    pub fn headers(mut self, headers: crate::header::HeaderMap) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            crate::util::replace_headers(req.headers_mut(), headers);
+        }
+        self
+    }
+
+    /// Disable CORS on fetching the request.
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request mode][mdn] will be set to 'no-cors'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/mode
+    pub fn fetch_mode_no_cors(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.cors = false;
+        }
+        self
+    }
+
+    /// Set fetch credentials to 'same-origin'
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request credentials][mdn] will be set to 'same-origin'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/credentials
+    pub fn fetch_credentials_same_origin(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.credentials = Some(RequestCredentials::SameOrigin);
+        }
+        self
+    }
+
+    /// Set fetch credentials to 'include'
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request credentials][mdn] will be set to 'include'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/credentials
+    pub fn fetch_credentials_include(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.credentials = Some(RequestCredentials::Include);
+        }
+        self
+    }
+
+    /// Set fetch credentials to 'omit'
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request credentials][mdn] will be set to 'omit'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/credentials
+    pub fn fetch_credentials_omit(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.credentials = Some(RequestCredentials::Omit);
+        }
+        self
+    }
+
+    /// Set fetch cache mode to 'default'.
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request cache][mdn] will be set to 'default'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/cache
+    pub fn fetch_cache_default(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.cache = Some(RequestCache::Default);
+        }
+        self
+    }
+
+    /// Set fetch cache mode to 'no-store'.
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request cache][mdn] will be set to 'no-store'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/cache
+    pub fn fetch_cache_no_store(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.cache = Some(RequestCache::NoStore);
+        }
+        self
+    }
+
+    /// Set fetch cache mode to 'reload'.
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request cache][mdn] will be set to 'reload'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/cache
+    pub fn fetch_cache_reload(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.cache = Some(RequestCache::Reload);
+        }
+        self
+    }
+
+    /// Set fetch cache mode to 'no-cache'.
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request cache][mdn] will be set to 'no-cache'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/cache
+    pub fn fetch_cache_no_cache(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.cache = Some(RequestCache::NoCache);
+        }
+        self
+    }
+
+    /// Set fetch cache mode to 'force-cache'.
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request cache][mdn] will be set to 'force-cache'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/cache
+    pub fn fetch_cache_force_cache(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.cache = Some(RequestCache::ForceCache);
+        }
+        self
+    }
+
+    /// Set fetch cache mode to 'only-if-cached'.
+    ///
+    /// # WASM
+    ///
+    /// This option is only effective with WebAssembly target.
+    ///
+    /// The [request cache][mdn] will be set to 'only-if-cached'.
+    ///
+    /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/API/Request/cache
+    pub fn fetch_cache_only_if_cached(mut self) -> RequestBuilder {
+        if let Ok(ref mut req) = self.request {
+            req.cache = Some(RequestCache::OnlyIfCached);
+        }
+        self
+    }
+
+    /// Build a `Request`, which can be inspected, modified and executed with
+    /// `Client::execute()`.
+    pub fn build(self) -> crate::Result<Request> {
+        self.request
+    }
+
+    /// Build a `Request`, which can be inspected, modified and executed with
+    /// `Client::execute()`.
+    ///
+    /// This is similar to [`RequestBuilder::build()`], but also returns the
+    /// embedded `Client`.
+    pub fn build_split(self) -> (Client, crate::Result<Request>) {
+        (self.client, self.request)
+    }
+
+    /// Constructs the Request and sends it to the target URL, returning a
+    /// future Response.
+    ///
+    /// # Errors
+    ///
+    /// This method fails if there was an error while sending request.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use reqwest::Error;
+    /// #
+    /// # async fn run() -> Result<(), Error> {
+    /// let response = reqwest::Client::new()
+    ///     .get("https://hyper.rs")
+    ///     .send()
+    ///     .await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub async fn send(self) -> crate::Result<Response> {
+        let req = self.request?;
+        self.client.execute_request(req).await
+    }
+
+    /// Attempt to clone the RequestBuilder.
+    ///
+    /// `None` is returned if the RequestBuilder can not be cloned.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// # use reqwest::Error;
+    /// #
+    /// # fn run() -> Result<(), Error> {
+    /// let client = reqwest::Client::new();
+    /// let builder = client.post("http://httpbin.org/post")
+    ///     .body("from a &str!");
+    /// let clone = builder.try_clone();
+    /// assert!(clone.is_some());
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn try_clone(&self) -> Option<RequestBuilder> {
+        self.request
+            .as_ref()
+            .ok()
+            .and_then(|req| req.try_clone())
+            .map(|req| RequestBuilder {
+                client: self.client.clone(),
+                request: Ok(req),
+            })
+    }
+}
+
+impl fmt::Debug for Request {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt_request_fields(&mut f.debug_struct("Request"), self).finish()
+    }
+}
+
+impl fmt::Debug for RequestBuilder {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut builder = f.debug_struct("RequestBuilder");
+        match self.request {
+            Ok(ref req) => fmt_request_fields(&mut builder, req).finish(),
+            Err(ref err) => builder.field("error", err).finish(),
+        }
+    }
+}
+
+fn fmt_request_fields<'a, 'b>(
+    f: &'a mut fmt::DebugStruct<'a, 'b>,
+    req: &Request,
+) -> &'a mut fmt::DebugStruct<'a, 'b> {
+    f.field("method", &req.method)
+        .field("url", &req.url)
+        .field("headers", &req.headers)
+}
+
+impl<T> TryFrom<HttpRequest<T>> for Request
+where
+    T: Into<Body>,
+{
+    type Error = crate::Error;
+
+    fn try_from(req: HttpRequest<T>) -> crate::Result<Self> {
+        let (parts, body) = req.into_parts();
+        let Parts {
+            method,
+            uri,
+            headers,
+            ..
+        } = parts;
+        let url = Url::parse(&uri.to_string()).map_err(crate::error::builder)?;
+        Ok(Request {
+            method,
+            url,
+            headers,
+            body: Some(body.into()),
+            timeout: None,
+            cors: true,
+            credentials: None,
+            cache: None,
+        })
+    }
+}
+
+impl TryFrom<Request> for HttpRequest<Body> {
+    type Error = crate::Error;
+
+    fn try_from(req: Request) -> crate::Result<Self> {
+        let Request {
+            method,
+            url,
+            headers,
+            body,
+            ..
+        } = req;
+
+        let mut req = HttpRequest::builder()
+            .method(method)
+            .uri(url.as_str())
+            .body(body.unwrap_or_else(|| Body::from(Bytes::default())))
+            .map_err(crate::error::builder)?;
+
+        *req.headers_mut() = headers;
+        Ok(req)
+    }
+}
diff --git a/rust/reqwest/src/wasm/response.rs b/rust/reqwest/src/wasm/response.rs
new file mode 100644
index 0000000000..52fad04ee6
--- /dev/null
+++ b/rust/reqwest/src/wasm/response.rs
@@ -0,0 +1,195 @@
+use std::fmt;
+
+use bytes::Bytes;
+use http::{HeaderMap, StatusCode};
+use js_sys::Uint8Array;
+use url::Url;
+
+use crate::wasm::AbortGuard;
+
+#[cfg(feature = "stream")]
+use wasm_bindgen::JsCast;
+
+#[cfg(feature = "stream")]
+use futures_util::stream::{self, StreamExt};
+
+#[cfg(feature = "json")]
+use serde::de::DeserializeOwned;
+
+/// A Response to a submitted `Request`.
+pub struct Response {
+    http: http::Response<web_sys::Response>,
+    _abort: AbortGuard,
+    // Boxed to save space (11 words to 1 word), and it's not accessed
+    // frequently internally.
+    url: Box<Url>,
+}
+
+impl Response {
+    pub(super) fn new(
+        res: http::Response<web_sys::Response>,
+        url: Url,
+        abort: AbortGuard,
+    ) -> Response {
+        Response {
+            http: res,
+            url: Box::new(url),
+            _abort: abort,
+        }
+    }
+
+    /// Get the `StatusCode` of this `Response`.
+    #[inline]
+    pub fn status(&self) -> StatusCode {
+        self.http.status()
+    }
+
+    /// Get the `Headers` of this `Response`.
+    #[inline]
+    pub fn headers(&self) -> &HeaderMap {
+        self.http.headers()
+    }
+
+    /// Get a mutable reference to the `Headers` of this `Response`.
+    #[inline]
+    pub fn headers_mut(&mut self) -> &mut HeaderMap {
+        self.http.headers_mut()
+    }
+
+    /// Get the content-length of this response, if known.
+    ///
+    /// Reasons it may not be known:
+    ///
+    /// - The server didn't send a `content-length` header.
+    /// - The response is compressed and automatically decoded (thus changing
+    ///   the actual decoded length).
+    pub fn content_length(&self) -> Option<u64> {
+        self.headers()
+            .get(http::header::CONTENT_LENGTH)?
+            .to_str()
+            .ok()?
+            .parse()
+            .ok()
+    }
+
+    /// Get the final `Url` of this `Response`.
+    #[inline]
+    pub fn url(&self) -> &Url {
+        &self.url
+    }
+
+    /* It might not be possible to detect this in JS?
+    /// Get the HTTP `Version` of this `Response`.
+    #[inline]
+    pub fn version(&self) -> Version {
+        self.http.version()
+    }
+    */
+
+    /// Try to deserialize the response body as JSON.
+    #[cfg(feature = "json")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "json")))]
+    pub async fn json<T: DeserializeOwned>(self) -> crate::Result<T> {
+        let full = self.bytes().await?;
+
+        serde_json::from_slice(&full).map_err(crate::error::decode)
+    }
+
+    /// Get the response text.
+    pub async fn text(self) -> crate::Result<String> {
+        let p = self
+            .http
+            .body()
+            .text()
+            .map_err(crate::error::wasm)
+            .map_err(crate::error::decode)?;
+        let js_val = super::promise::<wasm_bindgen::JsValue>(p)
+            .await
+            .map_err(crate::error::decode)?;
+        if let Some(s) = js_val.as_string() {
+            Ok(s)
+        } else {
+            Err(crate::error::decode("response.text isn't string"))
+        }
+    }
+
+    /// Get the response as bytes
+    pub async fn bytes(self) -> crate::Result<Bytes> {
+        let p = self
+            .http
+            .body()
+            .array_buffer()
+            .map_err(crate::error::wasm)
+            .map_err(crate::error::decode)?;
+
+        let buf_js = super::promise::<wasm_bindgen::JsValue>(p)
+            .await
+            .map_err(crate::error::decode)?;
+
+        let buffer = Uint8Array::new(&buf_js);
+        let mut bytes = vec![0; buffer.length() as usize];
+        buffer.copy_to(&mut bytes);
+        Ok(bytes.into())
+    }
+
+    /// Convert the response into a `Stream` of `Bytes` from the body.
+    #[cfg(feature = "stream")]
+    pub fn bytes_stream(self) -> impl futures_core::Stream<Item = crate::Result<Bytes>> {
+        use futures_core::Stream;
+        use std::pin::Pin;
+
+        let web_response = self.http.into_body();
+        let abort = self._abort;
+
+        if let Some(body) = web_response.body() {
+            let body = wasm_streams::ReadableStream::from_raw(body.unchecked_into());
+            Box::pin(body.into_stream().map(move |buf_js| {
+                // Keep the abort guard alive as long as this stream is.
+                let _abort = &abort;
+                let buffer = Uint8Array::new(
+                    &buf_js
+                        .map_err(crate::error::wasm)
+                        .map_err(crate::error::decode)?,
+                );
+                let mut bytes = vec![0; buffer.length() as usize];
+                buffer.copy_to(&mut bytes);
+                Ok(bytes.into())
+            })) as Pin<Box<dyn Stream<Item = crate::Result<Bytes>>>>
+        } else {
+            // If there's no body, return an empty stream.
+            Box::pin(stream::empty()) as Pin<Box<dyn Stream<Item = crate::Result<Bytes>>>>
+        }
+    }
+
+    // util methods
+
+    /// Turn a response into an error if the server returned an error.
+    pub fn error_for_status(self) -> crate::Result<Self> {
+        let status = self.status();
+        if status.is_client_error() || status.is_server_error() {
+            Err(crate::error::status_code(*self.url, status))
+        } else {
+            Ok(self)
+        }
+    }
+
+    /// Turn a reference to a response into an error if the server returned an error.
+    pub fn error_for_status_ref(&self) -> crate::Result<&Self> {
+        let status = self.status();
+        if status.is_client_error() || status.is_server_error() {
+            Err(crate::error::status_code(*self.url.clone(), status))
+        } else {
+            Ok(self)
+        }
+    }
+}
+
+impl fmt::Debug for Response {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("Response")
+            //.field("url", self.url())
+            .field("status", &self.status())
+            .field("headers", self.headers())
+            .finish()
+    }
+}
diff --git a/rust/reqwest/tests/badssl.rs b/rust/reqwest/tests/badssl.rs
new file mode 100644
index 0000000000..7aa75fd9ba
--- /dev/null
+++ b/rust/reqwest/tests/badssl.rs
@@ -0,0 +1,105 @@
+#![cfg(not(target_arch = "wasm32"))]
+#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))]
+
+#[cfg(all(feature = "__tls", not(feature = "rustls-tls-manual-roots")))]
+#[tokio::test]
+async fn test_badssl_modern() {
+    let text = reqwest::Client::builder()
+        .no_proxy()
+        .build()
+        .unwrap()
+        .get("https://mozilla-modern.badssl.com/")
+        .send()
+        .await
+        .unwrap()
+        .text()
+        .await
+        .unwrap();
+
+    assert!(text.contains("mozilla-modern.badssl.com"));
+}
+
+#[cfg(any(
+    feature = "rustls-tls-webpki-roots-no-provider",
+    feature = "rustls-tls-native-roots-no-provider"
+))]
+#[tokio::test]
+async fn test_rustls_badssl_modern() {
+    let text = reqwest::Client::builder()
+        .use_rustls_tls()
+        .no_proxy()
+        .build()
+        .unwrap()
+        .get("https://mozilla-modern.badssl.com/")
+        .send()
+        .await
+        .unwrap()
+        .text()
+        .await
+        .unwrap();
+
+    assert!(text.contains("mozilla-modern.badssl.com"));
+}
+
+#[cfg(feature = "__tls")]
+#[tokio::test]
+async fn test_badssl_self_signed() {
+    let text = reqwest::Client::builder()
+        .danger_accept_invalid_certs(true)
+        .no_proxy()
+        .build()
+        .unwrap()
+        .get("https://self-signed.badssl.com/")
+        .send()
+        .await
+        .unwrap()
+        .text()
+        .await
+        .unwrap();
+
+    assert!(text.contains("self-signed.badssl.com"));
+}
+
+#[cfg(feature = "__tls")]
+#[tokio::test]
+async fn test_badssl_no_built_in_roots() {
+    let result = reqwest::Client::builder()
+        .tls_built_in_root_certs(false)
+        .no_proxy()
+        .build()
+        .unwrap()
+        .get("https://mozilla-modern.badssl.com/")
+        .send()
+        .await;
+
+    assert!(result.is_err());
+}
+
+#[cfg(any(feature = "native-tls", feature = "rustls-tls"))]
+#[tokio::test]
+async fn test_badssl_wrong_host() {
+    let text = reqwest::Client::builder()
+        .danger_accept_invalid_hostnames(true)
+        .no_proxy()
+        .build()
+        .unwrap()
+        .get("https://wrong.host.badssl.com/")
+        .send()
+        .await
+        .unwrap()
+        .text()
+        .await
+        .unwrap();
+
+    assert!(text.contains("wrong.host.badssl.com"));
+
+    let result = reqwest::Client::builder()
+        .danger_accept_invalid_hostnames(true)
+        .build()
+        .unwrap()
+        .get("https://self-signed.badssl.com/")
+        .send()
+        .await;
+
+    assert!(result.is_err());
+}
diff --git a/rust/reqwest/tests/blocking.rs b/rust/reqwest/tests/blocking.rs
new file mode 100644
index 0000000000..4f22ea27c8
--- /dev/null
+++ b/rust/reqwest/tests/blocking.rs
@@ -0,0 +1,413 @@
+mod support;
+
+use http::header::{CONTENT_LENGTH, CONTENT_TYPE, TRANSFER_ENCODING};
+use http_body_util::BodyExt;
+#[cfg(feature = "json")]
+use std::collections::HashMap;
+use support::server;
+
+#[test]
+fn test_response_text() {
+    let server = server::http(move |_req| async { http::Response::new("Hello".into()) });
+
+    let url = format!("http://{}/text", server.addr());
+    let res = reqwest::blocking::get(&url).unwrap();
+    assert_eq!(res.url().as_str(), &url);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+    assert_eq!(res.content_length(), Some(5));
+
+    let body = res.text().unwrap();
+    assert_eq!(b"Hello", body.as_bytes());
+}
+
+#[test]
+fn donot_set_content_length_0_if_have_no_body() {
+    let server = server::http(move |req| async move {
+        let headers = req.headers();
+        assert_eq!(headers.get(CONTENT_LENGTH), None);
+        assert!(headers.get(CONTENT_TYPE).is_none());
+        assert!(headers.get(TRANSFER_ENCODING).is_none());
+        http::Response::default()
+    });
+
+    let url = format!("http://{}/content-length", server.addr());
+    let res = reqwest::blocking::Client::builder()
+        .no_proxy()
+        .build()
+        .expect("client builder")
+        .get(&url)
+        .send()
+        .expect("request");
+
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+}
+
+#[test]
+#[cfg(feature = "charset")]
+fn test_response_non_utf_8_text() {
+    let server = server::http(move |_req| async {
+        http::Response::builder()
+            .header("content-type", "text/plain; charset=gbk")
+            .body(b"\xc4\xe3\xba\xc3"[..].into())
+            .unwrap()
+    });
+
+    let url = format!("http://{}/text", server.addr());
+    let res = reqwest::blocking::get(&url).unwrap();
+    assert_eq!(res.url().as_str(), &url);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+    assert_eq!(res.content_length(), Some(4));
+
+    let body = res.text().unwrap();
+    assert_eq!("你好", &body);
+    assert_eq!(b"\xe4\xbd\xa0\xe5\xa5\xbd", body.as_bytes()); // Now it's utf-8
+}
+
+#[test]
+#[cfg(feature = "json")]
+fn test_response_json() {
+    let server = server::http(move |_req| async { http::Response::new("\"Hello\"".into()) });
+
+    let url = format!("http://{}/json", server.addr());
+    let res = reqwest::blocking::get(&url).unwrap();
+    assert_eq!(res.url().as_str(), &url);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+    assert_eq!(res.content_length(), Some(7));
+
+    let body = res.json::<String>().unwrap();
+    assert_eq!("Hello", body);
+}
+
+#[test]
+fn test_response_copy_to() {
+    let server = server::http(move |_req| async { http::Response::new("Hello".into()) });
+
+    let url = format!("http://{}/1", server.addr());
+    let mut res = reqwest::blocking::get(&url).unwrap();
+    assert_eq!(res.url().as_str(), &url);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+
+    let mut dst = Vec::new();
+    res.copy_to(&mut dst).unwrap();
+    assert_eq!(dst, b"Hello");
+}
+
+#[test]
+fn test_get() {
+    let server = server::http(move |_req| async { http::Response::default() });
+
+    let url = format!("http://{}/1", server.addr());
+    let res = reqwest::blocking::get(&url).unwrap();
+
+    assert_eq!(res.url().as_str(), &url);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+    assert_eq!(res.remote_addr(), Some(server.addr()));
+
+    assert_eq!(res.text().unwrap().len(), 0)
+}
+
+#[test]
+fn test_post() {
+    let server = server::http(move |req| async move {
+        assert_eq!(req.method(), "POST");
+        assert_eq!(req.headers()["content-length"], "5");
+
+        let data = req.into_body().collect().await.unwrap().to_bytes();
+        assert_eq!(&*data, b"Hello");
+
+        http::Response::default()
+    });
+
+    let url = format!("http://{}/2", server.addr());
+    let res = reqwest::blocking::Client::new()
+        .post(&url)
+        .body("Hello")
+        .send()
+        .unwrap();
+
+    assert_eq!(res.url().as_str(), &url);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+}
+
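Editor's note: the blocking tests above all follow the same round trip — stand up an in-process `server::http` fixture, drive it with `reqwest::blocking`, and assert on what the handler saw. As a standalone illustration of that round trip in application code (a minimal sketch, not part of the vendored suite; it assumes a reachable endpoint and reqwest built with the `blocking` feature; the URL below is hypothetical):

```rust
// Minimal sketch of the blocking round trip exercised by test_post above.
// The URL is hypothetical; in the tests it comes from server::http(...).addr().
fn post_hello(url: &str) -> Result<String, reqwest::Error> {
    let res = reqwest::blocking::Client::new()
        .post(url)
        .body("Hello") // fixed five-byte body, so Content-Length: 5 is set
        .send()?; // blocks until the response arrives
    // Map 4xx/5xx statuses to Err, as the error_for_status tests below do.
    res.error_for_status()?.text()
}

fn main() {
    match post_hello("http://127.0.0.1:8080/2") {
        Ok(body) => println!("server replied: {body}"),
        Err(err) => eprintln!("request failed: {err}"),
    }
}
```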
+#[test] +fn test_post_form() { + let server = server::http(move |req| async move { + assert_eq!(req.method(), "POST"); + assert_eq!(req.headers()["content-length"], "24"); + assert_eq!( + req.headers()["content-type"], + "application/x-www-form-urlencoded" + ); + + let data = req.into_body().collect().await.unwrap().to_bytes(); + assert_eq!(&*data, b"hello=world&sean=monstar"); + + http::Response::default() + }); + + let form = &[("hello", "world"), ("sean", "monstar")]; + + let url = format!("http://{}/form", server.addr()); + let res = reqwest::blocking::Client::new() + .post(&url) + .form(form) + .send() + .expect("request send"); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +/// Calling `Response::error_for_status`` on a response with status in 4xx +/// returns an error. +#[test] +fn test_error_for_status_4xx() { + let server = server::http(move |_req| async { + http::Response::builder() + .status(400) + .body(Default::default()) + .unwrap() + }); + + let url = format!("http://{}/1", server.addr()); + let res = reqwest::blocking::get(&url).unwrap(); + + let err = res.error_for_status().unwrap_err(); + assert!(err.is_status()); + assert_eq!(err.status(), Some(reqwest::StatusCode::BAD_REQUEST)); +} + +/// Calling `Response::error_for_status`` on a response with status in 5xx +/// returns an error. +#[test] +fn test_error_for_status_5xx() { + let server = server::http(move |_req| async { + http::Response::builder() + .status(500) + .body(Default::default()) + .unwrap() + }); + + let url = format!("http://{}/1", server.addr()); + let res = reqwest::blocking::get(&url).unwrap(); + + let err = res.error_for_status().unwrap_err(); + assert!(err.is_status()); + assert_eq!( + err.status(), + Some(reqwest::StatusCode::INTERNAL_SERVER_ERROR) + ); +} + +#[test] +fn test_default_headers() { + let server = server::http(move |req| async move { + assert_eq!(req.headers()["reqwest-test"], "orly"); + http::Response::default() + }); + + let mut headers = http::HeaderMap::with_capacity(1); + headers.insert("reqwest-test", "orly".parse().unwrap()); + let client = reqwest::blocking::Client::builder() + .default_headers(headers) + .build() + .unwrap(); + + let url = format!("http://{}/1", server.addr()); + let res = client.get(&url).send().unwrap(); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[test] +fn test_override_default_headers() { + let server = server::http(move |req| { + async move { + // not 'iamatoken' + assert_eq!(req.headers()[&http::header::AUTHORIZATION], "secret"); + http::Response::default() + } + }); + + let mut headers = http::HeaderMap::with_capacity(1); + headers.insert( + http::header::AUTHORIZATION, + http::header::HeaderValue::from_static("iamatoken"), + ); + let client = reqwest::blocking::Client::builder() + .default_headers(headers) + .build() + .unwrap(); + + let url = format!("http://{}/3", server.addr()); + let res = client + .get(&url) + .header( + http::header::AUTHORIZATION, + http::header::HeaderValue::from_static("secret"), + ) + .send() + .unwrap(); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[test] +fn test_appended_headers_not_overwritten() { + let server = server::http(move |req| async move { + let mut accepts = req.headers().get_all("accept").into_iter(); + assert_eq!(accepts.next().unwrap(), "application/json"); + assert_eq!(accepts.next().unwrap(), "application/json+hal"); + 
assert_eq!(accepts.next(), None); + + http::Response::default() + }); + + let client = reqwest::blocking::Client::new(); + + let url = format!("http://{}/4", server.addr()); + let res = client + .get(&url) + .header(header::ACCEPT, "application/json") + .header(header::ACCEPT, "application/json+hal") + .send() + .unwrap(); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); + + // make sure this also works with default headers + use reqwest::header; + let mut headers = header::HeaderMap::with_capacity(1); + headers.insert( + header::ACCEPT, + header::HeaderValue::from_static("text/html"), + ); + let client = reqwest::blocking::Client::builder() + .default_headers(headers) + .build() + .unwrap(); + + let url = format!("http://{}/4", server.addr()); + let res = client + .get(&url) + .header(header::ACCEPT, "application/json") + .header(header::ACCEPT, "application/json+hal") + .send() + .unwrap(); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[cfg_attr(not(debug_assertions), ignore)] +#[test] +#[should_panic] +fn test_blocking_inside_a_runtime() { + let server = server::http(move |_req| async { http::Response::new("Hello".into()) }); + + let url = format!("http://{}/text", server.addr()); + + let rt = tokio::runtime::Builder::new_current_thread() + .build() + .expect("new rt"); + + rt.block_on(async move { + let _should_panic = reqwest::blocking::get(&url); + }); +} + +#[cfg(feature = "default-tls")] +#[test] +fn test_allowed_methods_blocking() { + let resp = reqwest::blocking::Client::builder() + .https_only(true) + .build() + .expect("client builder") + .get("https://google.com") + .send(); + + assert_eq!(resp.is_err(), false); + + let resp = reqwest::blocking::Client::builder() + .https_only(true) + .build() + .expect("client builder") + .get("http://google.com") + .send(); + + assert_eq!(resp.is_err(), true); +} + +/// Test that a [`reqwest::blocking::Body`] can be created from [`bytes::Bytes`]. +#[test] +fn test_body_from_bytes() { + let body = "abc"; + // No external calls are needed. Only the request building is tested. 
+ let request = reqwest::blocking::Client::builder() + .build() + .expect("Could not build the client") + .put("https://google.com") + .body(bytes::Bytes::from(body)) + .build() + .expect("Invalid body"); + + assert_eq!(request.body().unwrap().as_bytes(), Some(body.as_bytes())); +} + +#[test] +#[cfg(feature = "json")] +fn blocking_add_json_default_content_type_if_not_set_manually() { + use http::header::HeaderValue; + + let mut map = HashMap::new(); + map.insert("body", "json"); + let content_type = HeaderValue::from_static("application/vnd.api+json"); + let req = reqwest::blocking::Client::new() + .post("https://google.com/") + .header(CONTENT_TYPE, &content_type) + .json(&map) + .build() + .expect("request is not valid"); + + assert_eq!(content_type, req.headers().get(CONTENT_TYPE).unwrap()); +} + +#[test] +#[cfg(feature = "json")] +fn blocking_update_json_content_type_if_set_manually() { + let mut map = HashMap::new(); + map.insert("body", "json"); + let req = reqwest::blocking::Client::new() + .post("https://google.com/") + .json(&map) + .build() + .expect("request is not valid"); + + assert_eq!("application/json", req.headers().get(CONTENT_TYPE).unwrap()); +} + +#[test] +#[cfg(feature = "__tls")] +fn test_response_no_tls_info_for_http() { + let server = server::http(move |_req| async { http::Response::new("Hello".into()) }); + + let url = format!("http://{}/text", server.addr()); + + let client = reqwest::blocking::Client::builder() + .tls_info(true) + .build() + .unwrap(); + + let res = client.get(&url).send().unwrap(); + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); + assert_eq!(res.content_length(), Some(5)); + let tls_info = res.extensions().get::(); + assert_eq!(tls_info.is_none(), true); + + let body = res.text().unwrap(); + assert_eq!(b"Hello", body.as_bytes()); +} diff --git a/rust/reqwest/tests/brotli.rs b/rust/reqwest/tests/brotli.rs new file mode 100644 index 0000000000..ba116ed924 --- /dev/null +++ b/rust/reqwest/tests/brotli.rs @@ -0,0 +1,357 @@ +mod support; +use std::io::Read; +use support::server; +use tokio::io::AsyncWriteExt; + +#[tokio::test] +async fn brotli_response() { + brotli_case(10_000, 4096).await; +} + +#[tokio::test] +async fn brotli_single_byte_chunks() { + brotli_case(10, 1).await; +} + +#[tokio::test] +async fn test_brotli_empty_body() { + let server = server::http(move |req| async move { + assert_eq!(req.method(), "HEAD"); + + http::Response::builder() + .header("content-encoding", "br") + .body(Default::default()) + .unwrap() + }); + + let client = reqwest::Client::new(); + let res = client + .head(&format!("http://{}/brotli", server.addr())) + .send() + .await + .unwrap(); + + let body = res.text().await.unwrap(); + + assert_eq!(body, ""); +} + +#[tokio::test] +async fn test_accept_header_is_not_changed_if_set() { + let server = server::http(move |req| async move { + assert_eq!(req.headers()["accept"], "application/json"); + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("br")); + http::Response::default() + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/accept", server.addr())) + .header( + reqwest::header::ACCEPT, + reqwest::header::HeaderValue::from_static("application/json"), + ) + .send() + .await + .unwrap(); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn test_accept_encoding_header_is_not_changed_if_set() { + let server = server::http(move |req| async move { + 
assert_eq!(req.headers()["accept"], "*/*"); + assert_eq!(req.headers()["accept-encoding"], "identity"); + http::Response::default() + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/accept-encoding", server.addr())) + .header( + reqwest::header::ACCEPT_ENCODING, + reqwest::header::HeaderValue::from_static("identity"), + ) + .send() + .await + .unwrap(); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +async fn brotli_case(response_size: usize, chunk_size: usize) { + use futures_util::stream::StreamExt; + + let content: String = (0..response_size) + .into_iter() + .map(|i| format!("test {i}")) + .collect(); + + let mut encoder = brotli_crate::CompressorReader::new(content.as_bytes(), 4096, 5, 20); + let mut brotlied_content = Vec::new(); + encoder.read_to_end(&mut brotlied_content).unwrap(); + + let mut response = format!( + "\ + HTTP/1.1 200 OK\r\n\ + Server: test-accept\r\n\ + Content-Encoding: br\r\n\ + Content-Length: {}\r\n\ + \r\n", + &brotlied_content.len() + ) + .into_bytes(); + response.extend(&brotlied_content); + + let server = server::http(move |req| { + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("br")); + + let brotlied = brotlied_content.clone(); + async move { + let len = brotlied.len(); + let stream = + futures_util::stream::unfold((brotlied, 0), move |(brotlied, pos)| async move { + let chunk = brotlied.chunks(chunk_size).nth(pos)?.to_vec(); + + Some((chunk, (brotlied, pos + 1))) + }); + + let body = reqwest::Body::wrap_stream(stream.map(Ok::<_, std::convert::Infallible>)); + + http::Response::builder() + .header("content-encoding", "br") + .header("content-length", len) + .body(body) + .unwrap() + } + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/brotli", server.addr())) + .send() + .await + .expect("response"); + + let body = res.text().await.expect("text"); + assert_eq!(body, content); +} + +const COMPRESSED_RESPONSE_HEADERS: &[u8] = b"HTTP/1.1 200 OK\x0d\x0a\ + Content-Type: text/plain\x0d\x0a\ + Connection: keep-alive\x0d\x0a\ + Content-Encoding: br\x0d\x0a"; + +const RESPONSE_CONTENT: &str = "some message here"; + +fn brotli_compress(input: &[u8]) -> Vec { + let mut encoder = brotli_crate::CompressorReader::new(input, 4096, 5, 20); + let mut brotlied_content = Vec::new(); + encoder.read_to_end(&mut brotlied_content).unwrap(); + brotlied_content +} + +#[tokio::test] +async fn test_non_chunked_non_fragmented_response() { + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let brotlied_content = brotli_compress(RESPONSE_CONTENT.as_bytes()); + let content_length_header = + format!("Content-Length: {}\r\n\r\n", brotlied_content.len()).into_bytes(); + let response = [ + COMPRESSED_RESPONSE_HEADERS, + &content_length_header, + &brotlied_content, + ] + .concat(); + + client_socket + .write_all(response.as_slice()) + .await + .expect("response write_all failed"); + client_socket.flush().await.expect("response flush failed"); + }) + }); + + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_1() { + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); + + let 
server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let brotlied_content = brotli_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + brotlied_content.len() + ) + .as_bytes(), + &brotlied_content, + ] + .concat(); + let response_second_part = b"\r\n0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_2() { + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let brotlied_content = brotli_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + brotlied_content.len() + ) + .as_bytes(), + &brotlied_content, + b"\r\n", + ] + .concat(); + let response_second_part = b"0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_with_extra_bytes() { + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let brotlied_content = brotli_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + brotlied_content.len() + ) + .as_bytes(), + &brotlied_content, + ] + .concat(); + let response_second_part = b"\r\n2ab\r\n0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + 
.expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + let err = res.text().await.expect_err("there must be an error"); + assert!(err.is_decode()); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} diff --git a/rust/reqwest/tests/ci.rs b/rust/reqwest/tests/ci.rs new file mode 100644 index 0000000000..a33e4d24d9 --- /dev/null +++ b/rust/reqwest/tests/ci.rs @@ -0,0 +1,14 @@ +#![cfg(not(target_arch = "wasm32"))] +#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))] +mod support; +use support::server; + +#[tokio::test] +#[should_panic(expected = "test server should not panic")] +async fn server_panics_should_propagate() { + let server = server::http(|_| async { + panic!("kaboom"); + }); + + let _ = reqwest::get(format!("http://{}/ci", server.addr())).await; +} diff --git a/rust/reqwest/tests/client.rs b/rust/reqwest/tests/client.rs new file mode 100644 index 0000000000..fce977f69a --- /dev/null +++ b/rust/reqwest/tests/client.rs @@ -0,0 +1,530 @@ +#![cfg(not(target_arch = "wasm32"))] +#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))] +mod support; + +use support::server; + +use http::header::{CONTENT_LENGTH, CONTENT_TYPE, TRANSFER_ENCODING}; +#[cfg(feature = "json")] +use std::collections::HashMap; + +use reqwest::Client; +use tokio::io::AsyncWriteExt; + +#[tokio::test] +async fn auto_headers() { + let server = server::http(move |req| async move { + assert_eq!(req.method(), "GET"); + + assert_eq!(req.headers()["accept"], "*/*"); + assert_eq!(req.headers().get("user-agent"), None); + if cfg!(feature = "gzip") { + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("gzip")); + } + if cfg!(feature = "brotli") { + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("br")); + } + if cfg!(feature = "zstd") { + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("zstd")); + } + if cfg!(feature = "deflate") { + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("deflate")); + } + + http::Response::default() + }); + + let url = format!("http://{}/1", server.addr()); + let res = reqwest::Client::builder() + .no_proxy() + .build() + .unwrap() + .get(&url) + .send() + .await + .unwrap(); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); + assert_eq!(res.remote_addr(), Some(server.addr())); +} + +#[tokio::test] +async fn donot_set_content_length_0_if_have_no_body() { + let server = server::http(move |req| async move { + let headers = req.headers(); + assert_eq!(headers.get(CONTENT_LENGTH), None); + assert!(headers.get(CONTENT_TYPE).is_none()); + assert!(headers.get(TRANSFER_ENCODING).is_none()); + dbg!(&headers); + http::Response::default() + }); + + let url = format!("http://{}/content-length", server.addr()); + let res = reqwest::Client::builder() + .no_proxy() + .build() + .expect("client builder") + .get(&url) + .send() + .await + .expect("request"); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn user_agent() { + let server = server::http(move |req| 
async move { + assert_eq!(req.headers()["user-agent"], "reqwest-test-agent"); + http::Response::default() + }); + + let url = format!("http://{}/ua", server.addr()); + let res = reqwest::Client::builder() + .user_agent("reqwest-test-agent") + .build() + .expect("client builder") + .get(&url) + .send() + .await + .expect("request"); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn response_text() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::new("Hello".into()) }); + + let client = Client::new(); + + let res = client + .get(&format!("http://{}/text", server.addr())) + .send() + .await + .expect("Failed to get"); + assert_eq!(res.content_length(), Some(5)); + let text = res.text().await.expect("Failed to get text"); + assert_eq!("Hello", text); +} + +#[tokio::test] +async fn response_bytes() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::new("Hello".into()) }); + + let client = Client::new(); + + let res = client + .get(&format!("http://{}/bytes", server.addr())) + .send() + .await + .expect("Failed to get"); + assert_eq!(res.content_length(), Some(5)); + let bytes = res.bytes().await.expect("res.bytes()"); + assert_eq!("Hello", bytes); +} + +#[tokio::test] +#[cfg(feature = "json")] +async fn response_json() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::new("\"Hello\"".into()) }); + + let client = Client::new(); + + let res = client + .get(&format!("http://{}/json", server.addr())) + .send() + .await + .expect("Failed to get"); + let text = res.json::().await.expect("Failed to get json"); + assert_eq!("Hello", text); +} + +#[tokio::test] +async fn body_pipe_response() { + use http_body_util::BodyExt; + let _ = env_logger::try_init(); + + let server = server::http(move |req| async move { + if req.uri() == "/get" { + http::Response::new("pipe me".into()) + } else { + assert_eq!(req.uri(), "/pipe"); + assert_eq!(req.headers()["content-length"], "7"); + + let full: Vec = req + .into_body() + .collect() + .await + .expect("must succeed") + .to_bytes() + .to_vec(); + + assert_eq!(full, b"pipe me"); + + http::Response::default() + } + }); + + let client = Client::new(); + + let res1 = client + .get(&format!("http://{}/get", server.addr())) + .send() + .await + .expect("get1"); + + assert_eq!(res1.status(), reqwest::StatusCode::OK); + assert_eq!(res1.content_length(), Some(7)); + + // and now ensure we can "pipe" the response to another request + let res2 = client + .post(&format!("http://{}/pipe", server.addr())) + .body(res1) + .send() + .await + .expect("res2"); + + assert_eq!(res2.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn overridden_dns_resolution_with_gai() { + let _ = env_logger::builder().is_test(true).try_init(); + let server = server::http(move |_req| async { http::Response::new("Hello".into()) }); + + let overridden_domain = "rust-lang.org"; + let url = format!( + "http://{overridden_domain}:{}/domain_override", + server.addr().port() + ); + let client = reqwest::Client::builder() + .no_proxy() + .resolve(overridden_domain, server.addr()) + .build() + .expect("client builder"); + let req = client.get(&url); + let res = req.send().await.expect("request"); + + assert_eq!(res.status(), reqwest::StatusCode::OK); + let text = res.text().await.expect("Failed to get text"); + assert_eq!("Hello", text); +} + +#[tokio::test] +async fn 
overridden_dns_resolution_with_gai_multiple() { + let _ = env_logger::builder().is_test(true).try_init(); + let server = server::http(move |_req| async { http::Response::new("Hello".into()) }); + + let overridden_domain = "rust-lang.org"; + let url = format!( + "http://{overridden_domain}:{}/domain_override", + server.addr().port() + ); + // the server runs on IPv4 localhost, so provide both IPv4 and IPv6 and let the happy eyeballs + // algorithm decide which address to use. + let client = reqwest::Client::builder() + .no_proxy() + .resolve_to_addrs( + overridden_domain, + &[ + std::net::SocketAddr::new( + std::net::IpAddr::V6(std::net::Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)), + server.addr().port(), + ), + server.addr(), + ], + ) + .build() + .expect("client builder"); + let req = client.get(&url); + let res = req.send().await.expect("request"); + + assert_eq!(res.status(), reqwest::StatusCode::OK); + let text = res.text().await.expect("Failed to get text"); + assert_eq!("Hello", text); +} + +#[cfg(feature = "hickory-dns")] +#[tokio::test] +async fn overridden_dns_resolution_with_hickory_dns() { + let _ = env_logger::builder().is_test(true).try_init(); + let server = server::http(move |_req| async { http::Response::new("Hello".into()) }); + + let overridden_domain = "rust-lang.org"; + let url = format!( + "http://{overridden_domain}:{}/domain_override", + server.addr().port() + ); + let client = reqwest::Client::builder() + .no_proxy() + .resolve(overridden_domain, server.addr()) + .hickory_dns(true) + .build() + .expect("client builder"); + let req = client.get(&url); + let res = req.send().await.expect("request"); + + assert_eq!(res.status(), reqwest::StatusCode::OK); + let text = res.text().await.expect("Failed to get text"); + assert_eq!("Hello", text); +} + +#[cfg(feature = "hickory-dns")] +#[tokio::test] +async fn overridden_dns_resolution_with_hickory_dns_multiple() { + let _ = env_logger::builder().is_test(true).try_init(); + let server = server::http(move |_req| async { http::Response::new("Hello".into()) }); + + let overridden_domain = "rust-lang.org"; + let url = format!( + "http://{overridden_domain}:{}/domain_override", + server.addr().port() + ); + // the server runs on IPv4 localhost, so provide both IPv4 and IPv6 and let the happy eyeballs + // algorithm decide which address to use. 
+ let client = reqwest::Client::builder() + .no_proxy() + .resolve_to_addrs( + overridden_domain, + &[ + std::net::SocketAddr::new( + std::net::IpAddr::V6(std::net::Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)), + server.addr().port(), + ), + server.addr(), + ], + ) + .hickory_dns(true) + .build() + .expect("client builder"); + let req = client.get(&url); + let res = req.send().await.expect("request"); + + assert_eq!(res.status(), reqwest::StatusCode::OK); + let text = res.text().await.expect("Failed to get text"); + assert_eq!("Hello", text); +} + +#[cfg(any(feature = "native-tls", feature = "__rustls",))] +#[test] +fn use_preconfigured_tls_with_bogus_backend() { + struct DefinitelyNotTls; + + reqwest::Client::builder() + .use_preconfigured_tls(DefinitelyNotTls) + .build() + .expect_err("definitely is not TLS"); +} + +#[cfg(feature = "native-tls")] +#[test] +fn use_preconfigured_native_tls_default() { + extern crate native_tls_crate; + + let tls = native_tls_crate::TlsConnector::builder() + .build() + .expect("tls builder"); + + reqwest::Client::builder() + .use_preconfigured_tls(tls) + .build() + .expect("preconfigured default tls"); +} + +#[cfg(feature = "__rustls")] +#[test] +fn use_preconfigured_rustls_default() { + extern crate rustls; + + let root_cert_store = rustls::RootCertStore::empty(); + let tls = rustls::ClientConfig::builder() + .with_root_certificates(root_cert_store) + .with_no_client_auth(); + + reqwest::Client::builder() + .use_preconfigured_tls(tls) + .build() + .expect("preconfigured rustls tls"); +} + +#[cfg(feature = "__rustls")] +#[tokio::test] +#[ignore = "Needs TLS support in the test server"] +async fn http2_upgrade() { + let server = server::http(move |_| async move { http::Response::default() }); + + let url = format!("https://localhost:{}", server.addr().port()); + let res = reqwest::Client::builder() + .danger_accept_invalid_certs(true) + .use_rustls_tls() + .build() + .expect("client builder") + .get(&url) + .send() + .await + .expect("request"); + + assert_eq!(res.status(), reqwest::StatusCode::OK); + assert_eq!(res.version(), reqwest::Version::HTTP_2); +} + +#[cfg(feature = "default-tls")] +#[cfg_attr(feature = "http3", ignore = "enabling http3 seems to break this, why?")] +#[tokio::test] +async fn test_allowed_methods() { + let resp = reqwest::Client::builder() + .https_only(true) + .build() + .expect("client builder") + .get("https://google.com") + .send() + .await; + + assert!(resp.is_ok()); + + let resp = reqwest::Client::builder() + .https_only(true) + .build() + .expect("client builder") + .get("http://google.com") + .send() + .await; + + assert!(resp.is_err()); +} + +#[test] +#[cfg(feature = "json")] +fn add_json_default_content_type_if_not_set_manually() { + let mut map = HashMap::new(); + map.insert("body", "json"); + let content_type = http::HeaderValue::from_static("application/vnd.api+json"); + let req = Client::new() + .post("https://google.com/") + .header(CONTENT_TYPE, &content_type) + .json(&map) + .build() + .expect("request is not valid"); + + assert_eq!(content_type, req.headers().get(CONTENT_TYPE).unwrap()); +} + +#[test] +#[cfg(feature = "json")] +fn update_json_content_type_if_set_manually() { + let mut map = HashMap::new(); + map.insert("body", "json"); + let req = Client::new() + .post("https://google.com/") + .json(&map) + .build() + .expect("request is not valid"); + + assert_eq!("application/json", req.headers().get(CONTENT_TYPE).unwrap()); +} + +#[cfg(all(feature = "__tls", not(feature = "rustls-tls-manual-roots")))] +#[tokio::test] 
+async fn test_tls_info() { + let resp = reqwest::Client::builder() + .tls_info(true) + .build() + .expect("client builder") + .get("https://google.com") + .send() + .await + .expect("response"); + let tls_info = resp.extensions().get::(); + assert!(tls_info.is_some()); + let tls_info = tls_info.unwrap(); + let peer_certificate = tls_info.peer_certificate(); + assert!(peer_certificate.is_some()); + let der = peer_certificate.unwrap(); + assert_eq!(der[0], 0x30); // ASN.1 SEQUENCE + + let resp = reqwest::Client::builder() + .build() + .expect("client builder") + .get("https://google.com") + .send() + .await + .expect("response"); + let tls_info = resp.extensions().get::(); + assert!(tls_info.is_none()); +} + +#[tokio::test] +async fn close_connection_after_idle_timeout() { + let mut server = server::http(move |_| async move { http::Response::default() }); + + let client = reqwest::Client::builder() + .pool_idle_timeout(std::time::Duration::from_secs(1)) + .build() + .unwrap(); + + let url = format!("http://{}", server.addr()); + + client.get(&url).send().await.unwrap(); + + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + + assert!(server + .events() + .iter() + .any(|e| matches!(e, server::Event::ConnectionClosed))); +} + +#[tokio::test] +async fn http1_reason_phrase() { + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + client_socket + .write_all(b"HTTP/1.1 418 I'm not a teapot\r\nContent-Length: 0\r\n\r\n") + .await + .expect("response write_all failed"); + }) + }); + + let client = Client::new(); + + let res = client + .get(&format!("http://{}", server.addr())) + .send() + .await + .expect("Failed to get"); + + assert_eq!( + res.error_for_status().unwrap_err().to_string(), + format!( + "HTTP status client error (418 I'm not a teapot) for url (http://{}/)", + server.addr() + ) + ); +} + +#[tokio::test] +async fn error_has_url() { + let u = "http://does.not.exist.local/ever"; + let err = reqwest::get(u).await.unwrap_err(); + assert_eq!(err.url().map(AsRef::as_ref), Some(u), "{err:?}"); +} diff --git a/rust/reqwest/tests/connector_layers.rs b/rust/reqwest/tests/connector_layers.rs new file mode 100644 index 0000000000..1be18aeb81 --- /dev/null +++ b/rust/reqwest/tests/connector_layers.rs @@ -0,0 +1,374 @@ +#![cfg(not(target_arch = "wasm32"))] +#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))] +mod support; + +use std::time::Duration; + +use futures_util::future::join_all; +use tower::layer::util::Identity; +use tower::limit::ConcurrencyLimitLayer; +use tower::timeout::TimeoutLayer; + +use support::{delay_layer::DelayLayer, server}; + +#[cfg(not(target_arch = "wasm32"))] +#[tokio::test] +async fn non_op_layer() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::Client::builder() + .connector_layer(Identity::new()) + .no_proxy() + .build() + .unwrap(); + + let res = client.get(url).send().await; + + assert!(res.is_ok()); +} + +#[cfg(not(target_arch = "wasm32"))] +#[tokio::test] +async fn non_op_layer_with_timeout() { + let _ = env_logger::try_init(); + + let client = reqwest::Client::builder() + .connector_layer(Identity::new()) + .connect_timeout(Duration::from_millis(200)) + .no_proxy() + .build() + .unwrap(); + + // never returns + let url = "http://192.0.2.1:81/slow"; + + let res = client.get(url).send().await; + + let err = res.unwrap_err(); + + 
assert!(err.is_connect() && err.is_timeout()); +} + +#[cfg(not(target_arch = "wasm32"))] +#[tokio::test] +async fn with_connect_timeout_layer_never_returning() { + let _ = env_logger::try_init(); + + let client = reqwest::Client::builder() + .connector_layer(TimeoutLayer::new(Duration::from_millis(100))) + .no_proxy() + .build() + .unwrap(); + + // never returns + let url = "http://192.0.2.1:81/slow"; + + let res = client.get(url).send().await; + + let err = res.unwrap_err(); + + assert!(err.is_connect() && err.is_timeout()); +} + +#[cfg(not(target_arch = "wasm32"))] +#[tokio::test] +async fn with_connect_timeout_layer_slow() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::Client::builder() + .connector_layer(DelayLayer::new(Duration::from_millis(200))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(100))) + .no_proxy() + .build() + .unwrap(); + + let res = client.get(url).send().await; + + let err = res.unwrap_err(); + + assert!(err.is_connect() && err.is_timeout()); +} + +#[cfg(not(target_arch = "wasm32"))] +#[tokio::test] +async fn multiple_timeout_layers_under_threshold() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::Client::builder() + .connector_layer(DelayLayer::new(Duration::from_millis(100))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(200))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(300))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(500))) + .connect_timeout(Duration::from_millis(200)) + .no_proxy() + .build() + .unwrap(); + + let res = client.get(url).send().await; + + assert!(res.is_ok()); +} + +#[cfg(not(target_arch = "wasm32"))] +#[tokio::test] +async fn multiple_timeout_layers_over_threshold() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::Client::builder() + .connector_layer(DelayLayer::new(Duration::from_millis(100))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(50))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(50))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(50))) + .connect_timeout(Duration::from_millis(50)) + .no_proxy() + .build() + .unwrap(); + + let res = client.get(url).send().await; + + let err = res.unwrap_err(); + + assert!(err.is_connect() && err.is_timeout()); +} + +#[cfg(not(target_arch = "wasm32"))] +#[tokio::test] +async fn with_concurrency_limit_layer_timeout() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::Client::builder() + .connector_layer(DelayLayer::new(Duration::from_millis(100))) + .connector_layer(ConcurrencyLimitLayer::new(1)) + .timeout(Duration::from_millis(200)) + .pool_max_idle_per_host(0) // disable connection reuse to force resource contention on the concurrency limit semaphore + .no_proxy() + .build() + .unwrap(); + + // first call succeeds since no resource contention + let res = client.get(url.clone()).send().await; + assert!(res.is_ok()); + + // 3 calls where the second two wait on the first and time out + let mut futures = Vec::new(); + for _ in 0..3 { + 
futures.push(client.clone().get(url.clone()).send()); + } + + let all_res = join_all(futures).await; + + let timed_out = all_res + .into_iter() + .any(|res| res.is_err_and(|err| err.is_timeout())); + + assert!(timed_out, "at least one request should have timed out"); +} + +#[cfg(not(target_arch = "wasm32"))] +#[tokio::test] +async fn with_concurrency_limit_layer_success() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::Client::builder() + .connector_layer(DelayLayer::new(Duration::from_millis(100))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(200))) + .connector_layer(ConcurrencyLimitLayer::new(1)) + .timeout(Duration::from_millis(1000)) + .pool_max_idle_per_host(0) // disable connection reuse to force resource contention on the concurrency limit semaphore + .no_proxy() + .build() + .unwrap(); + + // first call succeeds since no resource contention + let res = client.get(url.clone()).send().await; + assert!(res.is_ok()); + + // 3 calls of which all are individually below the inner timeout + // and the sum is below outer timeout which affects the final call which waited the whole time + let mut futures = Vec::new(); + for _ in 0..3 { + futures.push(client.clone().get(url.clone()).send()); + } + + let all_res = join_all(futures).await; + + for res in all_res.into_iter() { + assert!( + res.is_ok(), + "neither outer long timeout or inner short timeout should be exceeded" + ); + } +} + +#[cfg(feature = "blocking")] +#[test] +fn non_op_layer_blocking_client() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::blocking::Client::builder() + .connector_layer(Identity::new()) + .build() + .unwrap(); + + let res = client.get(url).send(); + + assert!(res.is_ok()); +} + +#[cfg(feature = "blocking")] +#[test] +fn timeout_layer_blocking_client() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::blocking::Client::builder() + .connector_layer(DelayLayer::new(Duration::from_millis(100))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(50))) + .no_proxy() + .build() + .unwrap(); + + let res = client.get(url).send(); + let err = res.unwrap_err(); + + assert!(err.is_connect() && err.is_timeout()); +} + +#[cfg(feature = "blocking")] +#[test] +fn concurrency_layer_blocking_client_timeout() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::blocking::Client::builder() + .connector_layer(DelayLayer::new(Duration::from_millis(100))) + .connector_layer(ConcurrencyLimitLayer::new(1)) + .timeout(Duration::from_millis(200)) + .pool_max_idle_per_host(0) // disable connection reuse to force resource contention on the concurrency limit semaphore + .build() + .unwrap(); + + let res = client.get(url.clone()).send(); + + assert!(res.is_ok()); + + // 3 calls where the second two wait on the first and time out + let mut join_handles = Vec::new(); + for _ in 0..3 { + let client = client.clone(); + let url = url.clone(); + let join_handle = std::thread::spawn(move || client.get(url.clone()).send()); + join_handles.push(join_handle); + } + + 
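// With the connect delay at 100ms and the concurrency limit at 1, the queued requests serialize, so at least one must exceed the 200ms client timeout. +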
let timed_out = join_handles + .into_iter() + .any(|handle| handle.join().unwrap().is_err_and(|err| err.is_timeout())); + + assert!(timed_out, "at least one request should have timed out"); +} + +#[cfg(feature = "blocking")] +#[test] +fn concurrency_layer_blocking_client_success() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::blocking::Client::builder() + .connector_layer(DelayLayer::new(Duration::from_millis(100))) + .connector_layer(TimeoutLayer::new(Duration::from_millis(200))) + .connector_layer(ConcurrencyLimitLayer::new(1)) + .timeout(Duration::from_millis(1000)) + .pool_max_idle_per_host(0) // disable connection reuse to force resource contention on the concurrency limit semaphore + .build() + .unwrap(); + + let res = client.get(url.clone()).send(); + + assert!(res.is_ok()); + + // 3 calls of which all are individually below the inner timeout + // and the sum is below outer timeout which affects the final call which waited the whole time + let mut join_handles = Vec::new(); + for _ in 0..3 { + let client = client.clone(); + let url = url.clone(); + let join_handle = std::thread::spawn(move || client.get(url.clone()).send()); + join_handles.push(join_handle); + } + + for handle in join_handles { + let res = handle.join().unwrap(); + assert!( + res.is_ok(), + "neither outer long timeout or inner short timeout should be exceeded" + ); + } +} + +#[cfg(not(target_arch = "wasm32"))] +#[tokio::test] +async fn no_generic_bounds_required_for_client_new() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::Client::new(); + let res = client.get(url).send().await; + + assert!(res.is_ok()); +} + +#[cfg(feature = "blocking")] +#[test] +fn no_generic_bounds_required_for_client_new_blocking() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::default() }); + + let url = format!("http://{}", server.addr()); + + let client = reqwest::blocking::Client::new(); + let res = client.get(url).send(); + + assert!(res.is_ok()); +} diff --git a/rust/reqwest/tests/cookie.rs b/rust/reqwest/tests/cookie.rs new file mode 100644 index 0000000000..60ee93ae44 --- /dev/null +++ b/rust/reqwest/tests/cookie.rs @@ -0,0 +1,203 @@ +mod support; +use support::server; + +#[tokio::test] +async fn cookie_response_accessor() { + let server = server::http(move |_req| async move { + http::Response::builder() + .header("Set-Cookie", "key=val") + .header( + "Set-Cookie", + "expires=1; Expires=Wed, 21 Oct 2015 07:28:00 GMT", + ) + .header("Set-Cookie", "path=1; Path=/the-path") + .header("Set-Cookie", "maxage=1; Max-Age=100") + .header("Set-Cookie", "domain=1; Domain=mydomain") + .header("Set-Cookie", "secure=1; Secure") + .header("Set-Cookie", "httponly=1; HttpOnly") + .header("Set-Cookie", "samesitelax=1; SameSite=Lax") + .header("Set-Cookie", "samesitestrict=1; SameSite=Strict") + .body(Default::default()) + .unwrap() + }); + + let client = reqwest::Client::new(); + + let url = format!("http://{}/", server.addr()); + let res = client.get(&url).send().await.unwrap(); + + let cookies = res.cookies().collect::<Vec<_>>(); + + // key=val + assert_eq!(cookies[0].name(), "key"); + assert_eq!(cookies[0].value(), "val"); + + // expires + assert_eq!(cookies[1].name(), "expires"); + assert_eq!( +
cookies[1].expires().unwrap(), + std::time::SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(1_445_412_480) + ); + + // path + assert_eq!(cookies[2].name(), "path"); + assert_eq!(cookies[2].path().unwrap(), "/the-path"); + + // max-age + assert_eq!(cookies[3].name(), "maxage"); + assert_eq!( + cookies[3].max_age().unwrap(), + std::time::Duration::from_secs(100) + ); + + // domain + assert_eq!(cookies[4].name(), "domain"); + assert_eq!(cookies[4].domain().unwrap(), "mydomain"); + + // secure + assert_eq!(cookies[5].name(), "secure"); + assert_eq!(cookies[5].secure(), true); + + // httponly + assert_eq!(cookies[6].name(), "httponly"); + assert_eq!(cookies[6].http_only(), true); + + // samesitelax + assert_eq!(cookies[7].name(), "samesitelax"); + assert!(cookies[7].same_site_lax()); + + // samesitestrict + assert_eq!(cookies[8].name(), "samesitestrict"); + assert!(cookies[8].same_site_strict()); +} + +#[tokio::test] +async fn cookie_store_simple() { + let server = server::http(move |req| async move { + if req.uri() == "/2" { + assert_eq!(req.headers()["cookie"], "key=val"); + } + http::Response::builder() + .header("Set-Cookie", "key=val; HttpOnly") + .body(Default::default()) + .unwrap() + }); + + let client = reqwest::Client::builder() + .cookie_store(true) + .build() + .unwrap(); + + let url = format!("http://{}/", server.addr()); + client.get(&url).send().await.unwrap(); + + let url = format!("http://{}/2", server.addr()); + client.get(&url).send().await.unwrap(); +} + +#[tokio::test] +async fn cookie_store_overwrite_existing() { + let server = server::http(move |req| async move { + if req.uri() == "/" { + http::Response::builder() + .header("Set-Cookie", "key=val") + .body(Default::default()) + .unwrap() + } else if req.uri() == "/2" { + assert_eq!(req.headers()["cookie"], "key=val"); + http::Response::builder() + .header("Set-Cookie", "key=val2") + .body(Default::default()) + .unwrap() + } else { + assert_eq!(req.uri(), "/3"); + assert_eq!(req.headers()["cookie"], "key=val2"); + http::Response::default() + } + }); + + let client = reqwest::Client::builder() + .cookie_store(true) + .build() + .unwrap(); + + let url = format!("http://{}/", server.addr()); + client.get(&url).send().await.unwrap(); + + let url = format!("http://{}/2", server.addr()); + client.get(&url).send().await.unwrap(); + + let url = format!("http://{}/3", server.addr()); + client.get(&url).send().await.unwrap(); +} + +#[tokio::test] +async fn cookie_store_max_age() { + let server = server::http(move |req| async move { + assert_eq!(req.headers().get("cookie"), None); + http::Response::builder() + .header("Set-Cookie", "key=val; Max-Age=0") + .body(Default::default()) + .unwrap() + }); + + let client = reqwest::Client::builder() + .cookie_store(true) + .build() + .unwrap(); + let url = format!("http://{}/", server.addr()); + client.get(&url).send().await.unwrap(); + client.get(&url).send().await.unwrap(); +} + +#[tokio::test] +async fn cookie_store_expires() { + let server = server::http(move |req| async move { + assert_eq!(req.headers().get("cookie"), None); + http::Response::builder() + .header( + "Set-Cookie", + "key=val; Expires=Wed, 21 Oct 2015 07:28:00 GMT", + ) + .body(Default::default()) + .unwrap() + }); + + let client = reqwest::Client::builder() + .cookie_store(true) + .build() + .unwrap(); + + let url = format!("http://{}/", server.addr()); + client.get(&url).send().await.unwrap(); + client.get(&url).send().await.unwrap(); +} + +#[tokio::test] +async fn cookie_store_path() { + let server = 
server::http(move |req| async move { + if req.uri() == "/" { + assert_eq!(req.headers().get("cookie"), None); + http::Response::builder() + .header("Set-Cookie", "key=val; Path=/subpath") + .body(Default::default()) + .unwrap() + } else { + assert_eq!(req.uri(), "/subpath"); + assert_eq!(req.headers()["cookie"], "key=val"); + http::Response::default() + } + }); + + let client = reqwest::Client::builder() + .cookie_store(true) + .build() + .unwrap(); + + let url = format!("http://{}/", server.addr()); + client.get(&url).send().await.unwrap(); + client.get(&url).send().await.unwrap(); + + let url = format!("http://{}/subpath", server.addr()); + client.get(&url).send().await.unwrap(); +} diff --git a/rust/reqwest/tests/deflate.rs b/rust/reqwest/tests/deflate.rs new file mode 100644 index 0000000000..147c363075 --- /dev/null +++ b/rust/reqwest/tests/deflate.rs @@ -0,0 +1,358 @@ +mod support; +use flate2::write::ZlibEncoder; +use flate2::Compression; +use std::io::Write; +use support::server; +use tokio::io::AsyncWriteExt; + +#[tokio::test] +async fn deflate_response() { + deflate_case(10_000, 4096).await; +} + +#[tokio::test] +async fn deflate_single_byte_chunks() { + deflate_case(10, 1).await; +} + +#[tokio::test] +async fn test_deflate_empty_body() { + let server = server::http(move |req| async move { + assert_eq!(req.method(), "HEAD"); + + http::Response::builder() + .header("content-encoding", "deflate") + .body(Default::default()) + .unwrap() + }); + + let client = reqwest::Client::new(); + let res = client + .head(&format!("http://{}/deflate", server.addr())) + .send() + .await + .unwrap(); + + let body = res.text().await.unwrap(); + + assert_eq!(body, ""); +} + +#[tokio::test] +async fn test_accept_header_is_not_changed_if_set() { + let server = server::http(move |req| async move { + assert_eq!(req.headers()["accept"], "application/json"); + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("deflate")); + http::Response::default() + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/accept", server.addr())) + .header( + reqwest::header::ACCEPT, + reqwest::header::HeaderValue::from_static("application/json"), + ) + .send() + .await + .unwrap(); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn test_accept_encoding_header_is_not_changed_if_set() { + let server = server::http(move |req| async move { + assert_eq!(req.headers()["accept"], "*/*"); + assert_eq!(req.headers()["accept-encoding"], "identity"); + http::Response::default() + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/accept-encoding", server.addr())) + .header( + reqwest::header::ACCEPT_ENCODING, + reqwest::header::HeaderValue::from_static("identity"), + ) + .send() + .await + .unwrap(); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +async fn deflate_case(response_size: usize, chunk_size: usize) { + use futures_util::stream::StreamExt; + + let content: String = (0..response_size) + .into_iter() + .map(|i| format!("test {i}")) + .collect(); + + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(content.as_bytes()).unwrap(); + let deflated_content = encoder.finish().unwrap(); + + let mut response = format!( + "\ + HTTP/1.1 200 OK\r\n\ + Server: test-accept\r\n\ + Content-Encoding: deflate\r\n\ + Content-Length: {}\r\n\ + \r\n", + &deflated_content.len() + ) + .into_bytes(); + response.extend(&deflated_content); + + let server = 
server::http(move |req| { + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("deflate")); + + let deflated = deflated_content.clone(); + async move { + let len = deflated.len(); + let stream = + futures_util::stream::unfold((deflated, 0), move |(deflated, pos)| async move { + let chunk = deflated.chunks(chunk_size).nth(pos)?.to_vec(); + + Some((chunk, (deflated, pos + 1))) + }); + + let body = reqwest::Body::wrap_stream(stream.map(Ok::<_, std::convert::Infallible>)); + + http::Response::builder() + .header("content-encoding", "deflate") + .header("content-length", len) + .body(body) + .unwrap() + } + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/deflate", server.addr())) + .send() + .await + .expect("response"); + + let body = res.text().await.expect("text"); + assert_eq!(body, content); +} + +const COMPRESSED_RESPONSE_HEADERS: &[u8] = b"HTTP/1.1 200 OK\x0d\x0a\ + Content-Type: text/plain\x0d\x0a\ + Connection: keep-alive\x0d\x0a\ + Content-Encoding: deflate\x0d\x0a"; + +const RESPONSE_CONTENT: &str = "some message here"; + +fn deflate_compress(input: &[u8]) -> Vec<u8> { + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(input).unwrap(); + encoder.finish().unwrap() +} + +#[tokio::test] +async fn test_non_chunked_non_fragmented_response() { + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let deflated_content = deflate_compress(RESPONSE_CONTENT.as_bytes()); + let content_length_header = + format!("Content-Length: {}\r\n\r\n", deflated_content.len()).into_bytes(); + let response = [ + COMPRESSED_RESPONSE_HEADERS, + &content_length_header, + &deflated_content, + ] + .concat(); + + client_socket + .write_all(response.as_slice()) + .await + .expect("response write_all failed"); + client_socket.flush().await.expect("response flush failed"); + }) + }); + + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_1() { + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let deflated_content = deflate_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + deflated_content.len() + ) + .as_bytes(), + &deflated_content, + ] + .concat(); + let response_second_part = b"\r\n0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); +
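// The body only completes once the terminating zero-length chunk arrives after the server's sleep, so the elapsed time must cover the delay (less the timer margin). +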
assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_2() { + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let deflated_content = deflate_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + deflated_content.len() + ) + .as_bytes(), + &deflated_content, + b"\r\n", + ] + .concat(); + let response_second_part = b"0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_with_extra_bytes() { + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let deflated_content = deflate_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + deflated_content.len() + ) + .as_bytes(), + &deflated_content, + ] + .concat(); + let response_second_part = b"\r\n2ab\r\n0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + let err = res.text().await.expect_err("there must be an error"); + assert!(err.is_decode()); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} diff --git a/rust/reqwest/tests/gzip.rs b/rust/reqwest/tests/gzip.rs new file mode 100644 index 0000000000..1028ebfade --- /dev/null +++ b/rust/reqwest/tests/gzip.rs @@ -0,0 +1,357 @@ +mod support; +use flate2::write::GzEncoder; +use flate2::Compression; +use support::server; + +use std::io::Write; +use std::time::Duration; +use tokio::io::AsyncWriteExt; + +#[tokio::test] +async fn gzip_response() { + gzip_case(10_000, 4096).await; +} + +#[tokio::test] +async fn 
gzip_single_byte_chunks() { + gzip_case(10, 1).await; +} + +#[tokio::test] +async fn test_gzip_empty_body() { + let server = server::http(move |req| async move { + assert_eq!(req.method(), "HEAD"); + + http::Response::builder() + .header("content-encoding", "gzip") + .body(Default::default()) + .unwrap() + }); + + let client = reqwest::Client::new(); + let res = client + .head(&format!("http://{}/gzip", server.addr())) + .send() + .await + .unwrap(); + + let body = res.text().await.unwrap(); + + assert_eq!(body, ""); +} + +#[tokio::test] +async fn test_accept_header_is_not_changed_if_set() { + let server = server::http(move |req| async move { + assert_eq!(req.headers()["accept"], "application/json"); + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("gzip")); + http::Response::default() + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/accept", server.addr())) + .header( + reqwest::header::ACCEPT, + reqwest::header::HeaderValue::from_static("application/json"), + ) + .send() + .await + .unwrap(); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn test_accept_encoding_header_is_not_changed_if_set() { + let server = server::http(move |req| async move { + assert_eq!(req.headers()["accept"], "*/*"); + assert_eq!(req.headers()["accept-encoding"], "identity"); + http::Response::default() + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/accept-encoding", server.addr())) + .header( + reqwest::header::ACCEPT_ENCODING, + reqwest::header::HeaderValue::from_static("identity"), + ) + .send() + .await + .unwrap(); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +async fn gzip_case(response_size: usize, chunk_size: usize) { + use futures_util::stream::StreamExt; + + let content: String = (0..response_size) + .into_iter() + .map(|i| format!("test {i}")) + .collect(); + + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(content.as_bytes()).unwrap(); + let gzipped_content = encoder.finish().unwrap(); + + let mut response = format!( + "\ + HTTP/1.1 200 OK\r\n\ + Server: test-accept\r\n\ + Content-Encoding: gzip\r\n\ + Content-Length: {}\r\n\ + \r\n", + &gzipped_content.len() + ) + .into_bytes(); + response.extend(&gzipped_content); + + let server = server::http(move |req| { + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("gzip")); + + let gzipped = gzipped_content.clone(); + async move { + let len = gzipped.len(); + let stream = + futures_util::stream::unfold((gzipped, 0), move |(gzipped, pos)| async move { + let chunk = gzipped.chunks(chunk_size).nth(pos)?.to_vec(); + + Some((chunk, (gzipped, pos + 1))) + }); + + let body = reqwest::Body::wrap_stream(stream.map(Ok::<_, std::convert::Infallible>)); + + http::Response::builder() + .header("content-encoding", "gzip") + .header("content-length", len) + .body(body) + .unwrap() + } + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/gzip", server.addr())) + .send() + .await + .expect("response"); + + let body = res.text().await.expect("text"); + assert_eq!(body, content); +} + +const COMPRESSED_RESPONSE_HEADERS: &[u8] = b"HTTP/1.1 200 OK\x0d\x0a\ + Content-Type: text/plain\x0d\x0a\ + Connection: keep-alive\x0d\x0a\ + Content-Encoding: gzip\x0d\x0a"; + +const RESPONSE_CONTENT: &str = "some message here"; + +fn gzip_compress(input: &[u8]) -> Vec<u8> { + let mut encoder =
GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(input).unwrap(); + encoder.finish().unwrap() +} + +#[tokio::test] +async fn test_non_chunked_non_fragmented_response() { + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let gzipped_content = gzip_compress(RESPONSE_CONTENT.as_bytes()); + let content_length_header = + format!("Content-Length: {}\r\n\r\n", gzipped_content.len()).into_bytes(); + let response = [ + COMPRESSED_RESPONSE_HEADERS, + &content_length_header, + &gzipped_content, + ] + .concat(); + + client_socket + .write_all(response.as_slice()) + .await + .expect("response write_all failed"); + client_socket.flush().await.expect("response flush failed"); + }) + }); + + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_1() { + const DELAY_BETWEEN_RESPONSE_PARTS: Duration = Duration::from_millis(1000); + const DELAY_MARGIN: Duration = Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let gzipped_content = gzip_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + gzipped_content.len() + ) + .as_bytes(), + &gzipped_content, + ] + .concat(); + let response_second_part = b"\r\n0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_2() { + const DELAY_BETWEEN_RESPONSE_PARTS: Duration = Duration::from_millis(1000); + const DELAY_MARGIN: Duration = Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let gzipped_content = gzip_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + gzipped_content.len() + ) + .as_bytes(), + &gzipped_content, + b"\r\n", + ] + .concat(); + let response_second_part = b"0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() 
+ .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_with_extra_bytes() { + const DELAY_BETWEEN_RESPONSE_PARTS: Duration = Duration::from_millis(1000); + const DELAY_MARGIN: Duration = Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let gzipped_content = gzip_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + gzipped_content.len() + ) + .as_bytes(), + &gzipped_content, + ] + .concat(); + let response_second_part = b"\r\n2ab\r\n0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + let err = res.text().await.expect_err("there must be an error"); + assert!(err.is_decode()); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} diff --git a/rust/reqwest/tests/http3.rs b/rust/reqwest/tests/http3.rs new file mode 100644 index 0000000000..e8b1023c79 --- /dev/null +++ b/rust/reqwest/tests/http3.rs @@ -0,0 +1,299 @@ +#![cfg(feature = "http3")] +#![cfg(not(target_arch = "wasm32"))] + +mod support; + +use http::header::CONTENT_LENGTH; +use std::error::Error; +use support::server; + +fn assert_send_sync(_: &T) {} + +#[tokio::test] +async fn http3_request_full() { + use http_body_util::BodyExt; + + let server = server::Http3::new().build(move |req| async move { + assert_eq!(req.headers()[CONTENT_LENGTH], "5"); + let reqb = req.collect().await.unwrap().to_bytes(); + assert_eq!(reqb, "hello"); + http::Response::default() + }); + + let url = format!("https://{}/content-length", server.addr()); + let res_fut = reqwest::Client::builder() + .http3_prior_knowledge() + .danger_accept_invalid_certs(true) + .build() + .expect("client builder") + .post(url) + .version(http::Version::HTTP_3) + .body("hello") + .send(); + + assert_send_sync(&res_fut); + let res = res_fut.await.expect("request"); + + assert_eq!(res.version(), http::Version::HTTP_3); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +async fn find_free_tcp_addr() -> std::net::SocketAddr { + let listener = tokio::net::TcpListener::bind("[::1]:0").await.unwrap(); + listener.local_addr().unwrap() +} + +#[cfg(feature = "http3")] +#[tokio::test] +async fn http3_test_failed_connection() { + let addr = find_free_tcp_addr().await; + let port = addr.port(); + + let url = format!("https://[::1]:{port}/"); + let client = reqwest::Client::builder() + .http3_prior_knowledge() + .danger_accept_invalid_certs(true) + .http3_max_idle_timeout(std::time::Duration::from_millis(20)) + .build() + .expect("client builder"); + + let err = client + .get(&url) + .version(http::Version::HTTP_3) + .send() + .await + 
.unwrap_err(); + + let err = err + .source() + .unwrap() + .source() + .unwrap() + .downcast_ref::<quinn::ConnectionError>() + .unwrap(); + assert_eq!(*err, quinn::ConnectionError::TimedOut); + + let err = client + .get(&url) + .version(http::Version::HTTP_3) + .send() + .await + .unwrap_err(); + + let err = err + .source() + .unwrap() + .source() + .unwrap() + .downcast_ref::<quinn::ConnectionError>() + .unwrap(); + assert_eq!(*err, quinn::ConnectionError::TimedOut); + + let server = server::Http3::new() + .with_addr(addr) + .build(|_| async { http::Response::default() }); + + let res = client + .post(&url) + .version(http::Version::HTTP_3) + .body("hello") + .send() + .await + .expect("request"); + + assert_eq!(res.version(), http::Version::HTTP_3); + assert_eq!(res.status(), reqwest::StatusCode::OK); + drop(server); +} + +#[cfg(feature = "http3")] +#[tokio::test] +async fn http3_test_concurrent_request() { + let server = server::Http3::new().build(|req| async move { + let mut res = http::Response::default(); + *res.body_mut() = reqwest::Body::from(format!("hello {}", req.uri().path())); + res + }); + let addr = server.addr(); + + let client = reqwest::Client::builder() + .http3_prior_knowledge() + .danger_accept_invalid_certs(true) + .http3_max_idle_timeout(std::time::Duration::from_millis(20)) + .build() + .expect("client builder"); + + let mut tasks = vec![]; + for i in 0..10 { + let client = client.clone(); + tasks.push(async move { + let url = format!("https://{}/{}", addr, i); + + client + .post(&url) + .version(http::Version::HTTP_3) + .send() + .await + .expect("request") + }); + } + + let handlers = tasks.into_iter().map(tokio::spawn).collect::<Vec<_>>(); + + for (i, handler) in handlers.into_iter().enumerate() { + let result = handler.await.unwrap(); + + assert_eq!(result.version(), http::Version::HTTP_3); + assert_eq!(result.status(), reqwest::StatusCode::OK); + + let body = result.text().await.unwrap(); + assert_eq!(body, format!("hello /{}", i)); + } + + drop(server); +} + +#[cfg(feature = "http3")] +#[tokio::test] +async fn http3_test_reconnection() { + use std::error::Error; + + use h3::error::{ConnectionError, StreamError}; + + let server = server::Http3::new().build(|_| async { http::Response::default() }); + let addr = server.addr(); + + let url = format!("https://{}/", addr); + let client = reqwest::Client::builder() + .http3_prior_knowledge() + .danger_accept_invalid_certs(true) + .http3_max_idle_timeout(std::time::Duration::from_millis(20)) + .build() + .expect("client builder"); + + let res = client + .post(&url) + .version(http::Version::HTTP_3) + .send() + .await + .expect("request"); + + assert_eq!(res.version(), http::Version::HTTP_3); + assert_eq!(res.status(), reqwest::StatusCode::OK); + drop(server); + + let err = client + .get(&url) + .version(http::Version::HTTP_3) + .send() + .await + .unwrap_err(); + + let err = err + .source() + .unwrap() + .source() + .unwrap() + .downcast_ref::<StreamError>() + .unwrap(); + + assert!(matches!( + err, + StreamError::ConnectionError { + 0: ConnectionError::Timeout { .. }, + ..
+ } + )); + + let server = server::Http3::new() + .with_addr(addr) + .build(|_| async { http::Response::default() }); + + let res = client + .post(&url) + .version(http::Version::HTTP_3) + .body("hello") + .send() + .await + .expect("request"); + + assert_eq!(res.version(), http::Version::HTTP_3); + assert_eq!(res.status(), reqwest::StatusCode::OK); + drop(server); +} + +#[cfg(all(feature = "http3", feature = "stream"))] +#[tokio::test] +async fn http3_request_stream() { + use http_body_util::BodyExt; + + let server = server::Http3::new().build(move |req| async move { + let reqb = req.collect().await.unwrap().to_bytes(); + assert_eq!(reqb, "hello world"); + http::Response::default() + }); + + let url = format!("https://{}", server.addr()); + let body = reqwest::Body::wrap_stream(futures_util::stream::iter(vec![ + Ok::<_, std::convert::Infallible>("hello"), + Ok::<_, std::convert::Infallible>(" "), + Ok::<_, std::convert::Infallible>("world"), + ])); + + let res = reqwest::Client::builder() + .http3_prior_knowledge() + .danger_accept_invalid_certs(true) + .build() + .expect("client builder") + .post(url) + .version(http::Version::HTTP_3) + .body(body) + .send() + .await + .expect("request"); + + assert_eq!(res.version(), http::Version::HTTP_3); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[cfg(all(feature = "http3", feature = "stream"))] +#[tokio::test] +async fn http3_request_stream_error() { + use http_body_util::BodyExt; + + let server = server::Http3::new().build(move |req| async move { + // HTTP/3 response can start and finish before the entire request body has been received. + // To avoid prematurely terminating the session, collect full request body before responding. + let _ = req.collect().await; + + http::Response::default() + }); + + let url = format!("https://{}", server.addr()); + let body = reqwest::Body::wrap_stream(futures_util::stream::iter(vec![ + Ok::<_, std::io::Error>("first chunk"), + Err::<_, std::io::Error>(std::io::Error::other("oh no!")), + ])); + + let res = reqwest::Client::builder() + .http3_prior_knowledge() + .danger_accept_invalid_certs(true) + .build() + .expect("client builder") + .post(url) + .version(http::Version::HTTP_3) + .body(body) + .send() + .await; + + let err = res.unwrap_err(); + assert!(err.is_request()); + let err = err + .source() + .unwrap() + .source() + .unwrap() + .downcast_ref::<reqwest::Error>() // assumed downcast target: reqwest's own Error, which exposes is_body() + .unwrap(); + assert!(err.is_body()); +} diff --git a/rust/reqwest/tests/multipart.rs b/rust/reqwest/tests/multipart.rs new file mode 100644 index 0000000000..5906471a1b --- /dev/null +++ b/rust/reqwest/tests/multipart.rs @@ -0,0 +1,236 @@ +#![cfg(not(target_arch = "wasm32"))] +mod support; +use http_body_util::BodyExt; +use support::server; + +#[tokio::test] +async fn text_part() { + let _ = env_logger::try_init(); + + let form = reqwest::multipart::Form::new().text("foo", "bar"); + + let expected_body = format!( + "\ + --{0}\r\n\ + Content-Disposition: form-data; name=\"foo\"\r\n\r\n\ + bar\r\n\ + --{0}--\r\n\ + ", + form.boundary() + ); + + let ct = format!("multipart/form-data; boundary={}", form.boundary()); + + let server = server::http(move |mut req| { + let ct = ct.clone(); + let expected_body = expected_body.clone(); + async move { + assert_eq!(req.method(), "POST"); + assert_eq!(req.headers()["content-type"], ct); + assert_eq!( + req.headers()["content-length"], + expected_body.len().to_string() + ); + + let mut full: Vec<u8> = Vec::new(); + while let Some(item) = req.body_mut().frame().await { +
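// Accumulate each data frame so the reassembled multipart body can be compared byte-for-byte below. +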
full.extend(&*item.unwrap().into_data().unwrap()); + } + + assert_eq!(full, expected_body.as_bytes()); + + http::Response::default() + } + }); + + let url = format!("http://{}/multipart/1", server.addr()); + + let res = reqwest::Client::new() + .post(&url) + .multipart(form) + .send() + .await + .unwrap(); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[cfg(feature = "stream")] +#[tokio::test] +async fn stream_part() { + use futures_util::stream; + use std::future; + + let _ = env_logger::try_init(); + + let stream = reqwest::Body::wrap_stream(stream::once(future::ready(Ok::<_, reqwest::Error>( + "part1 part2".to_owned(), + )))); + let part = reqwest::multipart::Part::stream(stream); + + let form = reqwest::multipart::Form::new() + .text("foo", "bar") + .part("part_stream", part); + + let expected_body = format!( + "\ + --{0}\r\n\ + Content-Disposition: form-data; name=\"foo\"\r\n\ + \r\n\ + bar\r\n\ + --{0}\r\n\ + Content-Disposition: form-data; name=\"part_stream\"\r\n\ + \r\n\ + part1 part2\r\n\ + --{0}--\r\n\ + ", + form.boundary() + ); + + let ct = format!("multipart/form-data; boundary={}", form.boundary()); + + let server = server::http(move |req| { + let ct = ct.clone(); + let expected_body = expected_body.clone(); + async move { + assert_eq!(req.method(), "POST"); + assert_eq!(req.headers()["content-type"], ct); + assert_eq!(req.headers()["transfer-encoding"], "chunked"); + + let full = req.collect().await.unwrap().to_bytes(); + + assert_eq!(full, expected_body.as_bytes()); + + http::Response::default() + } + }); + + let url = format!("http://{}/multipart/1", server.addr()); + + let client = reqwest::Client::new(); + + let res = client + .post(&url) + .multipart(form) + .send() + .await + .expect("Failed to post multipart"); + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[cfg(feature = "blocking")] +#[test] +fn blocking_file_part() { + let _ = env_logger::try_init(); + + let form = reqwest::blocking::multipart::Form::new() + .file("foo", "Cargo.lock") + .unwrap(); + + let fcontents = std::fs::read_to_string("Cargo.lock").unwrap(); + + let expected_body = format!( + "\ + --{0}\r\n\ + Content-Disposition: form-data; name=\"foo\"; filename=\"Cargo.lock\"\r\n\ + Content-Type: application/octet-stream\r\n\r\n\ + {1}\r\n\ + --{0}--\r\n\ + ", + form.boundary(), + fcontents + ); + + let ct = format!("multipart/form-data; boundary={}", form.boundary()); + + let server = server::http(move |req| { + let ct = ct.clone(); + let expected_body = expected_body.clone(); + async move { + assert_eq!(req.method(), "POST"); + assert_eq!(req.headers()["content-type"], ct); + // files know their exact size + assert_eq!( + req.headers()["content-length"], + expected_body.len().to_string() + ); + + let full = req.collect().await.unwrap().to_bytes(); + + assert_eq!(full, expected_body.as_bytes()); + + http::Response::default() + } + }); + + let url = format!("http://{}/multipart/2", server.addr()); + + let res = reqwest::blocking::Client::new() + .post(&url) + .multipart(form) + .send() + .unwrap(); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[cfg(feature = "stream")] +#[tokio::test] +async fn async_impl_file_part() { + let _ = env_logger::try_init(); + + let form = reqwest::multipart::Form::new() + .file("foo", "Cargo.lock") + .await + .unwrap(); + + let fcontents = std::fs::read_to_string("Cargo.lock").unwrap(); + + let expected_body = 
format!( + "\ + --{0}\r\n\ + Content-Disposition: form-data; name=\"foo\"; filename=\"Cargo.lock\"\r\n\ + Content-Type: application/octet-stream\r\n\r\n\ + {1}\r\n\ + --{0}--\r\n\ + ", + form.boundary(), + fcontents + ); + + let ct = format!("multipart/form-data; boundary={}", form.boundary()); + + let server = server::http(move |req| { + let ct = ct.clone(); + let expected_body = expected_body.clone(); + async move { + assert_eq!(req.method(), "POST"); + assert_eq!(req.headers()["content-type"], ct); + // files know their exact size + assert_eq!( + req.headers()["content-length"], + expected_body.len().to_string() + ); + let full = req.collect().await.unwrap().to_bytes(); + + assert_eq!(full, expected_body.as_bytes()); + + http::Response::default() + } + }); + + let url = format!("http://{}/multipart/3", server.addr()); + + let res = reqwest::Client::new() + .post(&url) + .multipart(form) + .send() + .await + .unwrap(); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} diff --git a/rust/reqwest/tests/not_tcp.rs b/rust/reqwest/tests/not_tcp.rs new file mode 100644 index 0000000000..6023b6bdd6 --- /dev/null +++ b/rust/reqwest/tests/not_tcp.rs @@ -0,0 +1,56 @@ +#![cfg(not(target_arch = "wasm32"))] +#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))] +#![cfg(unix)] + +mod support; + +#[tokio::test] +async fn unix_socket_works() { + let server = support::not_tcp::uds(move |_| async move { http::Response::default() }); + + let res = reqwest::Client::builder() + .unix_socket(server.path()) + .build() + .unwrap() + .get("http://yolo.local/foo") + .send() + .await + .expect("send request"); + + assert_eq!(res.status(), 200); +} + +#[tokio::test] +async fn unix_socket_ignores_proxies() { + let server = support::not_tcp::uds(move |_| async move { http::Response::default() }); + + let res = reqwest::Client::builder() + .unix_socket(server.path()) + .proxy(reqwest::Proxy::http("http://dont.use.me.local").unwrap()) + .build() + .unwrap() + .get("http://yolo.local/foo") + .send() + .await + .expect("send request"); + + assert_eq!(res.status(), 200); +} + +// TODO: enable when test server supports TLS +#[ignore] +#[tokio::test] +async fn unix_socket_uses_tls() { + let server = support::not_tcp::uds(move |_| async move { http::Response::default() }); + + let res = reqwest::Client::builder() + .unix_socket(server.path()) + .build() + .unwrap() + .get("https://yolo.local/foo") + .send() + .await + .expect("send request"); + + assert_eq!(res.status(), 200); +} diff --git a/rust/reqwest/tests/proxy.rs b/rust/reqwest/tests/proxy.rs new file mode 100644 index 0000000000..8dbdde0210 --- /dev/null +++ b/rust/reqwest/tests/proxy.rs @@ -0,0 +1,445 @@ +#![cfg(not(target_arch = "wasm32"))] +#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))] +mod support; +use support::server; + +use std::env; + +use std::sync::LazyLock; +use tokio::sync::Mutex; + +// serialize tests that read from / write to environment variables +static HTTP_PROXY_ENV_MUTEX: LazyLock<Mutex<()>> = LazyLock::new(|| Mutex::new(())); + +#[tokio::test] +async fn http_proxy() { + let url = "http://hyper.rs.local/prox"; + let server = server::http(move |req| { + assert_eq!(req.method(), "GET"); + assert_eq!(req.uri(), url); + assert_eq!(req.headers()["host"], "hyper.rs.local"); + + async { http::Response::default() } + }); + + let proxy = format!("http://{}", server.addr()); + + let res = reqwest::Client::builder() + .proxy(reqwest::Proxy::http(&proxy).unwrap()) + .build() + .unwrap() + .get(url) +
.send() + .await + .unwrap(); + + assert_eq!(res.url().as_str(), url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn http_proxy_basic_auth() { + let url = "http://hyper.rs.local/prox"; + let server = server::http(move |req| { + assert_eq!(req.method(), "GET"); + assert_eq!(req.uri(), url); + assert_eq!(req.headers()["host"], "hyper.rs.local"); + assert_eq!( + req.headers()["proxy-authorization"], + "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" + ); + + async { http::Response::default() } + }); + + let proxy = format!("http://{}", server.addr()); + + let res = reqwest::Client::builder() + .proxy( + reqwest::Proxy::http(&proxy) + .unwrap() + .basic_auth("Aladdin", "open sesame"), + ) + .build() + .unwrap() + .get(url) + .send() + .await + .unwrap(); + + assert_eq!(res.url().as_str(), url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn http_proxy_basic_auth_parsed() { + let url = "http://hyper.rs.local/prox"; + let server = server::http(move |req| { + assert_eq!(req.method(), "GET"); + assert_eq!(req.uri(), url); + assert_eq!(req.headers()["host"], "hyper.rs.local"); + assert_eq!( + req.headers()["proxy-authorization"], + "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" + ); + + async { http::Response::default() } + }); + + let proxy = format!("http://Aladdin:open sesame@{}", server.addr()); + + let res = reqwest::Client::builder() + .proxy(reqwest::Proxy::http(&proxy).unwrap()) + .build() + .unwrap() + .get(url) + .send() + .await + .unwrap(); + + assert_eq!(res.url().as_str(), url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn system_http_proxy_basic_auth_parsed() { + let url = "http://hyper.rs.local/prox"; + let server = server::http(move |req| { + assert_eq!(req.method(), "GET"); + assert_eq!(req.uri(), url); + assert_eq!(req.headers()["host"], "hyper.rs.local"); + assert_eq!( + req.headers()["proxy-authorization"], + "Basic QWxhZGRpbjpvcGVuc2VzYW1l" + ); + + async { http::Response::default() } + }); + + // avoid races with other tests that change "http_proxy" + let _env_lock = HTTP_PROXY_ENV_MUTEX.lock().await; + + // save system setting first. + let system_proxy = env::var("http_proxy"); + + // set-up http proxy. + env::set_var( + "http_proxy", + format!("http://Aladdin:opensesame@{}", server.addr()), + ); + + let res = reqwest::Client::builder() + .build() + .unwrap() + .get(url) + .send() + .await + .unwrap(); + + assert_eq!(res.url().as_str(), url); + assert_eq!(res.status(), reqwest::StatusCode::OK); + + // reset user setting. + match system_proxy { + Err(_) => env::remove_var("http_proxy"), + Ok(proxy) => env::set_var("http_proxy", proxy), + } +} + +#[tokio::test] +async fn test_no_proxy() { + let server = server::http(move |req| { + assert_eq!(req.method(), "GET"); + assert_eq!(req.uri(), "/4"); + + async { http::Response::default() } + }); + let proxy = format!("http://{}", server.addr()); + let url = format!("http://{}/4", server.addr()); + + // set up proxy and use no_proxy to clear up client builder proxies. 
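+ // The server sees an origin-form URI ("/4") here; a proxied request would arrive in absolute form, as asserted in the proxy tests above.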
+ let res = reqwest::Client::builder() + .proxy(reqwest::Proxy::http(&proxy).unwrap()) + .no_proxy() + .build() + .unwrap() + .get(&url) + .send() + .await + .unwrap(); + + assert_eq!(res.url().as_str(), &url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn test_custom_headers() { + let url = "http://hyper.rs.local/prox"; + let server = server::http(move |req| { + assert_eq!(req.method(), "GET"); + assert_eq!(req.uri(), url); + assert_eq!(req.headers()["host"], "hyper.rs.local"); + assert_eq!( + req.headers()["proxy-authorization"], + "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" + ); + async { http::Response::default() } + }); + + let proxy = format!("http://{}", server.addr()); + let mut headers = reqwest::header::HeaderMap::new(); + headers.insert( + // reqwest::header::HeaderName::from_static("Proxy-Authorization"), + reqwest::header::PROXY_AUTHORIZATION, + "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==".parse().unwrap(), + ); + + let res = reqwest::Client::builder() + .proxy(reqwest::Proxy::http(&proxy).unwrap().headers(headers)) + .build() + .unwrap() + .get(url) + .send() + .await + .unwrap(); + + assert_eq!(res.url().as_str(), url); + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn test_using_system_proxy() { + let url = "http://not.a.real.sub.hyper.rs.local/prox"; + let server = server::http(move |req| { + assert_eq!(req.method(), "GET"); + assert_eq!(req.uri(), url); + assert_eq!(req.headers()["host"], "not.a.real.sub.hyper.rs.local"); + + async { http::Response::default() } + }); + + // avoid races with other tests that change "http_proxy" + let _env_lock = HTTP_PROXY_ENV_MUTEX.lock().await; + + // save system setting first. + let system_proxy = env::var("http_proxy"); + // set-up http proxy. + env::set_var("http_proxy", format!("http://{}", server.addr())); + + // system proxy is used by default + let res = reqwest::get(url).await.unwrap(); + + assert_eq!(res.url().as_str(), url); + assert_eq!(res.status(), reqwest::StatusCode::OK); + + // reset user setting. 
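+ // Restore the saved value (or unset it) so later env-reading tests start clean; HTTP_PROXY_ENV_MUTEX keeps them serialized.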
+    match system_proxy {
+        Err(_) => env::remove_var("http_proxy"),
+        Ok(proxy) => env::set_var("http_proxy", proxy),
+    }
+}
+
+#[tokio::test]
+async fn http_over_http() {
+    let url = "http://hyper.rs.local/prox";
+
+    let server = server::http(move |req| {
+        assert_eq!(req.method(), "GET");
+        assert_eq!(req.uri(), url);
+        assert_eq!(req.headers()["host"], "hyper.rs.local");
+
+        async { http::Response::default() }
+    });
+
+    let proxy = format!("http://{}", server.addr());
+
+    let res = reqwest::Client::builder()
+        .proxy(reqwest::Proxy::http(&proxy).unwrap())
+        .build()
+        .unwrap()
+        .get(url)
+        .send()
+        .await
+        .unwrap();
+
+    assert_eq!(res.url().as_str(), url);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+}
+
+#[cfg(feature = "__tls")]
+#[tokio::test]
+async fn tunnel_detects_auth_required() {
+    let url = "https://hyper.rs.local/prox";
+
+    let server = server::http(move |req| {
+        assert_eq!(req.method(), "CONNECT");
+        assert_eq!(req.uri(), "hyper.rs.local:443");
+        assert!(!req
+            .headers()
+            .contains_key(http::header::PROXY_AUTHORIZATION));
+
+        async {
+            let mut res = http::Response::default();
+            *res.status_mut() = http::StatusCode::PROXY_AUTHENTICATION_REQUIRED;
+            res
+        }
+    });
+
+    let proxy = format!("http://{}", server.addr());
+
+    let err = reqwest::Client::builder()
+        .proxy(reqwest::Proxy::https(&proxy).unwrap())
+        .build()
+        .unwrap()
+        .get(url)
+        .send()
+        .await
+        .unwrap_err();
+
+    let err = support::error::inspect(err).pop().unwrap();
+    assert!(
+        err.contains("auth"),
+        "proxy auth err expected, got: {:?}",
+        err
+    );
+}
+
+#[cfg(feature = "__tls")]
+#[tokio::test]
+async fn tunnel_includes_proxy_auth() {
+    let url = "https://hyper.rs.local/prox";
+
+    let server = server::http(move |req| {
+        assert_eq!(req.method(), "CONNECT");
+        assert_eq!(req.uri(), "hyper.rs.local:443");
+        assert_eq!(
+            req.headers()["proxy-authorization"],
+            "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="
+        );
+
+        async {
+            // return 400 to not actually deal with TLS tunneling
+            let mut res = http::Response::default();
+            *res.status_mut() = http::StatusCode::BAD_REQUEST;
+            res
+        }
+    });
+
+    let proxy = format!("http://Aladdin:open%20sesame@{}", server.addr());
+
+    let err = reqwest::Client::builder()
+        .proxy(reqwest::Proxy::https(&proxy).unwrap())
+        .build()
+        .unwrap()
+        .get(url)
+        .send()
+        .await
+        .unwrap_err();
+
+    let err = support::error::inspect(err).pop().unwrap();
+    assert!(
+        err.contains("unsuccessful"),
+        "tunnel unsuccessful expected, got: {:?}",
+        err
+    );
+}
+
+#[cfg(feature = "__tls")]
+#[tokio::test]
+async fn tunnel_includes_user_agent() {
+    let url = "https://hyper.rs.local/prox";
+
+    let server = server::http(move |req| {
+        assert_eq!(req.method(), "CONNECT");
+        assert_eq!(req.uri(), "hyper.rs.local:443");
+        assert_eq!(req.headers()["user-agent"], "reqwest-test");
+
+        async {
+            // return 400 to not actually deal with TLS tunneling
+            let mut res = http::Response::default();
+            *res.status_mut() = http::StatusCode::BAD_REQUEST;
+            res
+        }
+    });
+
+    let proxy = format!("http://{}", server.addr());
+
+    let err = reqwest::Client::builder()
+        .proxy(reqwest::Proxy::https(&proxy).unwrap())
+        .user_agent("reqwest-test")
+        .build()
+        .unwrap()
+        .get(url)
+        .send()
+        .await
+        .unwrap_err();
+
+    let err = support::error::inspect(err).pop().unwrap();
+    assert!(
+        err.contains("unsuccessful"),
+        "tunnel unsuccessful expected, got: {:?}",
+        err
+    );
+}
+
+#[tokio::test]
+async fn tunnel_includes_proxy_auth_with_multiple_proxies() {
+    let url = "http://hyper.rs.local/prox";
+    let server1 =
+        server::http(move |req| {
+            assert_eq!(req.method(), "GET");
+            assert_eq!(req.uri(), url);
+            assert_eq!(req.headers()["host"], "hyper.rs.local");
+            assert_eq!(
+                req.headers()["proxy-authorization"],
+                "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="
+            );
+            assert_eq!(req.headers()["proxy-header"], "proxy2");
+            async { http::Response::default() }
+        });
+
+    let proxy_url = format!("http://Aladdin:open%20sesame@{}", server1.addr());
+
+    let mut headers1 = reqwest::header::HeaderMap::new();
+    headers1.insert("proxy-header", "proxy1".parse().unwrap());
+
+    let mut headers2 = reqwest::header::HeaderMap::new();
+    headers2.insert("proxy-header", "proxy2".parse().unwrap());
+
+    let client = reqwest::Client::builder()
+        // Proxies are checked in registration order: this HTTPS proxy does not
+        // match the (HTTP) URL under test, so it is skipped
+        .proxy(
+            reqwest::Proxy::https(&proxy_url)
+                .unwrap()
+                .headers(headers1.clone()),
+        )
+        // This HTTP proxy matches the URL under test, so it (and its extra
+        // headers) is the one that gets used
+        .proxy(
+            reqwest::Proxy::http(&proxy_url)
+                .unwrap()
+                .headers(headers2.clone()),
+        )
+        .build()
+        .unwrap();
+
+    let res = client.get(url).send().await.unwrap();
+
+    assert_eq!(res.url().as_str(), url);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+
+    let client = reqwest::Client::builder()
+        // Here the matching HTTP proxy is registered first, so it is selected
+        // immediately
+        .proxy(reqwest::Proxy::http(&proxy_url).unwrap().headers(headers2))
+        // The non-matching HTTPS proxy is never reached
+        .proxy(reqwest::Proxy::https(&proxy_url).unwrap().headers(headers1))
+        .build()
+        .unwrap();
+
+    let res = client.get(url).send().await.unwrap();
+
+    assert_eq!(res.url().as_str(), url);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+}
diff --git a/rust/reqwest/tests/redirect.rs b/rust/reqwest/tests/redirect.rs
new file mode 100644
index 0000000000..186717358a
--- /dev/null
+++ b/rust/reqwest/tests/redirect.rs
@@ -0,0 +1,543 @@
+#![cfg(not(target_arch = "wasm32"))]
+#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))]
+mod support;
+use http_body_util::BodyExt;
+use reqwest::Body;
+use support::server;
+
+#[tokio::test]
+async fn test_redirect_301_and_302_and_303_changes_post_to_get() {
+    let client = reqwest::Client::new();
+    let codes = [301u16, 302, 303];
+
+    for &code in &codes {
+        let redirect = server::http(move |req| async move {
+            if req.method() == "POST" {
+                assert_eq!(req.uri(), &*format!("/{code}"));
+                http::Response::builder()
+                    .status(code)
+                    .header("location", "/dst")
+                    .header("server", "test-redirect")
+                    .body(Body::default())
+                    .unwrap()
+            } else {
+                assert_eq!(req.method(), "GET");
+
+                http::Response::builder()
+                    .header("server", "test-dst")
+                    .body(Body::default())
+                    .unwrap()
+            }
+        });
+
+        let url = format!("http://{}/{}", redirect.addr(), code);
+        let dst = format!("http://{}/{}", redirect.addr(), "dst");
+        let res = client.post(&url).send().await.unwrap();
+        assert_eq!(res.url().as_str(), dst);
+        assert_eq!(res.status(), reqwest::StatusCode::OK);
+        assert_eq!(
+            res.headers().get(reqwest::header::SERVER).unwrap(),
+            &"test-dst"
+        );
+    }
+}
+
+#[tokio::test]
+async fn test_redirect_307_and_308_tries_to_get_again() {
+    let client = reqwest::Client::new();
+    let codes = [307u16, 308];
+    for &code in &codes {
+        let redirect = server::http(move |req| async move {
+            assert_eq!(req.method(), "GET");
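+            // 307/308 preserve the request method, so both the initial request
+            // and the redirected request are expected to be GETs here.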
+            if req.uri() == &*format!("/{code}") {
+                http::Response::builder()
+                    .status(code)
+                    .header("location", "/dst")
+                    .header("server", "test-redirect")
+                    .body(Body::default())
+                    .unwrap()
+            } else {
+                assert_eq!(req.uri(), "/dst");
+
+                http::Response::builder()
+                    .header("server", "test-dst")
+                    .body(Body::default())
+                    .unwrap()
+            }
+        });
+
+        let url = format!("http://{}/{}", redirect.addr(), code);
+        let dst = format!("http://{}/{}", redirect.addr(), "dst");
+        let res = client.get(&url).send().await.unwrap();
+        assert_eq!(res.url().as_str(), dst);
+        assert_eq!(res.status(), reqwest::StatusCode::OK);
+        assert_eq!(
+            res.headers().get(reqwest::header::SERVER).unwrap(),
+            &"test-dst"
+        );
+    }
+}
+
+#[tokio::test]
+async fn test_redirect_307_and_308_tries_to_post_again() {
+    let _ = env_logger::try_init();
+    let client = reqwest::Client::new();
+    let codes = [307u16, 308];
+    for &code in &codes {
+        let redirect = server::http(move |mut req| async move {
+            assert_eq!(req.method(), "POST");
+            assert_eq!(req.headers()["content-length"], "5");
+
+            let data = req
+                .body_mut()
+                .frame()
+                .await
+                .unwrap()
+                .unwrap()
+                .into_data()
+                .unwrap();
+            assert_eq!(&*data, b"Hello");
+
+            if req.uri() == &*format!("/{code}") {
+                http::Response::builder()
+                    .status(code)
+                    .header("location", "/dst")
+                    .header("server", "test-redirect")
+                    .body(Body::default())
+                    .unwrap()
+            } else {
+                assert_eq!(req.uri(), "/dst");
+
+                http::Response::builder()
+                    .header("server", "test-dst")
+                    .body(Body::default())
+                    .unwrap()
+            }
+        });
+
+        let url = format!("http://{}/{}", redirect.addr(), code);
+        let dst = format!("http://{}/{}", redirect.addr(), "dst");
+        let res = client.post(&url).body("Hello").send().await.unwrap();
+        assert_eq!(res.url().as_str(), dst);
+        assert_eq!(res.status(), reqwest::StatusCode::OK);
+        assert_eq!(
+            res.headers().get(reqwest::header::SERVER).unwrap(),
+            &"test-dst"
+        );
+    }
+}
+
+#[cfg(feature = "blocking")]
+#[test]
+fn test_redirect_307_does_not_try_if_reader_cannot_reset() {
+    let client = reqwest::blocking::Client::new();
+    let codes = [307u16, 308];
+    for &code in &codes {
+        let redirect = server::http(move |mut req| async move {
+            assert_eq!(req.method(), "POST");
+            assert_eq!(req.uri(), &*format!("/{code}"));
+            assert_eq!(req.headers()["transfer-encoding"], "chunked");
+
+            let data = req
+                .body_mut()
+                .frame()
+                .await
+                .unwrap()
+                .unwrap()
+                .into_data()
+                .unwrap();
+            assert_eq!(&*data, b"Hello");
+
+            http::Response::builder()
+                .status(code)
+                .header("location", "/dst")
+                .header("server", "test-redirect")
+                .body(Body::default())
+                .unwrap()
+        });
+
+        let url = format!("http://{}/{}", redirect.addr(), code);
+        let res = client
+            .post(&url)
+            .body(reqwest::blocking::Body::new(&b"Hello"[..]))
+            .send()
+            .unwrap();
+        assert_eq!(res.url().as_str(), url);
+        assert_eq!(res.status(), code);
+    }
+}
+
+#[tokio::test]
+async fn test_redirect_removes_sensitive_headers() {
+    use tokio::sync::watch;
+
+    let (tx, rx) = watch::channel::<Option<std::net::SocketAddr>>(None);
+
+    let end_server = server::http(move |req| {
+        let mut rx = rx.clone();
+        async move {
+            assert_eq!(req.headers().get("cookie"), None);
+
+            rx.changed().await.unwrap();
+            let mid_addr = rx.borrow().unwrap();
+            assert_eq!(
+                req.headers()["referer"],
+                format!("http://{mid_addr}/sensitive")
+            );
+            http::Response::default()
+        }
+    });
+
+    let end_addr = end_server.addr();
+
+    let mid_server = server::http(move |req| async move {
+        assert_eq!(req.headers()["cookie"], "foo=bar");
+        http::Response::builder()
+            .status(302)
+            .header("location", format!("http://{end_addr}/end"))
+            .body(Body::default())
+            .unwrap()
+    });
+
+    tx.send(Some(mid_server.addr())).unwrap();
+
+    reqwest::Client::builder()
+        .build()
+        .unwrap()
+        .get(&format!("http://{}/sensitive", mid_server.addr()))
+        .header(
+            reqwest::header::COOKIE,
+            reqwest::header::HeaderValue::from_static("foo=bar"),
+        )
+        .send()
+        .await
+        .unwrap();
+}
+
+#[tokio::test]
+async fn test_redirect_policy_can_return_errors() {
+    let server = server::http(move |req| async move {
+        assert_eq!(req.uri(), "/loop");
+        http::Response::builder()
+            .status(302)
+            .header("location", "/loop")
+            .body(Body::default())
+            .unwrap()
+    });
+
+    let url = format!("http://{}/loop", server.addr());
+    let err = reqwest::get(&url).await.unwrap_err();
+    assert!(err.is_redirect());
+}
+
+#[tokio::test]
+async fn test_redirect_policy_can_stop_redirects_without_an_error() {
+    let server = server::http(move |req| async move {
+        assert_eq!(req.uri(), "/no-redirect");
+        http::Response::builder()
+            .status(302)
+            .header("location", "/dont")
+            .body(Body::default())
+            .unwrap()
+    });
+
+    let url = format!("http://{}/no-redirect", server.addr());
+
+    let res = reqwest::Client::builder()
+        .redirect(reqwest::redirect::Policy::none())
+        .build()
+        .unwrap()
+        .get(&url)
+        .send()
+        .await
+        .unwrap();
+
+    assert_eq!(res.url().as_str(), url);
+    assert_eq!(res.status(), reqwest::StatusCode::FOUND);
+}
+
+#[tokio::test]
+async fn test_referer_is_not_set_if_disabled() {
+    let server = server::http(move |req| async move {
+        if req.uri() == "/no-refer" {
+            http::Response::builder()
+                .status(302)
+                .header("location", "/dst")
+                .body(Body::default())
+                .unwrap()
+        } else {
+            assert_eq!(req.uri(), "/dst");
+            assert_eq!(req.headers().get("referer"), None);
+
+            http::Response::default()
+        }
+    });
+
+    reqwest::Client::builder()
+        .referer(false)
+        .build()
+        .unwrap()
+        .get(&format!("http://{}/no-refer", server.addr()))
+        .send()
+        .await
+        .unwrap();
+}
+
+#[tokio::test]
+async fn test_invalid_location_stops_redirect_gh484() {
+    let server = server::http(move |_req| async move {
+        http::Response::builder()
+            .status(302)
+            .header("location", "http://www.yikes{KABOOM}")
+            .body(Body::default())
+            .unwrap()
+    });
+
+    let url = format!("http://{}/yikes", server.addr());
+
+    let res = reqwest::get(&url).await.unwrap();
+
+    assert_eq!(res.url().as_str(), url);
+    assert_eq!(res.status(), reqwest::StatusCode::FOUND);
+}
+
+#[tokio::test]
+async fn test_invalid_scheme_is_rejected() {
+    let server = server::http(move |_req| async move {
+        http::Response::builder()
+            .status(302)
+            .header("location", "htt://www.yikes.com/")
+            .body(Body::default())
+            .unwrap()
+    });
+
+    let url = format!("http://{}/yikes", server.addr());
+
+    let err = reqwest::get(&url).await.unwrap_err();
+    assert!(err.is_builder());
+}
+
+#[cfg(feature = "cookies")]
+#[tokio::test]
+async fn test_redirect_302_with_set_cookies() {
+    let code = 302;
+    let server = server::http(move |req| async move {
+        if req.uri() == "/302" {
+            http::Response::builder()
+                .status(302)
+                .header("location", "/dst")
+                .header("set-cookie", "key=value")
+                .body(Body::default())
+                .unwrap()
+        } else {
+            assert_eq!(req.uri(), "/dst");
+            assert_eq!(req.headers()["cookie"], "key=value");
+            http::Response::default()
+        }
+    });
+
+    let url = format!("http://{}/{}", server.addr(), code);
+    let dst = format!("http://{}/{}", server.addr(), "dst");
+
+    let client = reqwest::ClientBuilder::new()
+        .cookie_store(true)
+        .build()
+        .unwrap();
+    let res = client.get(&url).send().await.unwrap();
+
+    assert_eq!(res.url().as_str(), dst);
+    assert_eq!(res.status(), reqwest::StatusCode::OK);
+}
+
+#[cfg(feature = "__rustls")]
+#[tokio::test]
+#[ignore = "Needs TLS support in the test server"]
+async fn test_redirect_https_only_enforced_gh1312() {
+    let server = server::http(move |_req| async move {
+        http::Response::builder()
+            .status(302)
+            .header("location", "http://insecure")
+            .body(Body::default())
+            .unwrap()
+    });
+
+    let url = format!("https://{}/yikes", server.addr());
+
+    let res = reqwest::Client::builder()
+        .danger_accept_invalid_certs(true)
+        .use_rustls_tls()
+        .https_only(true)
+        .build()
+        .expect("client builder")
+        .get(&url)
+        .send()
+        .await;
+
+    let err = res.unwrap_err();
+    assert!(err.is_redirect());
+}
+
+#[tokio::test]
+async fn test_redirect_limit_to_1() {
+    let server = server::http(move |req| async move {
+        let i: i32 = req
+            .uri()
+            .path()
+            .rsplit('/')
+            .next()
+            .unwrap()
+            .parse::<i32>()
+            .unwrap();
+        assert!(req.uri().path().ends_with(&format!("/redirect/{i}")));
+        http::Response::builder()
+            .status(302)
+            .header("location", format!("/redirect/{}", i + 1))
+            .body(Body::default())
+            .unwrap()
+    });
+    // The number at the end of the uri indicates the total number of redirections
+    let url = format!("http://{}/redirect/0", server.addr());
+
+    let client = reqwest::Client::builder()
+        .redirect(reqwest::redirect::Policy::limited(1))
+        .build()
+        .unwrap();
+    let res = client.get(&url).send().await.unwrap_err();
+    // If the maximum limit is 1, then the final uri should be /redirect/1
+    assert_eq!(
+        res.url().unwrap().as_str(),
+        format!("http://{}/redirect/1", server.addr()).as_str()
+    );
+    assert!(res.is_redirect());
+}
+
+#[tokio::test]
+async fn test_redirect_custom() {
+    let server = server::http(move |req| async move {
+        assert!(req.uri().path().ends_with("/foo"));
+        http::Response::builder()
+            .status(302)
+            .header("location", "/should_not_be_called")
+            .body(Body::default())
+            .unwrap()
+    });
+
+    let url = format!("http://{}/foo", server.addr());
+
+    let res = reqwest::Client::builder()
+        .redirect(reqwest::redirect::Policy::custom(|attempt| {
+            if attempt.url().path().ends_with("/should_not_be_called") {
+                attempt.stop()
+            } else {
+                attempt.follow()
+            }
+        }))
+        .build()
+        .unwrap()
+        .get(&url)
+        .send()
+        .await
+        .unwrap();
+
+    assert_eq!(res.url().as_str(), url);
+    assert_eq!(res.status(), reqwest::StatusCode::FOUND);
+}
+
+#[tokio::test]
+async fn test_scheme_only_check_after_policy_return_follow() {
+    let server = server::http(move |_| async move {
+        http::Response::builder()
+            .status(302)
+            .header("location", "htt://www.yikes.com/")
+            .body(Body::default())
+            .unwrap()
+    });
+
+    let url = format!("http://{}/yikes", server.addr());
+    let res = reqwest::Client::builder()
+        .redirect(reqwest::redirect::Policy::custom(|attempt| attempt.stop()))
+        .build()
+        .unwrap()
+        .get(&url)
+        .send()
+        .await;
+
+    assert!(res.is_ok());
+    assert_eq!(res.unwrap().status(), reqwest::StatusCode::FOUND);
+
+    let res = reqwest::Client::builder()
+        .redirect(reqwest::redirect::Policy::custom(|attempt| {
+            attempt.follow()
+        }))
+        .build()
+        .unwrap()
+        .get(&url)
+        .send()
+        .await;
+
+    assert!(res.is_err());
+    assert!(res.unwrap_err().is_builder());
+}
+
+#[tokio::test]
+async fn test_redirect_301_302_303_empty_payload_headers() {
+    let client = reqwest::Client::new();
+    let codes = [301u16, 302, 303];
+    for &code in &codes {
+        let redirect = server::http(move |mut req| async move {
+            if req.method() == "POST" {
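+                // First hop: the original POST, still carrying its body and
+                // content-* headers.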
+                let data = req
+                    .body_mut()
+                    .frame()
+                    .await
+                    .unwrap()
+                    .unwrap()
+                    .into_data()
+                    .unwrap();
+
+                assert_eq!(&*data, b"Hello");
+                if req.headers().get(reqwest::header::CONTENT_LENGTH).is_some() {
+                    assert_eq!(req.headers()[reqwest::header::CONTENT_LENGTH], "5");
+                }
+                assert_eq!(req.uri(), &*format!("/{code}"));
+
+                http::Response::builder()
+                    .header("location", "/dst")
+                    .header("server", "test-dst")
+                    .status(code)
+                    .body(Body::default())
+                    .unwrap()
+            } else {
+                assert_eq!(req.method(), "GET");
+                assert!(req.headers().get(reqwest::header::CONTENT_TYPE).is_none());
+                assert!(req.headers().get(reqwest::header::CONTENT_LENGTH).is_none());
+                assert!(req
+                    .headers()
+                    .get(reqwest::header::CONTENT_ENCODING)
+                    .is_none());
+                http::Response::builder()
+                    .header("server", "test-dst")
+                    .body(Body::default())
+                    .unwrap()
+            }
+        });
+
+        let url = format!("http://{}/{}", redirect.addr(), code);
+        let dst = format!("http://{}/{}", redirect.addr(), "dst");
+        let res = client
+            .post(&url)
+            .body("Hello")
+            .header(reqwest::header::CONTENT_TYPE, "text/plain")
+            .header(reqwest::header::CONTENT_LENGTH, "5")
+            .header(reqwest::header::CONTENT_ENCODING, "identity")
+            .send()
+            .await
+            .unwrap();
+        assert_eq!(res.url().as_str(), dst);
+        assert_eq!(res.status(), 200);
+        assert_eq!(
+            res.headers().get(reqwest::header::SERVER).unwrap(),
+            &"test-dst"
+        );
+    }
+}
diff --git a/rust/reqwest/tests/retry.rs b/rust/reqwest/tests/retry.rs
new file mode 100644
index 0000000000..15d264e583
--- /dev/null
+++ b/rust/reqwest/tests/retry.rs
@@ -0,0 +1,144 @@
+#![cfg(not(target_arch = "wasm32"))]
+#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))]
+mod support;
+use support::server;
+
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
+
+#[tokio::test]
+async fn retries_apply_in_scope() {
+    let _ = env_logger::try_init();
+    let cnt = Arc::new(AtomicUsize::new(0));
+    let server = server::http(move |_req| {
+        let cnt = cnt.clone();
+        async move {
+            if cnt.fetch_add(1, Ordering::Relaxed) == 0 {
+                // first req is bad
+                http::Response::builder()
+                    .status(http::StatusCode::SERVICE_UNAVAILABLE)
+                    .body(Default::default())
+                    .unwrap()
+            } else {
+                http::Response::default()
+            }
+        }
+    });
+
+    let scope = server.addr().ip().to_string();
+    let retries = reqwest::retry::for_host(scope).classify_fn(|req_rep| {
+        if req_rep.status() == Some(http::StatusCode::SERVICE_UNAVAILABLE) {
+            req_rep.retryable()
+        } else {
+            req_rep.success()
+        }
+    });
+
+    let url = format!("http://{}", server.addr());
+    let resp = reqwest::Client::builder()
+        .retry(retries)
+        .build()
+        .unwrap()
+        .get(url)
+        .send()
+        .await
+        .unwrap();
+
+    assert_eq!(resp.status(), 200);
+}
+
+#[cfg(feature = "http2")]
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn default_retries_have_a_limit() {
+    let _ = env_logger::try_init();
+
+    let server = server::http_with_config(
+        move |req| async move {
+            assert_eq!(req.version(), http::Version::HTTP_2);
+            // refused forever
+            Err(h2::Error::from(h2::Reason::REFUSED_STREAM))
+        },
+        |_| {},
+    );
+
+    let client = reqwest::Client::builder()
+        .http2_prior_knowledge()
+        .build()
+        .unwrap();
+
+    let url = format!("http://{}", server.addr());
+
+    let _err = client.get(url).send().await.unwrap_err();
+}
+
+// NOTE: using the default "current_thread" runtime here would cause the test to
+// fail, because the only thread would block until `panic_rx` receives a
+// notification while the client needs to be driven to get the graceful shutdown
+// done.
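+// (Dropping the `Server` blocks the current thread on `panic_rx`, so with a
+// single-threaded runtime nothing would be left to drive the client's side of
+// the connection shutdown.)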
+#[cfg(feature = "http2")] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn highly_concurrent_requests_to_http2_server_with_low_max_concurrent_streams() { + let client = reqwest::Client::builder() + .http2_prior_knowledge() + .build() + .unwrap(); + + let server = server::http_with_config( + move |req| async move { + assert_eq!(req.version(), http::Version::HTTP_2); + Ok::<_, std::convert::Infallible>(http::Response::default()) + }, + |builder| { + builder.http2().max_concurrent_streams(1); + }, + ); + + let url = format!("http://{}", server.addr()); + + let futs = (0..100).map(|_| { + let client = client.clone(); + let url = url.clone(); + async move { + let res = client.get(&url).send().await.unwrap(); + assert_eq!(res.status(), reqwest::StatusCode::OK); + } + }); + futures_util::future::join_all(futs).await; +} + +#[cfg(feature = "http2")] +#[tokio::test] +async fn highly_concurrent_requests_to_slow_http2_server_with_low_max_concurrent_streams() { + use support::delay_server; + + let client = reqwest::Client::builder() + .http2_prior_knowledge() + .build() + .unwrap(); + + let server = delay_server::Server::new( + move |req| async move { + assert_eq!(req.version(), http::Version::HTTP_2); + http::Response::default() + }, + |http| { + http.http2().max_concurrent_streams(1); + }, + std::time::Duration::from_secs(2), + ) + .await; + + let url = format!("http://{}", server.addr()); + + let futs = (0..100).map(|_| { + let client = client.clone(); + let url = url.clone(); + async move { + let res = client.get(&url).send().await.unwrap(); + assert_eq!(res.status(), reqwest::StatusCode::OK); + } + }); + futures_util::future::join_all(futs).await; + + server.shutdown().await; +} diff --git a/rust/reqwest/tests/support/delay_layer.rs b/rust/reqwest/tests/support/delay_layer.rs new file mode 100644 index 0000000000..b8eec42a1b --- /dev/null +++ b/rust/reqwest/tests/support/delay_layer.rs @@ -0,0 +1,119 @@ +use std::{ + future::Future, + pin::Pin, + task::{Context, Poll}, + time::Duration, +}; + +use pin_project_lite::pin_project; +use tokio::time::Sleep; +use tower::{BoxError, Layer, Service}; + +/// This tower layer injects an arbitrary delay before calling downstream layers. +#[derive(Clone)] +pub struct DelayLayer { + delay: Duration, +} + +impl DelayLayer { + pub const fn new(delay: Duration) -> Self { + DelayLayer { delay } + } +} + +impl Layer for DelayLayer { + type Service = Delay; + fn layer(&self, service: S) -> Self::Service { + Delay::new(service, self.delay) + } +} + +impl std::fmt::Debug for DelayLayer { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("DelayLayer") + .field("delay", &self.delay) + .finish() + } +} + +/// This tower service injects an arbitrary delay before calling downstream layers. 
+#[derive(Debug, Clone)]
+pub struct Delay<S> {
+    inner: S,
+    delay: Duration,
+}
+impl<S> Delay<S> {
+    pub fn new(inner: S, delay: Duration) -> Self {
+        Delay { inner, delay }
+    }
+}
+
+impl<S, Request> Service<Request> for Delay<S>
+where
+    S: Service<Request>,
+    S::Error: Into<BoxError>,
+{
+    type Response = S::Response;
+
+    type Error = BoxError;
+
+    type Future = ResponseFuture<S::Future>;
+
+    fn poll_ready(
+        &mut self,
+        cx: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<Result<(), Self::Error>> {
+        match self.inner.poll_ready(cx) {
+            Poll::Pending => Poll::Pending,
+            Poll::Ready(r) => Poll::Ready(r.map_err(Into::into)),
+        }
+    }
+
+    fn call(&mut self, req: Request) -> Self::Future {
+        let response = self.inner.call(req);
+        let sleep = tokio::time::sleep(self.delay);
+
+        ResponseFuture::new(response, sleep)
+    }
+}
+
+// `Delay` response future
+pin_project! {
+    #[derive(Debug)]
+    pub struct ResponseFuture<S> {
+        #[pin]
+        response: S,
+        #[pin]
+        sleep: Sleep,
+    }
+}
+
+impl<S> ResponseFuture<S> {
+    pub(crate) fn new(response: S, sleep: Sleep) -> Self {
+        ResponseFuture { response, sleep }
+    }
+}
+
+impl<F, T, E> Future for ResponseFuture<F>
+where
+    F: Future<Output = Result<T, E>>,
+    E: Into<BoxError>,
+{
+    type Output = Result<T, BoxError>;
+
+    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        let this = self.project();
+
+        // First poll the sleep until complete
+        match this.sleep.poll(cx) {
+            Poll::Pending => return Poll::Pending,
+            Poll::Ready(_) => {}
+        }
+
+        // Then poll the inner future
+        match this.response.poll(cx) {
+            Poll::Ready(v) => Poll::Ready(v.map_err(Into::into)),
+            Poll::Pending => Poll::Pending,
+        }
+    }
+}
diff --git a/rust/reqwest/tests/support/delay_server.rs b/rust/reqwest/tests/support/delay_server.rs
new file mode 100644
index 0000000000..f79c2a4df4
--- /dev/null
+++ b/rust/reqwest/tests/support/delay_server.rs
@@ -0,0 +1,123 @@
+#![cfg(not(target_arch = "wasm32"))]
+#![allow(unused)]
+use std::convert::Infallible;
+use std::future::Future;
+use std::net;
+use std::time::Duration;
+
+use futures_util::FutureExt;
+use http::{Request, Response};
+use hyper::service::service_fn;
+use tokio::net::TcpListener;
+use tokio::select;
+use tokio::sync::oneshot;
+
+/// This server, unlike [`super::server::Server`], allows for delaying the
+/// specified amount of time after each TCP connection is established. This is
+/// useful for testing the behavior of the client when the server is slow.
+///
+/// For example, in case of HTTP/2, once the TCP/TLS connection is established,
+/// both endpoints are supposed to send a preface and an initial `SETTINGS`
+/// frame (See [RFC9113 3.4] for details). What if these frames are delayed for
+/// whatever reason? This server allows for testing such scenarios.
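+///
+/// Note that the delay is applied after `accept()`: the TCP connection is
+/// established first, and only then is the HTTP connection served.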
+///
+/// [RFC9113 3.4]: https://www.rfc-editor.org/rfc/rfc9113.html#name-http-2-connection-preface
+pub struct Server {
+    addr: net::SocketAddr,
+    shutdown_tx: Option<oneshot::Sender<()>>,
+    server_terminated_rx: oneshot::Receiver<()>,
+}
+
+type Builder = hyper_util::server::conn::auto::Builder<hyper_util::rt::TokioExecutor>;
+
+impl Server {
+    pub async fn new<F1, Fut, F2, Bu>(func: F1, apply_config: F2, delay: Duration) -> Self
+    where
+        F1: Fn(Request<hyper::body::Incoming>) -> Fut + Clone + Send + 'static,
+        Fut: Future<Output = Response<reqwest::Body>> + Send + 'static,
+        F2: FnOnce(&mut Builder) -> Bu + Send + 'static,
+    {
+        let (shutdown_tx, shutdown_rx) = oneshot::channel();
+        let (server_terminated_tx, server_terminated_rx) = oneshot::channel();
+
+        let tcp_listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = tcp_listener.local_addr().unwrap();
+
+        tokio::spawn(async move {
+            let mut builder =
+                hyper_util::server::conn::auto::Builder::new(hyper_util::rt::TokioExecutor::new());
+            apply_config(&mut builder);
+
+            tokio::spawn(async move {
+                let builder = builder;
+                let (connection_shutdown_tx, connection_shutdown_rx) = oneshot::channel();
+                let connection_shutdown_rx = connection_shutdown_rx.shared();
+                let mut shutdown_rx = std::pin::pin!(shutdown_rx);
+
+                let mut handles = Vec::new();
+                loop {
+                    select! {
+                        _ = shutdown_rx.as_mut() => {
+                            connection_shutdown_tx.send(()).unwrap();
+                            break;
+                        }
+                        res = tcp_listener.accept() => {
+                            let (stream, _) = res.unwrap();
+                            let io = hyper_util::rt::TokioIo::new(stream);
+
+
+                            let handle = tokio::spawn({
+                                let connection_shutdown_rx = connection_shutdown_rx.clone();
+                                let func = func.clone();
+                                let svc = service_fn(move |req| {
+                                    let fut = func(req);
+                                    async move {
+                                        Ok::<_, Infallible>(fut.await)
+                                    }});
+                                let builder = builder.clone();
+
+                                async move {
+                                    let fut = builder.serve_connection_with_upgrades(io, svc);
+                                    tokio::time::sleep(delay).await;
+
+                                    let mut conn = std::pin::pin!(fut);
+
+                                    select! {
+                                        _ = conn.as_mut() => {}
+                                        _ = connection_shutdown_rx => {
+                                            conn.as_mut().graceful_shutdown();
+                                            conn.await.unwrap();
+                                        }
+                                    }
+                                }
+                            });
+
+                            handles.push(handle);
+                        }
+                    }
+                }
+
+                futures_util::future::join_all(handles).await;
+                server_terminated_tx.send(()).unwrap();
+            });
+        });
+
+        Self {
+            addr,
+            shutdown_tx: Some(shutdown_tx),
+            server_terminated_rx,
+        }
+    }
+
+    pub async fn shutdown(mut self) {
+        if let Some(tx) = self.shutdown_tx.take() {
+            let _ = tx.send(());
+        }
+
+        self.server_terminated_rx.await.unwrap();
+    }
+
+    pub fn addr(&self) -> net::SocketAddr {
+        self.addr
+    }
+}
diff --git a/rust/reqwest/tests/support/error.rs b/rust/reqwest/tests/support/error.rs
new file mode 100644
index 0000000000..e38c6744be
--- /dev/null
+++ b/rust/reqwest/tests/support/error.rs
@@ -0,0 +1,15 @@
+use std::error::Error as StdError;
+
+pub fn inspect<E>(err: E) -> Vec<String>
+where
+    E: Into<Box<dyn StdError>>,
+{
+    let berr = err.into();
+    let mut err = Some(&*berr as &(dyn StdError + 'static));
+    let mut errs = Vec::new();
+    while let Some(e) = err {
+        errs.push(e.to_string());
+        err = e.source();
+    }
+    errs
+}
diff --git a/rust/reqwest/tests/support/mod.rs b/rust/reqwest/tests/support/mod.rs
new file mode 100644
index 0000000000..1d14efd50b
--- /dev/null
+++ b/rust/reqwest/tests/support/mod.rs
@@ -0,0 +1,12 @@
+#![allow(dead_code)]
+
+pub mod delay_layer;
+pub mod delay_server;
+pub mod error;
+pub mod not_tcp;
+pub mod server;
+
+// TODO: remove once done converting to new support server?
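+// (`concat!`/`env!` expand at compile time, so this evaluates to something
+// like "reqwest/<version>" for the crate under test.)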
+#[allow(unused)]
+pub static DEFAULT_USER_AGENT: &str =
+    concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"));
diff --git a/rust/reqwest/tests/support/not_tcp.rs b/rust/reqwest/tests/support/not_tcp.rs
new file mode 100644
index 0000000000..126d458d65
--- /dev/null
+++ b/rust/reqwest/tests/support/not_tcp.rs
@@ -0,0 +1,149 @@
+#![cfg(not(target_arch = "wasm32"))]
+#![cfg(unix)]
+
+use std::convert::Infallible;
+use std::future::Future;
+use std::sync::mpsc as std_mpsc;
+use std::thread;
+use std::time::Duration;
+
+use tokio::runtime;
+use tokio::sync::oneshot;
+
+pub struct Server {
+    path: std::path::PathBuf,
+    panic_rx: std_mpsc::Receiver<()>,
+    events_rx: std_mpsc::Receiver<Event>,
+    shutdown_tx: Option<oneshot::Sender<()>>,
+}
+
+#[non_exhaustive]
+pub enum Event {
+    ConnectionClosed,
+}
+
+impl Server {
+    pub fn path(&self) -> &std::path::Path {
+        &self.path
+    }
+
+    pub fn events(&mut self) -> Vec<Event> {
+        let mut events = Vec::new();
+        while let Ok(event) = self.events_rx.try_recv() {
+            events.push(event);
+        }
+        events
+    }
+}
+
+impl Drop for Server {
+    fn drop(&mut self) {
+        if let Some(tx) = self.shutdown_tx.take() {
+            let _ = tx.send(());
+        }
+
+        if !::std::thread::panicking() {
+            self.panic_rx
+                .recv_timeout(Duration::from_secs(3))
+                .expect("test server should not panic");
+        }
+    }
+}
+
+pub fn uds<F, Fut>(func: F) -> Server
+where
+    F: Fn(http::Request<hyper::body::Incoming>) -> Fut + Clone + Send + 'static,
+    Fut: Future<Output = http::Response<reqwest::Body>> + Send + 'static,
+{
+    uds_with_config(func, |_builder| {})
+}
+
+type Builder = hyper_util::server::conn::auto::Builder<hyper_util::rt::TokioExecutor>;
+
+pub fn uds_with_config<F1, Fut, F2, Bu>(func: F1, apply_config: F2) -> Server
+where
+    F1: Fn(http::Request<hyper::body::Incoming>) -> Fut + Clone + Send + 'static,
+    Fut: Future<Output = http::Response<reqwest::Body>> + Send + 'static,
+    F2: FnOnce(&mut Builder) -> Bu + Send + 'static,
+{
+    // Spawn new runtime in thread to prevent reactor execution context conflict
+    let test_name = thread::current().name().unwrap_or("").to_string();
+    thread::spawn(move || {
+        let rt = runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .expect("new rt");
+        let path = random_tmp_path();
+        let listener = rt.block_on(async {
+            tokio::net::UnixListener::bind(&path)
+                .unwrap()
+        });
+
+        let (shutdown_tx, mut shutdown_rx) = oneshot::channel();
+        let (panic_tx, panic_rx) = std_mpsc::channel();
+        let (events_tx, events_rx) = std_mpsc::channel();
+        let tname = format!(
+            "test({})-support-server",
+            test_name,
+        );
+
+        let close_path = path.clone();
+
+        thread::Builder::new()
+            .name(tname)
+            .spawn(move || {
+                rt.block_on(async move {
+                    let mut builder =
+                        hyper_util::server::conn::auto::Builder::new(hyper_util::rt::TokioExecutor::new());
+                    apply_config(&mut builder);
+
+                    loop {
+                        tokio::select! {
+                            _ = &mut shutdown_rx => {
+                                break;
+                            }
+                            accepted = listener.accept() => {
+                                let (io, _) = accepted.expect("accepted");
+                                let func = func.clone();
+                                let svc = hyper::service::service_fn(move |req| {
+                                    let fut = func(req);
+                                    async move { Ok::<_, Infallible>(fut.await) }
+                                });
+                                let builder = builder.clone();
+                                let events_tx = events_tx.clone();
+                                tokio::spawn(async move {
+                                    let _ = builder.serve_connection_with_upgrades(hyper_util::rt::TokioIo::new(io), svc).await;
+                                    let _ = events_tx.send(Event::ConnectionClosed);
+                                });
+                            }
+                        }
+                    }
+                    let _ = std::fs::remove_file(close_path);
+                    let _ = panic_tx.send(());
+                });
+            })
+            .expect("thread spawn");
+        Server {
+            path,
+            panic_rx,
+            events_rx,
+            shutdown_tx: Some(shutdown_tx),
+        }
+    })
+    .join()
+    .unwrap()
+}
+
+fn random_tmp_path() -> std::path::PathBuf {
+    use std::hash::BuildHasher;
+
+    let mut buf = std::env::temp_dir();
+
+    // libstd uses system random to create each one
+    let rng = std::collections::hash_map::RandomState::new();
+    let n = rng.hash_one("reqwest-uds-sock");
+
+    buf.push(format!("reqwest-test-uds-sock-{}", n));
+
+    buf
+}
diff --git a/rust/reqwest/tests/support/server.cert b/rust/reqwest/tests/support/server.cert
new file mode 100644
index 0000000000..e573f2a52a
Binary files /dev/null and b/rust/reqwest/tests/support/server.cert differ
diff --git a/rust/reqwest/tests/support/server.key b/rust/reqwest/tests/support/server.key
new file mode 100644
index 0000000000..757035e241
Binary files /dev/null and b/rust/reqwest/tests/support/server.key differ
diff --git a/rust/reqwest/tests/support/server.rs b/rust/reqwest/tests/support/server.rs
new file mode 100644
index 0000000000..e9b653db46
--- /dev/null
+++ b/rust/reqwest/tests/support/server.rs
@@ -0,0 +1,387 @@
+#![cfg(not(target_arch = "wasm32"))]
+use std::convert::Infallible;
+use std::future::Future;
+use std::net;
+use std::sync::mpsc as std_mpsc;
+use std::thread;
+use std::time::Duration;
+
+use tokio::io::AsyncReadExt;
+use tokio::net::TcpStream;
+use tokio::runtime;
+use tokio::sync::oneshot;
+
+pub struct Server {
+    addr: net::SocketAddr,
+    panic_rx: std_mpsc::Receiver<()>,
+    events_rx: std_mpsc::Receiver<Event>,
+    shutdown_tx: Option<oneshot::Sender<()>>,
+}
+
+#[non_exhaustive]
+pub enum Event {
+    ConnectionClosed,
+}
+
+impl Server {
+    pub fn addr(&self) -> net::SocketAddr {
+        self.addr
+    }
+
+    pub fn events(&mut self) -> Vec<Event> {
+        let mut events = Vec::new();
+        while let Ok(event) = self.events_rx.try_recv() {
+            events.push(event);
+        }
+        events
+    }
+}
+
+impl Drop for Server {
+    fn drop(&mut self) {
+        if let Some(tx) = self.shutdown_tx.take() {
+            let _ = tx.send(());
+        }
+
+        if !::std::thread::panicking() {
+            self.panic_rx
+                .recv_timeout(Duration::from_secs(3))
+                .expect("test server should not panic");
+        }
+    }
+}
+
+pub fn http<F, Fut>(func: F) -> Server
+where
+    F: Fn(http::Request<hyper::body::Incoming>) -> Fut + Clone + Send + 'static,
+    Fut: Future<Output = http::Response<reqwest::Body>> + Send + 'static,
+{
+    let infall = move |req| {
+        let fut = func(req);
+        async move { Ok::<_, Infallible>(fut.await) }
+    };
+    http_with_config(infall, |_builder| {})
+}
+
+type Builder = hyper_util::server::conn::auto::Builder<hyper_util::rt::TokioExecutor>;
+
+pub fn http_with_config<F1, Fut, F2, Bu, E>(func: F1, apply_config: F2) -> Server
+where
+    F1: Fn(http::Request<hyper::body::Incoming>) -> Fut + Clone + Send + 'static,
+    Fut: Future<Output = Result<http::Response<reqwest::Body>, E>> + Send + 'static,
+    E: Into<Box<dyn std::error::Error + Send + Sync>>,
+    F2: FnOnce(&mut Builder) -> Bu + Send + 'static,
+{
+    // Spawn new runtime in thread to prevent reactor execution context conflict
+    let test_name = thread::current().name().unwrap_or("").to_string();
+    thread::spawn(move || {
+        let rt = runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .expect("new rt");
+        let listener = rt.block_on(async move {
+            tokio::net::TcpListener::bind(&std::net::SocketAddr::from(([127, 0, 0, 1], 0)))
+                .await
+                .unwrap()
+        });
+        let addr = listener.local_addr().unwrap();
+
+        let (shutdown_tx, mut shutdown_rx) = oneshot::channel();
+        let (panic_tx, panic_rx) = std_mpsc::channel();
+        let (events_tx, events_rx) = std_mpsc::channel();
+        let tname = format!(
+            "test({})-support-server",
+            test_name,
+        );
+        thread::Builder::new()
+            .name(tname)
+            .spawn(move || {
+                rt.block_on(async move {
+                    let mut builder =
+                        hyper_util::server::conn::auto::Builder::new(hyper_util::rt::TokioExecutor::new());
+                    apply_config(&mut builder);
+                    let mut tasks = tokio::task::JoinSet::new();
+                    let graceful = hyper_util::server::graceful::GracefulShutdown::new();
+
+                    loop {
+                        tokio::select! {
+                            _ = &mut shutdown_rx => {
+                                graceful.shutdown().await;
+                                break;
+                            }
+                            accepted = listener.accept() => {
+                                let (io, _) = accepted.expect("accepted");
+                                let func = func.clone();
+                                let svc = hyper::service::service_fn(func);
+                                let builder = builder.clone();
+                                let events_tx = events_tx.clone();
+                                let watcher = graceful.watcher();
+
+                                tasks.spawn(async move {
+                                    let conn = builder.serve_connection_with_upgrades(hyper_util::rt::TokioIo::new(io), svc);
+                                    let _ = watcher.watch(conn).await;
+                                    let _ = events_tx.send(Event::ConnectionClosed);
+                                });
+                            }
+                        }
+                    }
+
+                    // try to drain
+                    while let Some(result) = tasks.join_next().await {
+                        if let Err(e) = result {
+                            if e.is_panic() {
+                                std::panic::resume_unwind(e.into_panic());
+                            }
+                        }
+                    }
+                    let _ = panic_tx.send(());
+                });
+            })
+            .expect("thread spawn");
+        Server {
+            addr,
+            panic_rx,
+            events_rx,
+            shutdown_tx: Some(shutdown_tx),
+        }
+    })
+    .join()
+    .unwrap()
+}
+
+#[cfg(feature = "http3")]
+#[derive(Debug, Default)]
+pub struct Http3 {
+    addr: Option<std::net::SocketAddr>,
+}
+
+#[cfg(feature = "http3")]
+impl Http3 {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn with_addr(mut self, addr: std::net::SocketAddr) -> Self {
+        self.addr = Some(addr);
+        self
+    }
+
+    pub fn build<F1, Fut>(self, func: F1) -> Server
+    where
+        F1: Fn(
+                http::Request<
+                    http_body_util::combinators::BoxBody<bytes::Bytes, h3::error::StreamError>,
+                >,
+            ) -> Fut
+            + Clone
+            + Send
+            + 'static,
+        Fut: Future<Output = http::Response<reqwest::Body>> + Send + 'static,
+    {
+        use bytes::Buf;
+        use http_body_util::BodyExt;
+        use quinn::crypto::rustls::QuicServerConfig;
+        use std::sync::Arc;
+
+        let addr = self.addr.unwrap_or_else(|| "[::1]:0".parse().unwrap());
+
+        // Spawn new runtime in thread to prevent reactor execution context conflict
+        let test_name = thread::current().name().unwrap_or("").to_string();
+        thread::spawn(move || {
+            let rt = runtime::Builder::new_current_thread()
+                .enable_all()
+                .build()
+                .expect("new rt");
+
+            let cert = std::fs::read("tests/support/server.cert").unwrap().into();
+            let key = std::fs::read("tests/support/server.key").unwrap().try_into().unwrap();
+
+            let mut tls_config = rustls::ServerConfig::builder()
+                .with_no_client_auth()
+                .with_single_cert(vec![cert], key)
+                .unwrap();
+            tls_config.max_early_data_size = u32::MAX;
+            tls_config.alpn_protocols = vec![b"h3".into()];
+
+            let server_config = quinn::ServerConfig::with_crypto(Arc::new(QuicServerConfig::try_from(tls_config).unwrap()));
+            let endpoint = rt.block_on(async move {
+                quinn::Endpoint::server(server_config, addr).unwrap()
+            });
+            let addr = endpoint.local_addr().unwrap();
+
+            let (shutdown_tx, mut shutdown_rx) = oneshot::channel();
+            let (panic_tx, panic_rx) = std_mpsc::channel();
+            let (events_tx, events_rx) = std_mpsc::channel();
+            let tname = format!(
+                "test({})-support-server",
+                test_name,
+            );
+            thread::Builder::new()
+                .name(tname)
+                .spawn(move || {
+                    rt.block_on(async move {
+
+                        loop {
+                            tokio::select! {
+                                _ = &mut shutdown_rx => {
+                                    break;
+                                }
+                                Some(accepted) = endpoint.accept() => {
+                                    let conn = accepted.await.expect("accepted");
+                                    let mut h3_conn = h3::server::Connection::new(h3_quinn::Connection::new(conn)).await.unwrap();
+                                    let events_tx = events_tx.clone();
+                                    let func = func.clone();
+                                    tokio::spawn(async move {
+                                        while let Ok(Some(resolver)) = h3_conn.accept().await {
+                                            let events_tx = events_tx.clone();
+                                            let func = func.clone();
+                                            tokio::spawn(async move {
+                                                if let Ok((req, stream)) = resolver.resolve_request().await {
+                                                    let (mut tx, rx) = stream.split();
+                                                    let body = futures_util::stream::unfold(rx, |mut rx| async move {
+                                                        match rx.recv_data().await {
+                                                            Ok(Some(mut buf)) => {
+                                                                Some((Ok(hyper::body::Frame::data(buf.copy_to_bytes(buf.remaining()))), rx))
+                                                            },
+                                                            Ok(None) => None,
+                                                            Err(err) => {
+                                                                Some((Err(err), rx))
+                                                            }
+                                                        }
+                                                    });
+                                                    let body = BodyExt::boxed(http_body_util::StreamBody::new(body));
+                                                    let resp = func(req.map(move |()| body)).await;
+                                                    let (parts, mut body) = resp.into_parts();
+                                                    let resp = http::Response::from_parts(parts, ());
+                                                    tx.send_response(resp).await.unwrap();
+
+                                                    while let Some(Ok(frame)) = body.frame().await {
+                                                        if let Ok(data) = frame.into_data() {
+                                                            tx.send_data(data).await.unwrap();
+                                                        }
+                                                    }
+                                                    tx.finish().await.unwrap();
+                                                    events_tx.send(Event::ConnectionClosed).unwrap();
+                                                }
+                                            });
+                                        }
+                                    });
+                                }
+                            }
+                        }
+                        let _ = panic_tx.send(());
+                    });
+                })
+                .expect("thread spawn");
+            Server {
+                addr,
+                panic_rx,
+                events_rx,
+                shutdown_tx: Some(shutdown_tx),
+            }
+        })
+        .join()
+        .unwrap()
+    }
+}
+
+pub fn low_level_with_response<F>(do_response: F) -> Server
+where
+    for<'c> F: Fn(&'c [u8], &'c mut TcpStream) -> Box<dyn Future<Output = ()> + Send + 'c>
+        + Clone
+        + Send
+        + 'static,
+{
+    // Spawn new runtime in thread to prevent reactor execution context conflict
+    let test_name = thread::current().name().unwrap_or("").to_string();
+    thread::spawn(move || {
+        let rt = runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .expect("new rt");
+        let listener = rt.block_on(async move {
+            tokio::net::TcpListener::bind(&std::net::SocketAddr::from(([127, 0, 0, 1], 0)))
+                .await
+                .unwrap()
+        });
+        let addr = listener.local_addr().unwrap();
+
+        let (shutdown_tx, mut shutdown_rx) = oneshot::channel();
+        let (panic_tx, panic_rx) = std_mpsc::channel();
+        let (events_tx, events_rx) = std_mpsc::channel();
+        let tname = format!("test({})-support-server", test_name,);
+        thread::Builder::new()
+            .name(tname)
+            .spawn(move || {
+                rt.block_on(async move {
+                    loop {
+                        tokio::select! {
+                            _ = &mut shutdown_rx => {
+                                break;
+                            }
+                            accepted = listener.accept() => {
+                                let (io, _) = accepted.expect("accepted");
+                                let do_response = do_response.clone();
+                                let events_tx = events_tx.clone();
+                                tokio::spawn(async move {
+                                    low_level_server_client(io, do_response).await;
+                                    let _ = events_tx.send(Event::ConnectionClosed);
+                                });
+                            }
+                        }
+                    }
+                    let _ = panic_tx.send(());
+                });
+            })
+            .expect("thread spawn");
+        Server {
+            addr,
+            panic_rx,
+            events_rx,
+            shutdown_tx: Some(shutdown_tx),
+        }
+    })
+    .join()
+    .unwrap()
+}
+
+async fn low_level_server_client<F>(mut client_socket: TcpStream, do_response: F)
+where
+    for<'c> F: Fn(&'c [u8], &'c mut TcpStream) -> Box<dyn Future<Output = ()> + Send + 'c>,
+{
+    loop {
+        let request = low_level_read_http_request(&mut client_socket)
+            .await
+            .expect("read_http_request failed");
+        if request.is_empty() {
+            // connection closed by client
+            break;
+        }
+
+        Box::into_pin(do_response(&request, &mut client_socket)).await;
+    }
+}
+
+async fn low_level_read_http_request(
+    client_socket: &mut TcpStream,
+) -> core::result::Result<Vec<u8>, std::io::Error> {
+    let mut buf = Vec::new();
+
+    // Read until the delimiter "\r\n\r\n" is found
+    loop {
+        let mut temp_buffer = [0; 1024];
+        let n = client_socket.read(&mut temp_buffer).await?;
+
+        if n == 0 {
+            break;
+        }
+
+        buf.extend_from_slice(&temp_buffer[..n]);
+
+        if let Some(pos) = buf.windows(4).position(|window| window == b"\r\n\r\n") {
+            return Ok(buf.drain(..pos + 4).collect());
+        }
+    }
+
+    Ok(buf)
+}
diff --git a/rust/reqwest/tests/timeouts.rs b/rust/reqwest/tests/timeouts.rs
new file mode 100644
index 0000000000..c7d3c95acc
--- /dev/null
+++ b/rust/reqwest/tests/timeouts.rs
@@ -0,0 +1,447 @@
+#![cfg(not(target_arch = "wasm32"))]
+#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))]
+mod support;
+use support::server;
+
+use std::time::Duration;
+
+#[tokio::test]
+async fn client_timeout() {
+    let _ = env_logger::try_init();
+
+    let server = server::http(move |_req| {
+        async {
+            // delay returning the response
+            tokio::time::sleep(Duration::from_millis(300)).await;
+            http::Response::default()
+        }
+    });
+
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_millis(100))
+        .no_proxy()
+        .build()
+        .unwrap();
+
+    let url = format!("http://{}/slow", server.addr());
+
+    let res = client.get(&url).send().await;
+
+    let err = res.unwrap_err();
+
+    assert!(err.is_timeout());
+    assert_eq!(err.url().map(|u| u.as_str()), Some(url.as_str()));
+}
+
+#[tokio::test]
+async fn request_timeout() {
+    let _ = env_logger::try_init();
+
+    let server = server::http(move |_req| {
+        async {
+            // delay returning the response
+            tokio::time::sleep(Duration::from_millis(300)).await;
+            http::Response::default()
+        }
+    });
+
+    let client = reqwest::Client::builder().no_proxy().build().unwrap();
+
+    let url = format!("http://{}/slow", server.addr());
+
+    let res = client
+        .get(&url)
+        .timeout(Duration::from_millis(100))
+        .send()
+        .await;
+
+    let err = res.unwrap_err();
+
+    if cfg!(not(target_arch = "wasm32")) {
+        assert!(err.is_timeout() && !err.is_connect());
+    } else {
+        assert!(err.is_timeout());
+    }
+    assert_eq!(err.url().map(|u| u.as_str()), Some(url.as_str()));
+}
+
+#[tokio::test]
+async fn connect_timeout() {
+    let _ = env_logger::try_init();
+
+    let client = reqwest::Client::builder()
+        .connect_timeout(Duration::from_millis(100))
+        .no_proxy()
+        .build()
+        .unwrap();
+
+    let url = "http://192.0.2.1:81/slow";
+
+    let res = client
+        .get(url)
+        .timeout(Duration::from_millis(1000))
+        .send()
+        .await;
+
+    let err = res.unwrap_err();
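+    // A connect timeout surfaces as both a connect error and a timeout.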
+
+    assert!(err.is_connect() && err.is_timeout());
+}
+
+#[cfg(not(target_arch = "wasm32"))]
+#[tokio::test]
+async fn connect_many_timeout_succeeds() {
+    let _ = env_logger::try_init();
+
+    let server = server::http(move |_req| async { http::Response::default() });
+    let port = server.addr().port();
+
+    let client = reqwest::Client::builder()
+        .resolve_to_addrs(
+            "many_addrs",
+            &["192.0.2.1:81".parse().unwrap(), server.addr()],
+        )
+        .connect_timeout(Duration::from_millis(100))
+        .no_proxy()
+        .build()
+        .unwrap();
+
+    let url = format!("http://many_addrs:{port}/eventual");
+
+    let _res = client
+        .get(url)
+        .timeout(Duration::from_millis(1000))
+        .send()
+        .await
+        .unwrap();
+}
+
+#[cfg(not(target_arch = "wasm32"))]
+#[tokio::test]
+async fn connect_many_timeout() {
+    let _ = env_logger::try_init();
+
+    let client = reqwest::Client::builder()
+        .resolve_to_addrs(
+            "many_addrs",
+            &[
+                "192.0.2.1:81".parse().unwrap(),
+                "192.0.2.2:81".parse().unwrap(),
+            ],
+        )
+        .connect_timeout(Duration::from_millis(100))
+        .no_proxy()
+        .build()
+        .unwrap();
+
+    let url = "http://many_addrs:81/slow".to_string();
+
+    let res = client
+        .get(url)
+        .timeout(Duration::from_millis(1000))
+        .send()
+        .await;
+
+    let err = res.unwrap_err();
+
+    assert!(err.is_connect() && err.is_timeout());
+}
+
+#[cfg(feature = "stream")]
+#[tokio::test]
+async fn response_timeout() {
+    let _ = env_logger::try_init();
+
+    let server = server::http(move |_req| {
+        async {
+            // immediate response, but delayed body
+            let body = reqwest::Body::wrap_stream(futures_util::stream::once(async {
+                tokio::time::sleep(Duration::from_secs(1)).await;
+                Ok::<_, std::convert::Infallible>("Hello")
+            }));
+
+            http::Response::new(body)
+        }
+    });
+
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_millis(500))
+        .no_proxy()
+        .build()
+        .unwrap();
+
+    let url = format!("http://{}/slow", server.addr());
+    let res = client.get(&url).send().await.expect("Failed to get");
+    let body = res.text().await;
+
+    let err = body.unwrap_err();
+
+    assert!(err.is_timeout());
+}
+
+#[tokio::test]
+async fn read_timeout_applies_to_headers() {
+    let _ = env_logger::try_init();
+
+    let server = server::http(move |_req| {
+        async {
+            // delay returning the response
+            tokio::time::sleep(Duration::from_millis(300)).await;
+            http::Response::default()
+        }
+    });
+
+    let client = reqwest::Client::builder()
+        .read_timeout(Duration::from_millis(100))
+        .no_proxy()
+        .build()
+        .unwrap();
+
+    let url = format!("http://{}/slow", server.addr());
+
+    let res = client.get(&url).send().await;
+
+    let err = res.unwrap_err();
+
+    assert!(err.is_timeout());
+    assert_eq!(err.url().map(|u| u.as_str()), Some(url.as_str()));
+}
+
+#[cfg(feature = "stream")]
+#[tokio::test]
+async fn read_timeout_applies_to_body() {
+    let _ = env_logger::try_init();
+
+    let server = server::http(move |_req| {
+        async {
+            // immediate response, but delayed body
+            let body = reqwest::Body::wrap_stream(futures_util::stream::once(async {
+                tokio::time::sleep(Duration::from_millis(300)).await;
+                Ok::<_, std::convert::Infallible>("Hello")
+            }));
+
+            http::Response::new(body)
+        }
+    });
+
+    let client = reqwest::Client::builder()
+        .read_timeout(Duration::from_millis(100))
+        .no_proxy()
+        .build()
+        .unwrap();
+
+    let url = format!("http://{}/slow", server.addr());
+    let res = client.get(&url).send().await.expect("Failed to get");
+    let body = res.text().await;
+
+    let err = body.unwrap_err();
+
+    assert!(err.is_timeout());
+}
+
+#[cfg(feature = "stream")]
+#[tokio::test]
+async fn read_timeout_allows_slow_response_body() {
+    let _ = env_logger::try_init();
+
+    let server = server::http(move |_req| {
+        async {
+            // immediate response, but body that has slow chunks
+
+            let slow = futures_util::stream::unfold(0, |state| async move {
+                if state < 3 {
+                    tokio::time::sleep(Duration::from_millis(100)).await;
+                    Some((
+                        Ok::<_, std::convert::Infallible>(state.to_string()),
+                        state + 1,
+                    ))
+                } else {
+                    None
+                }
+            });
+            let body = reqwest::Body::wrap_stream(slow);
+
+            http::Response::new(body)
+        }
+    });
+
+    let client = reqwest::Client::builder()
+        .read_timeout(Duration::from_millis(200))
+        //.timeout(Duration::from_millis(200))
+        .no_proxy()
+        .build()
+        .unwrap();
+
+    let url = format!("http://{}/slow", server.addr());
+    let res = client.get(&url).send().await.expect("Failed to get");
+    let body = res.text().await.expect("body text");
+
+    assert_eq!(body, "012");
+}
+
+/// Tests that internal client future cancels when the oneshot channel
+/// is canceled.
+#[cfg(feature = "blocking")]
+#[test]
+fn timeout_closes_connection() {
+    let _ = env_logger::try_init();
+
+    // Make Client drop *after* the Server, so the background doesn't
+    // close too early.
+    let client = reqwest::blocking::Client::builder()
+        .timeout(Duration::from_millis(500))
+        .build()
+        .unwrap();
+
+    let server = server::http(move |_req| {
+        async {
+            // delay returning the response
+            tokio::time::sleep(Duration::from_secs(2)).await;
+            http::Response::default()
+        }
+    });
+
+    let url = format!("http://{}/closes", server.addr());
+    let err = client.get(&url).send().unwrap_err();
+
+    assert!(err.is_timeout());
+    assert_eq!(err.url().map(|u| u.as_str()), Some(url.as_str()));
+}
+
+#[cfg(feature = "blocking")]
+#[test]
+fn timeout_blocking_request() {
+    let _ = env_logger::try_init();
+
+    // Make Client drop *after* the Server, so the background doesn't
+    // close too early.
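+    // (Locals are dropped in reverse declaration order, so the client declared
+    // first is dropped last.)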
+ let client = reqwest::blocking::Client::builder().build().unwrap(); + + let server = server::http(move |_req| { + async { + // delay returning the response + tokio::time::sleep(Duration::from_secs(2)).await; + http::Response::default() + } + }); + + let url = format!("http://{}/closes", server.addr()); + let err = client + .get(&url) + .timeout(Duration::from_millis(500)) + .send() + .unwrap_err(); + + assert!(err.is_timeout()); + assert_eq!(err.url().map(|u| u.as_str()), Some(url.as_str())); +} + +#[cfg(feature = "blocking")] +#[test] +fn connect_timeout_blocking_request() { + let _ = env_logger::try_init(); + + let client = reqwest::blocking::Client::builder() + .connect_timeout(Duration::from_millis(100)) + .build() + .unwrap(); + + // never returns + let url = "http://192.0.2.1:81/slow"; + + let err = client.get(url).send().unwrap_err(); + + assert!(err.is_timeout()); +} + +#[cfg(feature = "blocking")] +#[cfg(feature = "stream")] +#[test] +fn blocking_request_timeout_body() { + let _ = env_logger::try_init(); + + let client = reqwest::blocking::Client::builder() + // this should be overridden + .connect_timeout(Duration::from_millis(200)) + // this should be overridden + .timeout(Duration::from_millis(200)) + .build() + .unwrap(); + + let server = server::http(move |_req| { + async { + // immediate response, but delayed body + let body = reqwest::Body::wrap_stream(futures_util::stream::once(async { + tokio::time::sleep(Duration::from_secs(1)).await; + Ok::<_, std::convert::Infallible>("Hello") + })); + + http::Response::new(body) + } + }); + + let url = format!("http://{}/closes", server.addr()); + let res = client + .get(&url) + // longer than client timeout + .timeout(Duration::from_secs(5)) + .send() + .expect("get response"); + + let text = res.text().unwrap(); + assert_eq!(text, "Hello"); +} + +#[cfg(feature = "blocking")] +#[test] +fn write_timeout_large_body() { + let _ = env_logger::try_init(); + let body = vec![b'x'; 20_000]; + let len = 8192; + + // Make Client drop *after* the Server, so the background doesn't + // close too early. 
+ let client = reqwest::blocking::Client::builder() + .timeout(Duration::from_millis(500)) + .build() + .unwrap(); + + let server = server::http(move |_req| { + async { + // delay returning the response + tokio::time::sleep(Duration::from_secs(2)).await; + http::Response::default() + } + }); + + let cursor = std::io::Cursor::new(body); + let url = format!("http://{}/write-timeout", server.addr()); + let err = client + .post(&url) + .body(reqwest::blocking::Body::sized(cursor, len as u64)) + .send() + .unwrap_err(); + + assert!(err.is_timeout()); + assert_eq!(err.url().map(|u| u.as_str()), Some(url.as_str())); +} + +#[tokio::test] +async fn response_body_timeout_forwards_size_hint() { + let _ = env_logger::try_init(); + + let server = server::http(move |_req| async { http::Response::new(b"hello".to_vec().into()) }); + + let client = reqwest::Client::builder().no_proxy().build().unwrap(); + + let url = format!("http://{}/slow", server.addr()); + + let res = client + .get(&url) + .timeout(Duration::from_secs(1)) + .send() + .await + .expect("response"); + + assert_eq!(res.content_length(), Some(5)); +} diff --git a/rust/reqwest/tests/upgrade.rs b/rust/reqwest/tests/upgrade.rs new file mode 100644 index 0000000000..7a67c0457a --- /dev/null +++ b/rust/reqwest/tests/upgrade.rs @@ -0,0 +1,52 @@ +#![cfg(not(target_arch = "wasm32"))] +#![cfg(not(feature = "rustls-tls-manual-roots-no-provider"))] +mod support; +use support::server; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; + +#[tokio::test] +async fn http_upgrade() { + let server = server::http(move |req| { + assert_eq!(req.method(), "GET"); + assert_eq!(req.headers()["connection"], "upgrade"); + assert_eq!(req.headers()["upgrade"], "foobar"); + + tokio::spawn(async move { + let mut upgraded = hyper_util::rt::TokioIo::new(hyper::upgrade::on(req).await.unwrap()); + + let mut buf = vec![0; 7]; + upgraded.read_exact(&mut buf).await.unwrap(); + assert_eq!(buf, b"foo=bar"); + + upgraded.write_all(b"bar=foo").await.unwrap(); + }); + + async { + http::Response::builder() + .status(http::StatusCode::SWITCHING_PROTOCOLS) + .header(http::header::CONNECTION, "upgrade") + .header(http::header::UPGRADE, "foobar") + .body(reqwest::Body::default()) + .unwrap() + } + }); + + let res = reqwest::Client::builder() + .build() + .unwrap() + .get(format!("http://{}", server.addr())) + .header(http::header::CONNECTION, "upgrade") + .header(http::header::UPGRADE, "foobar") + .send() + .await + .unwrap(); + + assert_eq!(res.status(), http::StatusCode::SWITCHING_PROTOCOLS); + let mut upgraded = res.upgrade().await.unwrap(); + + upgraded.write_all(b"foo=bar").await.unwrap(); + + let mut buf = vec![]; + upgraded.read_to_end(&mut buf).await.unwrap(); + assert_eq!(buf, b"bar=foo"); +} diff --git a/rust/reqwest/tests/wasm_simple.rs b/rust/reqwest/tests/wasm_simple.rs new file mode 100644 index 0000000000..b5ee832074 --- /dev/null +++ b/rust/reqwest/tests/wasm_simple.rs @@ -0,0 +1,39 @@ +#![cfg(target_arch = "wasm32")] +use std::time::Duration; + +use wasm_bindgen::prelude::*; +use wasm_bindgen_test::*; +wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); + +#[wasm_bindgen] +extern "C" { + // Use `js_namespace` here to bind `console.log(..)` instead of just + // `log(..)` + #[wasm_bindgen(js_namespace = console)] + fn log(s: &str); +} + +#[wasm_bindgen_test] +async fn simple_example() { + let res = reqwest::get("https://hyper.rs") + .await + .expect("http get example"); + log(&format!("Status: {}", res.status())); + + let body = res.text().await.expect("response 
to utf-8 text"); + log(&format!("Body:\n\n{body}")); +} + +#[wasm_bindgen_test] +async fn request_with_timeout() { + let client = reqwest::Client::new(); + let err = client + .get("https://hyper.rs/not-cached") + .timeout(Duration::from_millis(1)) + .send() + .await + .expect_err("Expected error from aborted request"); + + assert!(err.is_request()); + assert!(err.is_timeout()); +} diff --git a/rust/reqwest/tests/zstd.rs b/rust/reqwest/tests/zstd.rs new file mode 100644 index 0000000000..be463094f6 --- /dev/null +++ b/rust/reqwest/tests/zstd.rs @@ -0,0 +1,568 @@ +mod support; +use support::server; +use tokio::io::AsyncWriteExt; + +#[tokio::test] +async fn zstd_response() { + zstd_case(10_000, 4096).await; +} + +#[tokio::test] +async fn zstd_single_byte_chunks() { + zstd_case(10, 1).await; +} + +#[tokio::test] +async fn test_zstd_empty_body() { + let server = server::http(move |req| async move { + assert_eq!(req.method(), "HEAD"); + + http::Response::builder() + .header("content-encoding", "zstd") + .body(Default::default()) + .unwrap() + }); + + let client = reqwest::Client::new(); + let res = client + .head(&format!("http://{}/zstd", server.addr())) + .send() + .await + .unwrap(); + + let body = res.text().await.unwrap(); + + assert_eq!(body, ""); +} + +#[tokio::test] +async fn test_accept_header_is_not_changed_if_set() { + let server = server::http(move |req| async move { + assert_eq!(req.headers()["accept"], "application/json"); + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("zstd")); + http::Response::default() + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/accept", server.addr())) + .header( + reqwest::header::ACCEPT, + reqwest::header::HeaderValue::from_static("application/json"), + ) + .send() + .await + .unwrap(); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +#[tokio::test] +async fn test_accept_encoding_header_is_not_changed_if_set() { + let server = server::http(move |req| async move { + assert_eq!(req.headers()["accept"], "*/*"); + assert_eq!(req.headers()["accept-encoding"], "identity"); + http::Response::default() + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/accept-encoding", server.addr())) + .header( + reqwest::header::ACCEPT_ENCODING, + reqwest::header::HeaderValue::from_static("identity"), + ) + .send() + .await + .unwrap(); + + assert_eq!(res.status(), reqwest::StatusCode::OK); +} + +async fn zstd_case(response_size: usize, chunk_size: usize) { + use futures_util::stream::StreamExt; + + let content: String = (0..response_size) + .into_iter() + .map(|i| format!("test {i}")) + .collect(); + + let zstded_content = zstd_crate::encode_all(content.as_bytes(), 3).unwrap(); + + let mut response = format!( + "\ + HTTP/1.1 200 OK\r\n\ + Server: test-accept\r\n\ + Content-Encoding: zstd\r\n\ + Content-Length: {}\r\n\ + \r\n", + &zstded_content.len() + ) + .into_bytes(); + response.extend(&zstded_content); + + let server = server::http(move |req| { + assert!(req.headers()["accept-encoding"] + .to_str() + .unwrap() + .contains("zstd")); + + let zstded = zstded_content.clone(); + async move { + let len = zstded.len(); + let stream = + futures_util::stream::unfold((zstded, 0), move |(zstded, pos)| async move { + let chunk = zstded.chunks(chunk_size).nth(pos)?.to_vec(); + + Some((chunk, (zstded, pos + 1))) + }); + + let body = reqwest::Body::wrap_stream(stream.map(Ok::<_, std::convert::Infallible>)); + + http::Response::builder() + 
.header("content-encoding", "zstd") + .header("content-length", len) + .body(body) + .unwrap() + } + }); + + let client = reqwest::Client::new(); + + let res = client + .get(&format!("http://{}/zstd", server.addr())) + .send() + .await + .expect("response"); + + let body = res.text().await.expect("text"); + assert_eq!(body, content); +} + +const COMPRESSED_RESPONSE_HEADERS: &[u8] = b"HTTP/1.1 200 OK\x0d\x0a\ + Content-Type: text/plain\x0d\x0a\ + Connection: keep-alive\x0d\x0a\ + Content-Encoding: zstd\x0d\x0a"; + +const RESPONSE_CONTENT: &str = "some message here"; + +fn zstd_compress(input: &[u8]) -> Vec { + zstd_crate::encode_all(input, 3).unwrap() +} + +#[tokio::test] +async fn test_non_chunked_non_fragmented_response() { + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let zstded_content = zstd_compress(RESPONSE_CONTENT.as_bytes()); + let content_length_header = + format!("Content-Length: {}\r\n\r\n", zstded_content.len()).into_bytes(); + let response = [ + COMPRESSED_RESPONSE_HEADERS, + &content_length_header, + &zstded_content, + ] + .concat(); + + client_socket + .write_all(response.as_slice()) + .await + .expect("response write_all failed"); + client_socket.flush().await.expect("response flush failed"); + }) + }); + + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); +} + +// Big response can have multiple ZSTD frames in it +#[tokio::test] +async fn test_non_chunked_non_fragmented_multiple_frames_response() { + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + // Split the content into two parts + let content_bytes = RESPONSE_CONTENT.as_bytes(); + let mid = content_bytes.len() / 2; + // Compress each part separately to create multiple ZSTD frames + let compressed_part1 = zstd_crate::encode_all(&content_bytes[0..mid], 3).unwrap(); + let compressed_part2 = zstd_crate::encode_all(&content_bytes[mid..], 3).unwrap(); + // Concatenate the compressed frames + let mut zstded_content = compressed_part1; + zstded_content.extend_from_slice(&compressed_part2); + // Set Content-Length to the total length of the concatenated frames + let content_length_header = + format!("Content-Length: {}\r\n\r\n", zstded_content.len()).into_bytes(); + let response = [ + COMPRESSED_RESPONSE_HEADERS, + &content_length_header, + &zstded_content, + ] + .concat(); + + client_socket + .write_all(response.as_slice()) + .await + .expect("response write_all failed"); + client_socket.flush().await.expect("response flush failed"); + }) + }); + + let res = reqwest::Client::new() + .get(format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); +} + +#[tokio::test] +async fn test_chunked_fragmented_multiple_frames_in_one_chunk() { + // Define constants for delay and timing margin + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); // 1-second delay + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); // Margin for timing assertions + + // Set up a low-level server + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + // Split RESPONSE_CONTENT into two parts + let mid = RESPONSE_CONTENT.len() / 2; + let part1 = &RESPONSE_CONTENT[0..mid]; + let part2 = 
&RESPONSE_CONTENT[mid..]; + + // Compress each part separately to create two ZSTD frames + let compressed_part1 = zstd_compress(part1.as_bytes()); + let compressed_part2 = zstd_compress(part2.as_bytes()); + + // Concatenate the frames into a single chunk's data + let chunk_data = [compressed_part1.as_slice(), compressed_part2.as_slice()].concat(); + + // Calculate the chunk size in bytes + let chunk_size = chunk_data.len(); + + // Prepare the initial response part: headers + chunk size + let headers = [ + COMPRESSED_RESPONSE_HEADERS, // e.g., "HTTP/1.1 200 OK\r\nContent-Encoding: zstd\r\n" + b"Transfer-Encoding: chunked\r\n\r\n", // Indicate chunked encoding + format!("{:x}\r\n", chunk_size).as_bytes(), // Chunk size in hex + ] + .concat(); + + // Send headers + chunk size + chunk data + client_socket + .write_all([headers.as_slice(), &chunk_data].concat().as_slice()) + .await + .expect("write_all failed"); + client_socket.flush().await.expect("flush failed"); + + // Introduce a delay to simulate fragmentation + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + // Send chunk terminator + final chunk + client_socket + .write_all(b"\r\n0\r\n\r\n") + .await + .expect("write_all failed"); + client_socket.flush().await.expect("flush failed"); + }) + }); + + // Record the start time for delay verification + let start = tokio::time::Instant::now(); + + let res = reqwest::Client::new() + .get(format!("http://{}/", server.addr())) + .send() + .await + .expect("Failed to get response"); + + // Verify the decompressed response matches the original content + assert_eq!( + res.text().await.expect("Failed to read text"), + RESPONSE_CONTENT + ); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} + +#[tokio::test] +async fn test_connection_reuse_with_chunked_fragmented_multiple_frames_in_one_chunk() { + // Define constants for delay and timing margin + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); // 1-second delay + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); // Margin for timing assertions + + // We will record the peer addresses of each client request here + let peer_addrs = std::sync::Arc::new(std::sync::Mutex::new(Vec::::new())); + let peer_addrs_clone = peer_addrs.clone(); + + // Set up a low-level server (it will reuse existing client connection, executing callback for each client request) + let server = server::low_level_with_response(move |_raw_request, client_socket| { + let peer_addrs = peer_addrs_clone.clone(); + Box::new(async move { + // Split RESPONSE_CONTENT into two parts + let mid = RESPONSE_CONTENT.len() / 2; + let part1 = &RESPONSE_CONTENT[0..mid]; + let part2 = &RESPONSE_CONTENT[mid..]; + + // Compress each part separately to create two ZSTD frames + let compressed_part1 = zstd_compress(part1.as_bytes()); + let compressed_part2 = zstd_compress(part2.as_bytes()); + + // Concatenate the frames into a single chunk's data + let chunk_data = [compressed_part1.as_slice(), compressed_part2.as_slice()].concat(); + + // Calculate the chunk size in bytes + let chunk_size = chunk_data.len(); + + // Prepare the initial response part: headers + chunk size + let headers = [ + COMPRESSED_RESPONSE_HEADERS, // e.g., "HTTP/1.1 200 OK\r\nContent-Encoding: zstd\r\n" + b"Transfer-Encoding: chunked\r\n\r\n", // Indicate chunked encoding + format!("{:x}\r\n", chunk_size).as_bytes(), // Chunk size in hex + ] + .concat(); + + // Send headers + chunk size + chunk data + 
client_socket + .write_all([headers.as_slice(), &chunk_data].concat().as_slice()) + .await + .expect("write_all failed"); + client_socket.flush().await.expect("flush failed"); + + // Introduce a delay to simulate fragmentation + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + peer_addrs + .lock() + .unwrap() + .push(client_socket.peer_addr().unwrap()); + + // Send chunk terminator + final chunk + client_socket + .write_all(b"\r\n0\r\n\r\n") + .await + .expect("write_all failed"); + client_socket.flush().await.expect("flush failed"); + }) + }); + + let client = reqwest::Client::builder() + .pool_idle_timeout(std::time::Duration::from_secs(30)) + .pool_max_idle_per_host(1) + .build() + .unwrap(); + + const NUMBER_OF_REQUESTS: usize = 5; + + for _ in 0..NUMBER_OF_REQUESTS { + // Record the start time for delay verification + let start = tokio::time::Instant::now(); + + let res = client + .get(format!("http://{}/", server.addr())) + .send() + .await + .expect("Failed to get response"); + + // Verify the decompressed response matches the original content + assert_eq!( + res.text().await.expect("Failed to read text"), + RESPONSE_CONTENT + ); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); + } + + drop(client); + + // Check that all peer addresses are the same + let peer_addrs = peer_addrs.lock().unwrap(); + assert_eq!( + peer_addrs.len(), + NUMBER_OF_REQUESTS, + "Expected {} peer addresses, but got {}", + NUMBER_OF_REQUESTS, + peer_addrs.len() + ); + let first_addr = peer_addrs[0]; + assert!( + peer_addrs.iter().all(|addr| addr == &first_addr), + "All peer addresses should be the same, but found differences: {:?}", + peer_addrs + ); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_1() { + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let zstded_content = zstd_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + zstded_content.len() + ) + .as_bytes(), + &zstded_content, + ] + .concat(); + let response_second_part = b"\r\n0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_2() { + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let zstded_content = 
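+ // a single ZSTD frame containing the whole plaintext response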
zstd_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + zstded_content.len() + ) + .as_bytes(), + &zstded_content, + b"\r\n", + ] + .concat(); + let response_second_part = b"0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + assert_eq!(res.text().await.expect("text"), RESPONSE_CONTENT); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} + +#[tokio::test] +async fn test_chunked_fragmented_response_with_extra_bytes() { + const DELAY_BETWEEN_RESPONSE_PARTS: tokio::time::Duration = + tokio::time::Duration::from_millis(1000); + const DELAY_MARGIN: tokio::time::Duration = tokio::time::Duration::from_millis(50); + + let server = server::low_level_with_response(|_raw_request, client_socket| { + Box::new(async move { + let zstded_content = zstd_compress(RESPONSE_CONTENT.as_bytes()); + let response_first_part = [ + COMPRESSED_RESPONSE_HEADERS, + format!( + "Transfer-Encoding: chunked\r\n\r\n{:x}\r\n", + zstded_content.len() + ) + .as_bytes(), + &zstded_content, + ] + .concat(); + let response_second_part = b"\r\n2ab\r\n0\r\n\r\n"; + + client_socket + .write_all(response_first_part.as_slice()) + .await + .expect("response_first_part write_all failed"); + client_socket + .flush() + .await + .expect("response_first_part flush failed"); + + tokio::time::sleep(DELAY_BETWEEN_RESPONSE_PARTS).await; + + client_socket + .write_all(response_second_part) + .await + .expect("response_second_part write_all failed"); + client_socket + .flush() + .await + .expect("response_second_part flush failed"); + }) + }); + + let start = tokio::time::Instant::now(); + let res = reqwest::Client::new() + .get(&format!("http://{}/", server.addr())) + .send() + .await + .expect("response"); + + let err = res.text().await.expect_err("there must be an error"); + assert!(err.is_decode()); + assert!(start.elapsed() >= DELAY_BETWEEN_RESPONSE_PARTS - DELAY_MARGIN); +} diff --git a/rust/sleeper_core/src/datafusion/cast_udf.rs b/rust/sleeper_core/src/datafusion/cast_udf.rs index b77b2d0a61..a249622e84 100644 --- a/rust/sleeper_core/src/datafusion/cast_udf.rs +++ b/rust/sleeper_core/src/datafusion/cast_udf.rs @@ -276,7 +276,7 @@ mod tests { fn should_widen_bounds_from_int32_to_int64() { // Given let udf = CastUDF::new(&DataType::Int32, &DataType::Int64, false); - let intervals = vec![ + let intervals = [ make_interval(&ScalarValue::Int32(Some(1)), &ScalarValue::Int32(Some(10))), make_interval( &ScalarValue::Int32(Some(-100)), @@ -297,7 +297,7 @@ mod tests { fn should_narrow_bounds_from_int64_to_int32_and_truncate() { // Given let udf = CastUDF::new(&DataType::Int64, &DataType::Int32, false); - let intervals = vec![ + let intervals = [ make_interval( &ScalarValue::Int64(Some(i64::from(i32::MIN) - 1)), // -2147483649 &ScalarValue::Int64(Some(i64::from(i32::MAX) + 1)), // 2147483648 @@ -321,7 +321,7 @@ mod 
tests { fn should_return_same_bounds_when_types_match() { // Given let udf = CastUDF::new(&DataType::Int32, &DataType::Int32, false); - let intervals = vec![ + let intervals = [ make_interval( &ScalarValue::Int32(Some(50)), &ScalarValue::Int32(Some(200)), diff --git a/rust/sleeper_core/src/datafusion/unalias.rs b/rust/sleeper_core/src/datafusion/unalias.rs index b747b29b17..1661a574d8 100644 --- a/rust/sleeper_core/src/datafusion/unalias.rs +++ b/rust/sleeper_core/src/datafusion/unalias.rs @@ -41,7 +41,7 @@ fn unalias(qualified_name: &str, original_schema: &SchemaRef) -> String { .iter() .find(|&&s| qualified_name.ends_with(s)) .expect("Can't find unaliased column name")) - .to_string() + .clone() } /// Unalias column names that were changed due to a [`ProjectionExec`]. diff --git a/rust/sleeper_core/src/lib.rs b/rust/sleeper_core/src/lib.rs index 3c2ab1809f..ad4cdf9b78 100644 --- a/rust/sleeper_core/src/lib.rs +++ b/rust/sleeper_core/src/lib.rs @@ -21,7 +21,10 @@ * limitations under the License. */ use crate::datafusion::{CompactionResult, LeafPartitionQuery}; +#[cfg(doc)] +use arrow::record_batch::RecordBatch; use color_eyre::eyre::Result; +use log::error; mod common_config; mod datafusion; @@ -80,7 +83,14 @@ pub async fn run_compaction(config: &CommonConfig<'_>) -> Result