From a0d950fcee2736029c78762f47feaf90bcf87ec8 Mon Sep 17 00:00:00 2001 From: Dmitry Dodzin Date: Tue, 23 Jul 2024 18:55:46 +0300 Subject: [PATCH] Fs path re-mapping (#2556) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Well It remaps * Check that it's everywhere * Changelog * Update * Lint * Update * More "real world" like test case * Ops * Update Schema * Remap in sip too? * More correct logic I believe * Don't remap in sip * Update mirrord/layer/src/file/mapper.rs Co-authored-by: Michał Smolarek <34063647+Razz4780@users.noreply.github.com> * Update example for path remapping * Update docs once again * Linter wasn't working --------- Co-authored-by: Michał Smolarek <34063647+Razz4780@users.noreply.github.com> --- changelog.d/2068.added.md | 9 ++ mirrord-schema.json | 13 ++- mirrord/config/src/feature/fs.rs | 2 + mirrord/config/src/feature/fs/advanced.rs | 32 ++++++- mirrord/layer/src/detour.rs | 18 +++- mirrord/layer/src/file.rs | 1 + mirrord/layer/src/file/filter.rs | 10 +- mirrord/layer/src/file/hooks.rs | 109 ++++++++++------------ mirrord/layer/src/file/mapper.rs | 82 ++++++++++++++++ mirrord/layer/src/file/ops.rs | 71 ++++++++------ mirrord/layer/src/lib.rs | 1 + mirrord/layer/src/setup.rs | 10 +- 12 files changed, 256 insertions(+), 102 deletions(-) create mode 100644 changelog.d/2068.added.md create mode 100644 mirrord/layer/src/file/mapper.rs diff --git a/changelog.d/2068.added.md b/changelog.d/2068.added.md new file mode 100644 index 00000000000..02b7c3a4d10 --- /dev/null +++ b/changelog.d/2068.added.md @@ -0,0 +1,9 @@ +Add fs mapping, under `feature.fs.mapping` now it's possible to specify regex match and replace for paths while running mirrord exec. 
+ +Example: + +```toml +[feature.fs.mapping] +"/var/app/temp" = "/tmp" # Will replace all calls to read/write/scan for "/var/app/temp/sample.txt" to "/tmp/sample.txt" +"/var/app/.cache" = "/workspace/mirrord$0" # Will replace "/var/app/.cache/sample.txt" to "/workspace/mirrord/var/app/.cache/sample.txt" see [Regex::replace](https://docs.rs/regex/latest/regex/struct.Regex.html#method.replace) +``` diff --git a/mirrord-schema.json b/mirrord-schema.json index f66dff4ab50..d184fcb7925 100644 --- a/mirrord-schema.json +++ b/mirrord-schema.json @@ -151,7 +151,7 @@ "additionalProperties": false, "definitions": { "AdvancedFsUserConfig": { - "description": "Allows the user to specify the default behavior for file operations:\n\n1. `\"read\"` - Read from the remote file system (default) 2. `\"write\"` - Read/Write from the remote file system. 3. `\"local\"` - Read from the local file system. 4. `\"localwithoverrides\"` - perform fs operation locally, unless the path matches a pre-defined or user-specified exception.\n\n> Note: by default, some paths are read locally or remotely, regardless of the selected FS mode. > This is described in further detail below.\n\nBesides the default behavior, the user can specify behavior for specific regex patterns. Case insensitive.\n\n1. `\"read_write\"` - List of patterns that should be read/write remotely. 2. `\"read_only\"` - List of patterns that should be read only remotely. 3. `\"local\"` - List of patterns that should be read locally. 4. `\"not_found\"` - List of patters that should never be read nor written. These files should be treated as non-existent.\n\nThe logic for choosing the behavior is as follows:\n\n1. Check if one of the patterns match the file path, do the corresponding action. There's no specified order if two lists match the same path, we will use the first one (and we do not guarantee what is first).\n\n**Warning**: Specifying the same path in two lists is unsupported and can lead to undefined behaviour.\n\n2. 
There are pre-defined exceptions to the set FS mode. 1. Paths that match [the patterns defined here](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/read_local_by_default.rs) are read locally by default. 2. Paths that match [the patterns defined here](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/read_remote_by_default.rs) are read remotely by default when the mode is `localwithoverrides`. 3. Paths that match [the patterns defined here](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/not_found_by_default.rs) under the running user's home directory will not be found by the application when the mode is not `local`.\n\nIn order to override that default setting for a path, or a pattern, include it the appropriate pattern set from above. E.g. in order to read files under `/etc/` remotely even though it is covered by [the set of patterns that are read locally by default](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/read_local_by_default.rs), add `\"^/etc/.\"` to the `read_only` set.\n\n3. If none of the above match, use the default behavior (mode).\n\nFor more information, check the file operations [technical reference](https://mirrord.dev/docs/reference/fileops/).\n\n```json { \"feature\": { \"fs\": { \"mode\": \"write\", \"read_write\": \".+\\\\.json\" , \"read_only\": [ \".+\\\\.yaml\", \".+important-file\\\\.txt\" ], \"local\": [ \".+\\\\.js\", \".+\\\\.mjs\" ], \"not_found\": [ \"\\\\.config/gcloud\" ] } } } ```", + "description": "Allows the user to specify the default behavior for file operations:\n\n1. `\"read\"` - Read from the remote file system (default) 2. `\"write\"` - Read/Write from the remote file system. 3. `\"local\"` - Read from the local file system. 4. 
`\"localwithoverrides\"` - perform fs operation locally, unless the path matches a pre-defined or user-specified exception.\n\n> Note: by default, some paths are read locally or remotely, regardless of the selected FS mode. > This is described in further detail below.\n\nBesides the default behavior, the user can specify behavior for specific regex patterns. Case insensitive.\n\n1. `\"read_write\"` - List of patterns that should be read/write remotely. 2. `\"read_only\"` - List of patterns that should be read only remotely. 3. `\"local\"` - List of patterns that should be read locally. 4. `\"not_found\"` - List of patters that should never be read nor written. These files should be treated as non-existent. 5. `\"mapping\"` - Map of patterns and their corresponding replacers. The replacement happens before any specific behavior as defined above or mode (uses [`Regex::replace`](https://docs.rs/regex/latest/regex/struct.Regex.html#method.replace))\n\nThe logic for choosing the behavior is as follows:\n\n1. Check against \"mapping\" if path needs to be replaced, if matched then continue to next step with new path after replacements otherwise continue as usual. 2. Check if one of the patterns match the file path, do the corresponding action. There's no specified order if two lists match the same path, we will use the first one (and we do not guarantee what is first).\n\n**Warning**: Specifying the same path in two lists is unsupported and can lead to undefined behaviour.\n\n3. There are pre-defined exceptions to the set FS mode. 1. Paths that match [the patterns defined here](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/read_local_by_default.rs) are read locally by default. 2. Paths that match [the patterns defined here](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/read_remote_by_default.rs) are read remotely by default when the mode is `localwithoverrides`. 3. 
Paths that match [the patterns defined here](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/not_found_by_default.rs) under the running user's home directory will not be found by the application when the mode is not `local`.\n\nIn order to override that default setting for a path, or a pattern, include it the appropriate pattern set from above. E.g. in order to read files under `/etc/` remotely even though it is covered by [the set of patterns that are read locally by default](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/read_local_by_default.rs), add `\"^/etc/.\"` to the `read_only` set.\n\n4. If none of the above match, use the default behavior (mode).\n\nFor more information, check the file operations [technical reference](https://mirrord.dev/docs/reference/fileops/).\n\n```json { \"feature\": { \"fs\": { \"mode\": \"write\", \"read_write\": \".+\\\\.json\" , \"read_only\": [ \".+\\\\.yaml\", \".+important-file\\\\.txt\" ], \"local\": [ \".+\\\\.js\", \".+\\\\.mjs\" ], \"not_found\": [ \"\\\\.config/gcloud\" ] } } } ```", "type": "object", "properties": { "local": { @@ -166,6 +166,17 @@ } ] }, + "mapping": { + "title": "feature.fs.mapping {#feature-fs-mapping}", + "description": "Specify map of patterns that if matched will replace the path according to specification.\n\n*Capture groups are allowed.*\n\nExample: ```json { \"^/home/(?<user>\\S+)/dev/tomcat\": \"/etc/tomcat\", \"^/home/(?<user>\\S+)/dev/config/(?<app>\\S+)\": \"/mnt/configs/${user}-$app\" } ``` Will do the following replacements for any I/O operation\n\n`/home/johndoe/dev/tomcat/context.xml` => `/etc/tomcat/context.xml` `/home/johndoe/dev/config/api/app.conf` => `/mnt/configs/johndoe-api/app.conf`", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "type": "string" + } + }, "mode": { "title": "feature.fs.mode {#feature-fs-mode}", "anyOf": [ diff --git a/mirrord/config/src/feature/fs.rs b/mirrord/config/src/feature/fs.rs index 
415cf8230fa..29a8f66f7af 100644 --- a/mirrord/config/src/feature/fs.rs +++ b/mirrord/config/src/feature/fs.rs @@ -90,6 +90,7 @@ impl MirrordConfig for FsUserConfig { .source_value(context) .transpose()?, not_found: None, + mapping: None, }, FsUserConfig::Advanced(advanced) => advanced.generate_config(context)?, }; @@ -117,6 +118,7 @@ impl MirrordToggleableConfig for FsUserConfig { read_only, local, not_found: None, + mapping: None, }) } } diff --git a/mirrord/config/src/feature/fs/advanced.rs b/mirrord/config/src/feature/fs/advanced.rs index 43f49205635..3ac46621d47 100644 --- a/mirrord/config/src/feature/fs/advanced.rs +++ b/mirrord/config/src/feature/fs/advanced.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use mirrord_analytics::{AnalyticValue, CollectAnalytics}; use mirrord_config_derive::MirrordConfig; use schemars::JsonSchema; @@ -31,17 +33,21 @@ use crate::{ /// 3. `"local"` - List of patterns that should be read locally. /// 4. `"not_found"` - List of patters that should never be read nor written. These files should be /// treated as non-existent. +/// 5. `"mapping"` - Map of patterns and their corresponding replacers. The replacement happens before any specific behavior as defined above or mode (uses [`Regex::replace`](https://docs.rs/regex/latest/regex/struct.Regex.html#method.replace)) /// /// The logic for choosing the behavior is as follows: /// -/// 1. Check if one of the patterns match the file path, do the corresponding action. There's +/// +/// 1. Check against "mapping" if path needs to be replaced, if matched then continue to next step +/// with new path after replacements otherwise continue as usual. +/// 2. Check if one of the patterns match the file path, do the corresponding action. There's /// no specified order if two lists match the same path, we will use the first one (and we /// do not guarantee what is first). /// /// **Warning**: Specifying the same path in two lists is unsupported and can lead to undefined /// behaviour. 
/// -/// 2. There are pre-defined exceptions to the set FS mode. +/// 3. There are pre-defined exceptions to the set FS mode. /// 1. Paths that match [the patterns defined here](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/read_local_by_default.rs) /// are read locally by default. /// 2. Paths that match [the patterns defined here](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/read_remote_by_default.rs) @@ -55,7 +61,7 @@ use crate::{ /// though it is covered by [the set of patterns that are read locally by default](https://github.com/metalbear-co/mirrord/tree/latest/mirrord/layer/src/file/filter/read_local_by_default.rs), /// add `"^/etc/."` to the `read_only` set. /// -/// 3. If none of the above match, use the default behavior (mode). +/// 4. If none of the above match, use the default behavior (mode). /// /// For more information, check the file operations /// [technical reference](https://mirrord.dev/docs/reference/fileops/). @@ -106,6 +112,25 @@ pub struct FsConfig { /// /// Specify file path patterns that if matched will be treated as non-existent. pub not_found: Option>, + + /// ### feature.fs.mapping {#feature-fs-mapping} + /// + /// Specify map of patterns that if matched will replace the path according to specification. 
+ /// + /// *Capture groups are allowed.* + /// + /// Example: + /// ```json + /// { + /// "^/home/(?<user>\S+)/dev/tomcat": "/etc/tomcat", + /// "^/home/(?<user>\S+)/dev/config/(?<app>\S+)": "/mnt/configs/${user}-$app" + /// } + /// ``` + /// Will do the following replacements for any I/O operation + /// + /// `/home/johndoe/dev/tomcat/context.xml` => `/etc/tomcat/context.xml` + /// `/home/johndoe/dev/config/api/app.conf` => `/mnt/configs/johndoe-api/app.conf` + pub mapping: Option<HashMap<String, String>>, } impl MirrordToggleableConfig for AdvancedFsUserConfig { @@ -127,6 +152,7 @@ impl MirrordToggleableConfig for AdvancedFsUserConfig { read_only, local, not_found: None, + mapping: None, }) } } diff --git a/mirrord/layer/src/detour.rs b/mirrord/layer/src/detour.rs index 4fbc723ad14..b4f12c1c0f9 100644 --- a/mirrord/layer/src/detour.rs +++ b/mirrord/layer/src/detour.rs @@ -9,7 +9,9 @@ use core::{ convert, ops::{FromResidual, Residual, Try}, }; -use std::{cell::RefCell, ops::Deref, os::unix::prelude::*, path::PathBuf, sync::OnceLock}; +use std::{ + cell::RefCell, ffi::CString, ops::Deref, os::unix::prelude::*, path::PathBuf, sync::OnceLock, +}; #[cfg(target_os = "macos")] use libc::c_char; @@ -150,10 +152,10 @@ pub(crate) enum Bypass { FileOperationInMirrordBinTempDir(*const c_char), /// File [`PathBuf`] should be ignored (used for tests). - IgnoredFile(PathBuf), + IgnoredFile(CString), /// Some operations only handle absolute [`PathBuf`]s. 
- RelativePath(PathBuf), + RelativePath(CString), /// Started mirrord with [`FsModeConfig`](mirrord_config::feature::fs::mode::FsModeConfig) set /// to [`FsModeConfig::Read`](mirrord_config::feature::fs::FsModeConfig::Read), but @@ -209,6 +211,16 @@ pub(crate) enum Bypass { LocalDns, } +impl Bypass { + pub fn relative_path(path: impl Into>) -> Self { + Bypass::RelativePath(CString::new(path).expect("Should be CStringable")) + } + + pub fn ignored_file(path: impl Into>) -> Self { + Bypass::IgnoredFile(CString::new(path).expect("Should be CStringable")) + } +} + /// [`ControlFlow`](std::ops::ControlFlow)-like enum to be used by hooks. /// /// Conversion from `Result`: diff --git a/mirrord/layer/src/file.rs b/mirrord/layer/src/file.rs index 39f1dd454bb..14b8181f32c 100644 --- a/mirrord/layer/src/file.rs +++ b/mirrord/layer/src/file.rs @@ -24,6 +24,7 @@ use mirrord_protocol::file::{GetDEnts64Request, GetDEnts64Response}; pub(crate) mod filter; pub(crate) mod hooks; +pub(crate) mod mapper; pub(crate) mod open_dirs; pub(crate) mod ops; diff --git a/mirrord/layer/src/file/filter.rs b/mirrord/layer/src/file/filter.rs index 9877a4b3433..7bff8a8b175 100644 --- a/mirrord/layer/src/file/filter.rs +++ b/mirrord/layer/src/file/filter.rs @@ -98,6 +98,7 @@ impl FileFilter { local, mode, not_found, + .. 
} = fs_config; let read_write = @@ -399,12 +400,12 @@ mod tests { local, not_found, mode, + mapping: None, }; let file_filter = FileFilter::new(fs_config); - let res = - file_filter.continue_or_bypass_with(path, write, || Bypass::IgnoredFile("".into())); + let res = file_filter.continue_or_bypass_with(path, write, || Bypass::ignored_file("")); println!("filter result: {res:?}"); assert_eq!(res.kind(), expected); } @@ -439,8 +440,7 @@ mod tests { let file_filter = FileFilter::new(fs_config); - let res = - file_filter.continue_or_bypass_with(path, write, || Bypass::IgnoredFile("".into())); + let res = file_filter.continue_or_bypass_with(path, write, || Bypass::ignored_file("")); println!("filter result: {res:?}"); assert_eq!(res.kind(), expected); @@ -464,7 +464,7 @@ mod tests { #[case("/root/.nuget/packages/microsoft.azure.amqp", DetourKind::Success)] fn not_found_set(#[case] path: &str, #[case] expected: DetourKind) { let filter = FileFilter::new(Default::default()); - let res = filter.continue_or_bypass_with(path, false, || Bypass::IgnoredFile("".into())); + let res = filter.continue_or_bypass_with(path, false, || Bypass::ignored_file("")); println!("filter result: {res:?}"); assert_eq!(res.kind(), expected); diff --git a/mirrord/layer/src/file/hooks.rs b/mirrord/layer/src/file/hooks.rs index 0dda0022870..0867109fb9d 100644 --- a/mirrord/layer/src/file/hooks.rs +++ b/mirrord/layer/src/file/hooks.rs @@ -31,14 +31,12 @@ use tracing::trace; use tracing::{error, info, warn}; use super::{open_dirs, ops::*, OpenOptionsInternalExt}; -#[cfg(target_os = "macos")] -use crate::detour::Bypass; #[cfg(target_os = "linux")] use crate::error::HookError::ResponseError; use crate::{ close_layer_fd, common::CheckedInto, - detour::{Detour, DetourGuard}, + detour::{Bypass, Detour, DetourGuard}, error::HookError, file::{ open_dirs::OPEN_DIRS, @@ -55,14 +53,15 @@ type stat64 = stat; /// Take the original raw c_char pointer and a resulting bypass, and either the original pointer or /// a 
different one according to the bypass. /// We pass reference to bypass to make sure the bypass lives with the pointer. -#[cfg(target_os = "macos")] -fn update_ptr_from_bypass(ptr: *const c_char, bypass: Bypass) -> *const c_char { +fn update_ptr_from_bypass(ptr: *const c_char, bypass: &Bypass) -> *const c_char { match bypass { // For some reason, the program is trying to carry out an operation on a path that is // inside mirrord's temp bin dir. The detour has returned us the original path of the file // (stripped mirrord's dir path), so now we carry out the operation locally, on the stripped // path. - Bypass::FileOperationInMirrordBinTempDir(stripped_ptr) => stripped_ptr, + #[cfg(target_os = "macos")] + Bypass::FileOperationInMirrordBinTempDir(stripped_ptr) => *stripped_ptr, + Bypass::RelativePath(path) | Bypass::IgnoredFile(path) => path.as_ptr(), _ => ptr, } } @@ -94,9 +93,8 @@ pub(super) unsafe extern "C" fn open_detour( if guard.is_none() { FN_OPEN(raw_path, open_flags, mode) } else { - open_logic(raw_path, open_flags, mode).unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + open_logic(raw_path, open_flags, mode).unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_OPEN(raw_path, open_flags, mode) }) } @@ -117,9 +115,8 @@ pub(super) unsafe extern "C" fn open64_detour( if guard.is_none() { FN_OPEN64(raw_path, open_flags, mode) } else { - open_logic(raw_path, open_flags, mode).unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + open_logic(raw_path, open_flags, mode).unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_OPEN64(raw_path, open_flags, mode) }) } @@ -137,9 +134,8 @@ pub(super) unsafe extern "C" fn open_nocancel_detour( if guard.is_none() { FN_OPEN_NOCANCEL(raw_path, open_flags, mode) } else { - open_logic(raw_path, 
open_flags, mode).unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + open_logic(raw_path, open_flags, mode).unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_OPEN_NOCANCEL(raw_path, open_flags, mode) }) } @@ -164,7 +160,10 @@ pub(super) unsafe extern "C" fn opendir_detour(raw_filename: *const c_char) -> u Detour::Error(fail) } }) - .unwrap_or_bypass_with(|_| opendir_bypass(raw_filename)) + .unwrap_or_bypass_with(|bypass| { + let raw_filename = update_ptr_from_bypass(raw_filename, &bypass); + opendir_bypass(raw_filename) + }) } /// see below, to have nice code we also implement it for other archs. @@ -312,9 +311,8 @@ pub(crate) unsafe extern "C" fn openat_detour( ) -> RawFd { let open_options = OpenOptionsInternalExt::from_flags(open_flags); - openat(fd, raw_path.checked_into(), open_options).unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + openat(fd, raw_path.checked_into(), open_options).unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_OPENAT(fd, raw_path, open_flags) }) } @@ -332,9 +330,8 @@ pub(crate) unsafe extern "C" fn openat64_detour( ) -> RawFd { let open_options = OpenOptionsInternalExt::from_flags(open_flags); - openat(fd, raw_path.checked_into(), open_options).unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + openat(fd, raw_path.checked_into(), open_options).unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_OPENAT64(fd, raw_path, open_flags) }) } @@ -347,9 +344,8 @@ pub(crate) unsafe extern "C" fn _openat_nocancel_detour( ) -> RawFd { let open_options = OpenOptionsInternalExt::from_flags(open_flags); - openat(fd, raw_path.checked_into(), open_options).unwrap_or_bypass_with(|_bypass| { - 
#[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + openat(fd, raw_path.checked_into(), open_options).unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN__OPENAT_NOCANCEL(fd, raw_path, open_flags) }) } @@ -657,9 +653,8 @@ pub(crate) unsafe extern "C" fn _write_nocancel_detour( /// Implementation of access_detour, used in access_detour and faccessat_detour unsafe fn access_logic(raw_path: *const c_char, mode: c_int) -> c_int { - access(raw_path.checked_into(), mode as u8).unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + access(raw_path.checked_into(), mode as u8).unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_ACCESS(raw_path, mode) }) } @@ -771,9 +766,8 @@ fn stat_logic( #[hook_guard_fn] unsafe extern "C" fn lstat_detour(raw_path: *const c_char, out_stat: *mut stat) -> c_int { stat_logic::(0, None, Some(raw_path), out_stat as *mut _).unwrap_or_bypass_with( - |_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + |bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_LSTAT(raw_path, out_stat) }, ) @@ -790,9 +784,8 @@ pub(crate) unsafe extern "C" fn fstat_detour(fd: RawFd, out_stat: *mut stat) -> #[hook_guard_fn] unsafe extern "C" fn stat_detour(raw_path: *const c_char, out_stat: *mut stat) -> c_int { stat_logic::(0, None, Some(raw_path), out_stat as *mut _).unwrap_or_bypass_with( - |_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + |bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_STAT(raw_path, out_stat) }, ) @@ -808,8 +801,10 @@ unsafe extern "C" fn statx_detour( mask: c_int, statx_buf: *mut statx, ) -> c_int { - statx_logic(dir_fd, path_name, flags, mask, statx_buf) - .unwrap_or_bypass_with(|_bypass| FN_STATX(dir_fd, 
path_name, flags, mask, statx_buf)) + statx_logic(dir_fd, path_name, flags, mask, statx_buf).unwrap_or_bypass_with(|bypass| { + let path_name = update_ptr_from_bypass(path_name, &bypass); + FN_STATX(dir_fd, path_name, flags, mask, statx_buf) + }) } /// Hook for libc's stat syscall wrapper. @@ -820,9 +815,8 @@ pub(crate) unsafe extern "C" fn __xstat_detour( out_stat: *mut stat, ) -> c_int { stat_logic::(ver, None, Some(raw_path), out_stat as *mut _).unwrap_or_bypass_with( - |_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + |bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN___XSTAT(ver, raw_path, out_stat) }, ) @@ -836,9 +830,8 @@ pub(crate) unsafe extern "C" fn __lxstat_detour( out_stat: *mut stat, ) -> c_int { stat_logic::(ver, None, Some(raw_path), out_stat as *mut _).unwrap_or_bypass_with( - |_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + |bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN___LXSTAT(ver, raw_path, out_stat) }, ) @@ -851,9 +844,8 @@ pub(crate) unsafe extern "C" fn __xstat64_detour( raw_path: *const c_char, out_stat: *mut stat64, ) -> c_int { - stat_logic::(ver, None, Some(raw_path), out_stat).unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + stat_logic::(ver, None, Some(raw_path), out_stat).unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN___XSTAT64(ver, raw_path, out_stat) }) } @@ -865,9 +857,8 @@ pub(crate) unsafe extern "C" fn __lxstat64_detour( raw_path: *const c_char, out_stat: *mut stat64, ) -> c_int { - stat_logic::(ver, None, Some(raw_path), out_stat).unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + stat_logic::(ver, None, Some(raw_path), out_stat).unwrap_or_bypass_with(|bypass| { + let 
raw_path = update_ptr_from_bypass(raw_path, &bypass); FN___LXSTAT64(ver, raw_path, out_stat) }) } @@ -899,9 +890,8 @@ unsafe extern "C" fn fstatat_detour( out_stat: *mut stat, flag: c_int, ) -> c_int { - fstatat_logic(fd, raw_path, out_stat, flag).unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + fstatat_logic(fd, raw_path, out_stat, flag).unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_FSTATAT(fd, raw_path, out_stat, flag) }) } @@ -953,8 +943,10 @@ unsafe extern "C" fn realpath_detour( source_path: *const c_char, output_path: *mut c_char, ) -> *mut c_char { - realpath_logic(source_path, output_path) - .unwrap_or_bypass_with(|_| FN_REALPATH(source_path, output_path)) + realpath_logic(source_path, output_path).unwrap_or_bypass_with(|bypass| { + let source_path = update_ptr_from_bypass(source_path, &bypass); + FN_REALPATH(source_path, output_path) + }) } #[hook_guard_fn] @@ -962,8 +954,10 @@ unsafe extern "C" fn realpath_darwin_extsn_detour( source_path: *const c_char, output_path: *mut c_char, ) -> *mut c_char { - realpath_logic(source_path, output_path) - .unwrap_or_bypass_with(|_| FN_REALPATH_DARWIN_EXTSN(source_path, output_path)) + realpath_logic(source_path, output_path).unwrap_or_bypass_with(|bypass| { + let source_path = update_ptr_from_bypass(source_path, &bypass); + FN_REALPATH_DARWIN_EXTSN(source_path, output_path) + }) } fn vec_to_iovec(bytes: &[u8], iovecs: &[iovec]) { @@ -1054,9 +1048,8 @@ pub(crate) unsafe extern "C" fn readlink_detour( ssize_t::try_from(path_bytes.len().min(buffer_size)).unwrap() }) - .unwrap_or_bypass_with(|_bypass| { - #[cfg(target_os = "macos")] - let raw_path = update_ptr_from_bypass(raw_path, _bypass); + .unwrap_or_bypass_with(|bypass| { + let raw_path = update_ptr_from_bypass(raw_path, &bypass); FN_READLINK(raw_path, out_buffer, buffer_size) }) } diff --git a/mirrord/layer/src/file/mapper.rs 
b/mirrord/layer/src/file/mapper.rs new file mode 100644 index 00000000000..d4a59bd99d8 --- /dev/null +++ b/mirrord/layer/src/file/mapper.rs @@ -0,0 +1,82 @@ +use std::{borrow::Cow, collections::HashMap, path::PathBuf}; + +use regex::{Regex, RegexSet, RegexSetBuilder}; + +#[derive(Debug)] +pub struct FileRemapper { + filter: RegexSet, + mapping: Vec<(Regex, String)>, +} + +impl FileRemapper { + pub fn new(mapping: HashMap) -> Self { + let filter = RegexSetBuilder::new(mapping.keys()) + .case_insensitive(true) + .build() + .expect("Building path mapping regex set failed"); + let mapping = mapping + .into_iter() + .map(|(pattern, value)| { + ( + Regex::new(&pattern).expect("Building path mapping regex failed"), + value, + ) + }) + .collect(); + + FileRemapper { filter, mapping } + } + + #[tracing::instrument(level = "trace", skip(self), ret)] + pub fn change_path_str<'p>(&self, path_str: &'p str) -> Cow<'p, str> { + let matches = self.filter.matches(path_str); + + if let Some(index) = matches.iter().next() { + let (pattern, value) = self + .mapping + .get(index) + .expect("RegexSet matches returned an impossible index"); + + pattern.replace(path_str, value) + } else { + Cow::Borrowed(path_str) + } + } + + pub fn change_path(&self, path: PathBuf) -> PathBuf { + let path_str = path.to_str().unwrap_or_default(); + + match self.change_path_str(path_str) { + Cow::Borrowed(borrowed_path) if borrowed_path == path_str => path, + updated_path => PathBuf::from(updated_path.as_ref()), + } + } +} + +#[cfg(test)] +mod tests { + use rstest::rstest; + + use super::*; + + fn test_mapping() -> HashMap { + [ + ("/foo".to_string(), "/bar".to_string()), + ("/(baz)".to_string(), "/tmp/mirrord-$1".to_string()), + ("^/Users/(?.+)/Library/Caches/JetBrains/(?.+)/tomcat/(?.+)/static/manifest.xml".to_string(), "/opt/tomcat/static/manifest.xml".to_string()) + ] + .into() + } + + #[rstest] + #[case("/app/test", "/app/test")] + #[case("/foo/test", "/bar/test")] + #[case("/baz/test", 
"/tmp/mirrord-baz/test")] + #[case("/Users/john-doe/Library/Caches/JetBrains/IntelliJIdea2023.3/tomcat/6902e44a-a069-433d-ab49-5b46477acb97/static/manifest.xml", "/opt/tomcat/static/manifest.xml")] + #[case("/Users/john-doe/Library/Caches/JetBrains/IntelliJIdea2023.3/tomcat/6902e44a-a069-433d-ab49-5b46477acb97/static/index.html", "/Users/john-doe/Library/Caches/JetBrains/IntelliJIdea2023.3/tomcat/6902e44a-a069-433d-ab49-5b46477acb97/static/index.html")] + fn simple_mapping(#[case] input: PathBuf, #[case] expect: PathBuf) { + let remapper = FileRemapper::new(test_mapping()); + + assert_eq!(remapper.change_path(input), expect); + } +} diff --git a/mirrord/layer/src/file/ops.rs b/mirrord/layer/src/file/ops.rs index 789fc076eea..f4e5f1e43e0 100644 --- a/mirrord/layer/src/file/ops.rs +++ b/mirrord/layer/src/file/ops.rs @@ -35,14 +35,28 @@ const MAX_READ_SIZE: u64 = 1024 * 1024; /// * `write` - [`bool`], stating whether the file is accessed for writing macro_rules! ensure_not_ignored { ($path:expr, $write:expr) => { - crate::setup().file_filter().continue_or_bypass_with( + $crate::setup().file_filter().continue_or_bypass_with( $path.to_str().unwrap_or_default(), $write, - || Bypass::IgnoredFile($path.clone()), + || Bypass::ignored_file($path.to_str().unwrap_or_default()), )?; }; } +macro_rules! check_relative_paths { + ($path:expr) => { + if $path.is_relative() { + Detour::Bypass(Bypass::relative_path($path.to_str().unwrap_or_default()))? + }; + }; +} + +macro_rules! remap_path { + ($path:expr) => { + $crate::setup().file_remapper().change_path($path) + }; +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub(crate) struct RemoteFile { pub fd: u64, @@ -161,10 +175,9 @@ fn close_remote_file_on_failure(fd: u64) -> Result<()> { pub(crate) fn open(path: Detour, open_options: OpenOptionsInternal) -> Detour { let path = path?; - if path.is_relative() { - // Calls with non absolute paths are sent to libc::open. - Detour::Bypass(Bypass::RelativePath(path.clone()))? 
- }; + check_relative_paths!(path); + + let path = remap_path!(path); ensure_not_ignored!(path, open_options.is_write()); @@ -217,6 +230,7 @@ pub(crate) fn openat( // `openat` behaves the same as `open` when the path is absolute. When called with AT_FDCWD, the // call is propagated to `open`. if path.is_absolute() || fd == AT_FDCWD { + let path = remap_path!(path); open(Detour::Success(path), open_options) } else { // Relative path requires special handling, we must identify the relative part (relative to @@ -282,12 +296,9 @@ pub(crate) fn pread(local_fd: RawFd, buffer_size: u64, offset: u64) -> Detour) -> Detour { if crate::setup().experimental().readlink { - let path = path?; + let path = remap_path!(path?); - if path.is_relative() { - // Calls with non absolute paths are sent to libc::readlink. - Detour::Bypass(Bypass::RelativePath(path.clone()))? - }; + check_relative_paths!(path); ensure_not_ignored!(path, false); @@ -361,10 +372,9 @@ pub(crate) fn write(local_fd: RawFd, write_bytes: Option>) -> Detour, mode: u8) -> Detour { let path = path?; - if path.is_relative() { - // Calls with non absolute paths are sent to libc::open. - Detour::Bypass(Bypass::RelativePath(path.clone()))? - }; + check_relative_paths!(path); + + let path = remap_path!(path); ensure_not_ignored!(path, false); @@ -404,13 +414,10 @@ pub(crate) fn xstat( let path = path?; let fd = { if fd == AT_FDCWD { - if path.is_relative() { - // Calls with non absolute paths are sent to libc::fstatat. - return Detour::Bypass(Bypass::RelativePath(path)); - } else { - ensure_not_ignored!(path, false); - None - } + check_relative_paths!(path); + + ensure_not_ignored!(remap_path!(path.clone()), false); + None } else { Some(get_remote_fd(fd)?) } @@ -420,10 +427,11 @@ pub(crate) fn xstat( // lstat/stat (Some(path), None) => { let path = path?; - if path.is_relative() { - // Calls with non absolute paths are sent to libc::open. 
- return Detour::Bypass(Bypass::RelativePath(path)); - } + + check_relative_paths!(path); + + let path = remap_path!(path); + ensure_not_ignored!(path, false); (Some(path), None) } @@ -484,7 +492,9 @@ pub(crate) fn statx_logic( ensure_not_ignored!(path_name, false); (None, Some(path_name)) } else if !path_name.as_os_str().is_empty() && dir_fd == libc::AT_FDCWD { - return Detour::Bypass(Bypass::RelativePath(path_name)); + return Detour::Bypass(Bypass::relative_path( + path_name.to_str().unwrap_or_default(), + )); } else if !path_name.as_os_str().is_empty() { (Some(get_remote_fd(dir_fd)?), Some(path_name)) } else if (flags & libc::AT_EMPTY_PATH) != 0 { @@ -615,10 +625,9 @@ fn absolute_path(path: PathBuf) -> PathBuf { pub(crate) fn realpath(path: Detour) -> Detour { let path = path?; - if path.is_relative() { - // Calls with non absolute paths are sent to libc::open. - Detour::Bypass(Bypass::RelativePath(path.clone()))? - }; + check_relative_paths!(path); + + let path = remap_path!(path); let realpath = absolute_path(path); diff --git a/mirrord/layer/src/lib.rs b/mirrord/layer/src/lib.rs index 51d053a20a7..06e58cdfd57 100644 --- a/mirrord/layer/src/lib.rs +++ b/mirrord/layer/src/lib.rs @@ -452,6 +452,7 @@ fn sip_only_layer_start(mut config: LayerConfig, patch_binaries: Vec) { read_only: None, local: None, not_found: None, + mapping: None, }; let debugger_ports = DebuggerPorts::from_env(); let setup = LayerSetup::new(config, debugger_ports, true); diff --git a/mirrord/layer/src/setup.rs b/mirrord/layer/src/setup.rs index f34638ffee3..eafca7b931e 100644 --- a/mirrord/layer/src/setup.rs +++ b/mirrord/layer/src/setup.rs @@ -19,7 +19,7 @@ use regex::RegexSet; use crate::{ debugger_ports::DebuggerPorts, - file::filter::FileFilter, + file::{filter::FileFilter, mapper::FileRemapper}, socket::{dns_selector::DnsSelector, OutgoingSelector}, }; @@ -30,6 +30,7 @@ use crate::{ pub struct LayerSetup { config: LayerConfig, file_filter: FileFilter, + file_remapper: FileRemapper, 
debugger_ports: DebuggerPorts, remote_unix_streams: RegexSet, outgoing_selector: OutgoingSelector, @@ -45,6 +46,8 @@ pub struct LayerSetup { impl LayerSetup { pub fn new(config: LayerConfig, debugger_ports: DebuggerPorts, local_hostname: bool) -> Self { let file_filter = FileFilter::new(config.feature.fs.clone()); + let file_remapper = + FileRemapper::new(config.feature.fs.mapping.clone().unwrap_or_default()); let remote_unix_streams = config .feature @@ -77,6 +80,7 @@ impl LayerSetup { Self { config, file_filter, + file_remapper, debugger_ports, remote_unix_streams, outgoing_selector, @@ -101,6 +105,10 @@ impl LayerSetup { &self.file_filter } + pub fn file_remapper(&self) -> &FileRemapper { + &self.file_remapper + } + pub fn incoming_config(&self) -> &IncomingConfig { &self.config.feature.network.incoming }