diff --git a/changelog.d/2500.fixed.md b/changelog.d/2500.fixed.md new file mode 100644 index 00000000000..6659207f6f6 --- /dev/null +++ b/changelog.d/2500.fixed.md @@ -0,0 +1 @@ +Fixed SIP issue with Turbo \ No newline at end of file diff --git a/mirrord/layer/src/common.rs b/mirrord/layer/src/common.rs index 92a9b48ff80..d5dc222f3f5 100644 --- a/mirrord/layer/src/common.rs +++ b/mirrord/layer/src/common.rs @@ -4,8 +4,6 @@ use std::{ffi::CStr, fmt::Debug, path::PathBuf}; use libc::c_char; use mirrord_intproxy_protocol::{IsLayerRequest, IsLayerRequestWithResponse, MessageId}; use mirrord_protocol::file::OpenOptionsInternal; -#[cfg(target_os = "macos")] -use mirrord_sip::{MIRRORD_TEMP_BIN_DIR_CANONIC_STRING, MIRRORD_TEMP_BIN_DIR_STRING}; use tracing::warn; use crate::{ @@ -84,9 +82,14 @@ impl CheckedInto for *const c_char { #[cfg(target_os = "macos")] pub fn strip_mirrord_path(path_str: &str) -> Option<&str> { + use mirrord_sip::MIRRORD_PATCH_DIR; + + // SAFETY: We only slice after we find the string in the path + // so it must be valid + #[allow(clippy::indexing_slicing)] path_str - .strip_prefix(MIRRORD_TEMP_BIN_DIR_STRING.as_str()) - .or_else(|| path_str.strip_prefix(MIRRORD_TEMP_BIN_DIR_CANONIC_STRING.as_str())) + .find(MIRRORD_PATCH_DIR) + .map(|index| &path_str[(MIRRORD_PATCH_DIR.len() + index)..]) } impl CheckedInto for *const c_char { diff --git a/mirrord/layer/src/exec_utils.rs b/mirrord/layer/src/exec_utils.rs index 3fae927ae5c..54233a0fe9e 100644 --- a/mirrord/layer/src/exec_utils.rs +++ b/mirrord/layer/src/exec_utils.rs @@ -2,7 +2,7 @@ use std::{ env, - ffi::{c_void, CString}, + ffi::{c_void, CStr, CString}, marker::PhantomData, path::PathBuf, ptr, @@ -11,9 +11,7 @@ use std::{ use libc::{c_char, c_int, pid_t}; use mirrord_layer_macro::{hook_fn, hook_guard_fn}; -use mirrord_sip::{ - sip_patch, SipError, MIRRORD_TEMP_BIN_DIR_CANONIC_STRING, MIRRORD_TEMP_BIN_DIR_STRING, -}; +use mirrord_sip::{sip_patch, SipError, MIRRORD_PATCH_DIR}; use null_terminated::Nul; use tracing::{trace, warn}; @@ -149,10 +147,12 @@ fn intercept_tmp_dir(argv_arr: &Nul<*const c_char>) -> Detour { let arg_str: &str = arg.checked_into()?; trace!("exec arg: {arg_str}"); + // SAFETY: We only slice after we find the string in the path + // so it must be valid + #[allow(clippy::indexing_slicing)] let stripped = arg_str - .strip_prefix(MIRRORD_TEMP_BIN_DIR_STRING.as_str()) - // If /var/folders... not a prefix, check /private/var/folers... - .or(arg_str.strip_prefix(MIRRORD_TEMP_BIN_DIR_CANONIC_STRING.as_str())) + .find(MIRRORD_PATCH_DIR) + .map(|index| &arg_str[(MIRRORD_PATCH_DIR.len() + index)..]) .inspect(|original_path| { trace!( "Intercepted mirrord's temp dir in argv: {}. Replacing with original path: {}.", @@ -171,6 +171,33 @@ fn intercept_tmp_dir(argv_arr: &Nul<*const c_char>) -> Detour { Success(c_string_vec) } +/// Verifies that mirrord environment is passed to child process +fn intercept_environment(envp_arr: &Nul<*const c_char>) -> Detour { + let mut c_string_vec = Argv::default(); + + let mut found_dyld = false; + for arg in envp_arr.iter() { + let Detour::Success(arg_str): Detour<&str> = arg.checked_into() else { + tracing::debug!("Failed to convert envp argument to string. Skipping."); + unsafe { c_string_vec.0.push(CStr::from_ptr(*arg).to_owned()) }; + continue; + }; + + if arg_str.split('=').next() == Some("DYLD_INSERT_LIBRARIES") { + found_dyld = true; + } + + c_string_vec.0.push(CString::new(arg_str)?) + } + + if !found_dyld { + for (key, value) in crate::setup().env_backup() { + c_string_vec.0.push(CString::new(format!("{key}={value}"))?); + } + } + Success(c_string_vec) +} + /// Patch the new executable for SIP if necessary. Also: if mirrord's temporary directory appears /// in any of the arguments, remove it and leave only the original path of the file. If for example /// `argv[1]` is `"/tmp/mirrord-bin/bin/bash"`, create a new `argv` where `argv[1]` is @@ -178,7 +205,8 @@ fn intercept_tmp_dir(argv_arr: &Nul<*const c_char>) -> Detour { unsafe fn patch_sip_for_new_process( path: *const c_char, argv: *const *const c_char, -) -> Detour<(CString, Argv)> { + envp: *const *const c_char, +) -> Detour<(CString, Argv, Argv)> { let calling_exe = env::current_exe() .map(|path| path.to_string_lossy().to_string()) .unwrap_or_default(); @@ -196,14 +224,16 @@ unsafe fn patch_sip_for_new_process( .unwrap_or(CString::new(path_str.to_string())?); let argv_arr = Nul::new_unchecked(argv); + let envp_arr = Nul::new_unchecked(envp); let argv_vec = intercept_tmp_dir(argv_arr)?; - Success((path_c_string, argv_vec)) + let envp_vec = intercept_environment(envp_arr)?; + Success((path_c_string, argv_vec, envp_vec)) } /// Hook for `libc::execve`. /// -/// We change 2 arguments and then call the original functions: +/// We change 3 arguments and then call the original functions: /// 1. The executable path - we check it for SIP, create a patched binary and use the path to the /// new path instead of the original path. If there is no SIP, we use a new string with the same /// path. @@ -211,7 +241,10 @@ unsafe fn patch_sip_for_new_process( /// "/var/folders/1337/mirrord-bin/opt/homebrew/bin/npx" Switch it to "/opt/homebrew/bin/npx" /// Also here we create a new array with pointers to new strings, even if there are no changes /// needed (except for the case of an error). -/// +/// 3. envp - We found out that Turbopack (Vercel) spawns a clean "Node" instance without env, +/// basically stripping all of the important mirrord env. +/// https://github.com/metalbear-co/mirrord/issues/2500 +/// We restore the `DYLD_INSERT_LIBRARIES` environment variable and all env vars starting with `MIRRORD_` if the dyld var can't be found in `envp`. /// If there is an error in the detour, we don't exit or anything, we just call the original libc /// function with the original passed arguments. #[hook_guard_fn] @@ -220,13 +253,14 @@ pub(crate) unsafe extern "C" fn execve_detour( argv: *const *const c_char, envp: *const *const c_char, ) -> c_int { - match patch_sip_for_new_process(path, argv) { - Success((new_path, new_argv)) => { + match patch_sip_for_new_process(path, argv, envp) { + Success((new_path, new_argv, new_envp)) => { let new_argv = new_argv.null_vec(); + let new_envp = new_envp.null_vec(); FN_EXECVE( new_path.as_ptr(), new_argv.as_ptr() as *const *const c_char, - envp, + new_envp.as_ptr() as *const *const c_char, ) } _ => FN_EXECVE(path, argv, envp), @@ -245,16 +279,18 @@ pub(crate) unsafe extern "C" fn posix_spawn_detour( argv: *const *const c_char, envp: *const *const c_char, ) -> c_int { - match patch_sip_for_new_process(path, argv) { - Success((new_path, new_argv)) => { + match patch_sip_for_new_process(path, argv, envp) { + Success((new_path, new_argv, new_envp)) => { let new_argv = new_argv.null_vec(); + let new_envp = new_envp.null_vec(); + FN_POSIX_SPAWN( pid, new_path.as_ptr(), file_actions, attrp, new_argv.as_ptr() as *const *const c_char, - envp, + new_envp.as_ptr() as *const *const c_char, ) } _ => FN_POSIX_SPAWN(pid, path, file_actions, attrp, argv, envp), diff --git a/mirrord/layer/src/setup.rs b/mirrord/layer/src/setup.rs index 191541a47ab..471455cc054 100644 --- a/mirrord/layer/src/setup.rs +++ b/mirrord/layer/src/setup.rs @@ -33,6 +33,9 @@ pub struct LayerSetup { proxy_address: SocketAddr, incoming_mode: IncomingMode, local_hostname: bool, + // to be used on macOS to restore env on execv + #[cfg(target_os = "macos")] + env_backup: Vec<(String, String)>, } impl LayerSetup { @@ -62,6 +65,10 @@ impl LayerSetup { .expect("failed to parse internal proxy address"); let incoming_mode = IncomingMode::new(&config.feature.network.incoming); + #[cfg(target_os = "macos")] + let env_backup = std::env::vars() + .filter(|(k, _)| k.starts_with("MIRRORD_") || k == "DYLD_INSERT_LIBRARIES") + .collect(); Self { config, @@ -72,6 +79,8 @@ impl LayerSetup { proxy_address, incoming_mode, local_hostname, + #[cfg(target_os = "macos")] + env_backup, } } @@ -143,6 +152,11 @@ impl LayerSetup { pub fn local_hostname(&self) -> bool { self.local_hostname } + + #[cfg(target_os = "macos")] + pub fn env_backup(&self) -> &Vec<(String, String)> { + &self.env_backup + } } /// HTTP filter used by the layer with the `steal` feature. diff --git a/mirrord/sip/src/lib.rs b/mirrord/sip/src/lib.rs index 4e6bd8755cd..ad30278e325 100644 --- a/mirrord/sip/src/lib.rs +++ b/mirrord/sip/src/lib.rs @@ -35,7 +35,10 @@ mod main { }; /// Where patched files are stored, relative to the temp dir (`/tmp/mirrord-bin/...`). - pub const MIRRORD_PATCH_DIR: &str = "mirrord-bin"; + /// We added some random characaters to the end so we'll be able to identify dir better + /// in situations where $TMPDIR changes between exec's, leading to strip not working + /// https://github.com/metalbear-co/mirrord/issues/2500#issuecomment-2160026642 + pub const MIRRORD_PATCH_DIR: &str = "mirrord-bin-ghu3278mz"; pub const FRAMEWORKS_ENV_VAR_NAME: &str = "DYLD_FALLBACK_FRAMEWORK_PATH"; @@ -59,17 +62,6 @@ mod main { pub static MIRRORD_TEMP_BIN_DIR_STRING: Lazy = Lazy::new(|| get_temp_bin_str_prefix(&MIRRORD_TEMP_BIN_DIR_PATH_BUF)); - /// Canonicalized version of `MIRRORD_TEMP_BIN_DIR`. - pub static MIRRORD_TEMP_BIN_DIR_CANONIC_STRING: Lazy = Lazy::new(|| { - MIRRORD_TEMP_BIN_DIR_PATH_BUF - // Resolve symbolic links! (specifically /var -> private/var). - .canonicalize() - .as_deref() - .map(get_temp_bin_str_prefix) - // If canonicalization fails, we use the uncanonicalized path string. - .unwrap_or(MIRRORD_TEMP_BIN_DIR_STRING.to_string()) - }); - /// Check if a cpu subtype (already parsed with the correct endianness) is arm64e, given its /// main cpu type is arm64. We only consider the lowest byte in the check. fn is_cpu_subtype_arm64e(subtype: u32) -> bool {