Skip to content

Commit

Permalink
Fix execve (#2628)
Browse files Browse the repository at this point in the history
* ..

* maybe fix execve

* ..

* format

* ..

* ..

* fix locks

* Apply suggestions from code review

Co-authored-by: Dmitry Dodzin <[email protected]>

---------

Co-authored-by: Dmitry Dodzin <[email protected]>
  • Loading branch information
aviramha and DmitryDodzin authored Aug 5, 2024
1 parent a833437 commit 2071535
Show file tree
Hide file tree
Showing 13 changed files with 583 additions and 513 deletions.
818 changes: 420 additions & 398 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions changelog.d/2624.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix execve, make hooks on Linux enabled by default (fix data race on process initialization, might fix more stuff)
2 changes: 1 addition & 1 deletion mirrord/config/src/experimental.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub struct ExperimentalConfig {
/// Enables exec hooks on Linux. Enable Linux hooks can fix issues when the application
/// shares sockets with child commands (e.g Python web servers with reload),
/// but the feature is not stable and may cause other issues.
#[config(default = false)]
#[config(default = true)]
pub enable_exec_hooks_linux: bool,
}

Expand Down
1 change: 0 additions & 1 deletion mirrord/layer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ itertools = "0.13"
os_info = "3"
bytemuck = "1"
bimap.workspace = true
dashmap = "5.4"
hashbrown = "0.14"
exec.workspace = true
syscalls = { version = "0.6", features = ["full"] }
Expand Down
7 changes: 5 additions & 2 deletions mirrord/layer/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::{
error::{HookError, HookResult},
exec_hooks::Argv,
file::OpenOptionsInternalExt,
socket::SHARED_SOCKETS_ENV_VAR,
PROXY_CONNECTION,
};

Expand Down Expand Up @@ -118,8 +119,9 @@ impl CheckedInto<PathBuf> for *const c_char {
}
}

// **Warning**: The implementation here expects that `*const *const c_char` be a valid,
// null-terminated list! We're using `Nul::new_unchecked`, which doesn't check for this.
/// **Warning**: The implementation here expects that `*const *const c_char` be a valid,
/// null-terminated list! We're using `Nul::new_unchecked`, which doesn't check for this.
/// NOTE: It also strips shared sockets to avoid it being double set.
impl CheckedInto<Argv> for *const *const c_char {
fn checked_into(self) -> Detour<Argv> {
let c_list = self
Expand All @@ -132,6 +134,7 @@ impl CheckedInto<Argv> for *const *const c_char {
// Remove the last `null` pointer.
.filter(|value| !value.is_null())
.map(|value| unsafe { CStr::from_ptr(*value) }.to_owned())
.filter(|value| !value.to_string_lossy().starts_with(SHARED_SOCKETS_ENV_VAR))
.collect::<Argv>();

Detour::Success(list)
Expand Down
36 changes: 16 additions & 20 deletions mirrord/layer/src/exec_hooks/hooks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use mirrord_layer_macro::hook_fn;
use mirrord_layer_macro::hook_guard_fn;

use super::*;
#[cfg(not(target_os = "macos"))]
use crate::common::CheckedInto;
#[cfg(target_os = "macos")]
use crate::exec_utils::*;
use crate::{
Expand All @@ -20,11 +22,14 @@ use crate::{

/// Converts the [`SOCKETS`] map into a vector of pairs `(Fd, UserSocket)`, so we can rebuild
/// it as a map.
fn shared_sockets() -> Vec<(i32, UserSocket)> {
SOCKETS
.iter()
.map(|inner| (*inner.key(), UserSocket::clone(inner.value())))
.collect::<Vec<_>>()
fn shared_sockets() -> Detour<Vec<(i32, UserSocket)>> {
Detour::Success(
SOCKETS
.lock()?
.iter()
.map(|(key, value)| (*key, value.as_ref().clone()))
.collect::<Vec<_>>(),
)
}

/// Takes an [`Argv`] with the enviroment variables from an `exec` call, extending it with
Expand All @@ -38,7 +43,7 @@ pub(crate) fn prepare_execve_envp(env_vars: Detour<Argv>) -> Detour<Argv> {
other => Detour::Bypass(other),
})?;

let encoded = bincode::encode_to_vec(shared_sockets(), bincode::config::standard())
let encoded = bincode::encode_to_vec(shared_sockets()?, bincode::config::standard())
.map(|bytes| BASE64_URL_SAFE.encode(bytes))?;

env_vars.push(CString::new(format!("{SHARED_SOCKETS_ENV_VAR}={encoded}"))?);
Expand All @@ -62,16 +67,12 @@ unsafe fn environ() -> *const *const c_char {
#[cfg(not(target_os = "macos"))]
#[hook_fn]
unsafe extern "C" fn execv_detour(path: *const c_char, argv: *const *const c_char) -> c_int {
let encoded = bincode::encode_to_vec(shared_sockets(), bincode::config::standard())
.map(|bytes| BASE64_URL_SAFE.encode(bytes))
.unwrap_or_default();

// `encoded` is emtpy if the encoding failed, so we don't set the env var.
if !encoded.is_empty() {
std::env::set_var(SHARED_SOCKETS_ENV_VAR, encoded);
let envp = environ();
if let Detour::Success(envp) = prepare_execve_envp(envp.checked_into()) {
FN_EXECVE(path, argv, envp.leak())
} else {
FN_EXECVE(path, argv, envp)
}

FN_EXECVE(path, argv, environ())
}

/// Hook for `libc::execve`.
Expand All @@ -84,11 +85,6 @@ pub(crate) unsafe extern "C" fn execve_detour(
argv: *const *const c_char,
envp: *const *const c_char,
) -> c_int {
use crate::{common::CheckedInto, detour::DetourGuard};

let _guard = DetourGuard::new();

// Hopefully `envp` is a properly null-terminated list.
if let Detour::Success(envp) = prepare_execve_envp(envp.checked_into()) {
FN_EXECVE(path, argv, envp.leak())
} else {
Expand Down
8 changes: 4 additions & 4 deletions mirrord/layer/src/file.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::{
collections::HashMap,
os::unix::io::RawFd,
sync::{Arc, LazyLock},
sync::{Arc, LazyLock, Mutex},
};

use dashmap::DashMap;
use libc::{c_int, O_ACCMODE, O_APPEND, O_CREAT, O_RDONLY, O_RDWR, O_TRUNC, O_WRONLY};
use mirrord_protocol::file::{
AccessFileRequest, CloseFileRequest, FdOpenDirRequest, OpenDirResponse, OpenOptionsInternal,
Expand Down Expand Up @@ -36,8 +36,8 @@ type DirStreamFd = usize;
/// We use Arc so we can support dup more nicely, this means that if user
/// Opens file `A`, receives fd 1, then dups, receives 2 - both stay open, until both are closed.
/// Previously in such scenario we would close the remote, causing issues.
pub(crate) static OPEN_FILES: LazyLock<DashMap<LocalFd, Arc<ops::RemoteFile>>> =
LazyLock::new(|| DashMap::with_capacity(4));
pub(crate) static OPEN_FILES: LazyLock<Mutex<HashMap<LocalFd, Arc<ops::RemoteFile>>>> =
LazyLock::new(|| Mutex::new(HashMap::new()));

/// Extension trait for [`OpenOptionsInternal`], used to convert between `libc`-ish open options and
/// Rust's [`std::fs::OpenOptions`]
Expand Down
25 changes: 18 additions & 7 deletions mirrord/layer/src/file/open_dirs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
//! `readdir` family.
use std::{
collections::HashMap,
ffi::CString,
sync::{Arc, LazyLock, Mutex},
};

use dashmap::DashMap;
use mirrord_protocol::file::{CloseDirRequest, DirEntryInternal, ReadDirRequest, ReadDirResponse};

use super::{DirStreamFd, LocalFd, RemoteFd, OPEN_FILES};
Expand All @@ -21,14 +21,14 @@ pub static OPEN_DIRS: LazyLock<OpenDirs> = LazyLock::new(OpenDirs::new);

/// State related to open remote directories.
pub struct OpenDirs {
inner: DashMap<DirStreamFd, Arc<Mutex<OpenDir>>>,
inner: Mutex<HashMap<DirStreamFd, Arc<Mutex<OpenDir>>>>,
}

impl OpenDirs {
/// Creates an empty state.
fn new() -> Self {
Self {
inner: DashMap::with_capacity(4),
inner: Mutex::new(HashMap::new()),
}
}

Expand All @@ -38,17 +38,24 @@ impl OpenDirs {
///
/// * `local_dir_fd` - opaque identifier
/// * `remote_fd` - descriptor of the remote directory (received from the agent)
pub fn insert(&self, local_dir_fd: DirStreamFd, remote_fd: RemoteFd, base_fd: LocalFd) {
self.inner.insert(
pub fn insert(
&self,
local_dir_fd: DirStreamFd,
remote_fd: RemoteFd,
base_fd: LocalFd,
) -> Detour<()> {
self.inner.lock()?.insert(
local_dir_fd,
Mutex::new(OpenDir::new(local_dir_fd, remote_fd, base_fd)).into(),
);
Detour::Success(())
}

/// Reads next entry from the open directory with the given [`DirStreamFd`].
pub fn read_r(&self, local_dir_fd: DirStreamFd) -> Detour<Option<DirEntryInternal>> {
let dir = self
.inner
.lock()?
.get(&local_dir_fd)
.ok_or(Bypass::LocalDirStreamNotFound(local_dir_fd))?
.clone();
Expand All @@ -62,6 +69,7 @@ impl OpenDirs {
pub fn get_fd(&self, local_dir_fd: DirStreamFd) -> Detour<LocalFd> {
let dir = self
.inner
.lock()?
.get(&local_dir_fd)
.ok_or(Bypass::LocalDirStreamNotFound(local_dir_fd))?
.clone();
Expand All @@ -86,6 +94,7 @@ impl OpenDirs {
pub fn read(&self, local_dir_fd: DirStreamFd) -> Detour<*const libc::dirent> {
let dir = self
.inner
.lock()?
.get(&local_dir_fd)
.ok_or(Bypass::LocalDirStreamNotFound(local_dir_fd))?
.clone();
Expand Down Expand Up @@ -117,6 +126,7 @@ impl OpenDirs {
pub fn read64(&self, local_dir_fd: DirStreamFd) -> Detour<*const libc::dirent64> {
let dir = self
.inner
.lock()?
.get(&local_dir_fd)
.ok_or(Bypass::LocalDirStreamNotFound(local_dir_fd))?
.clone();
Expand All @@ -134,14 +144,15 @@ impl OpenDirs {

/// Closes the open directory with the given [`DirStreamFd`].
pub fn close(&self, local_dir_fd: DirStreamFd) -> Detour<libc::c_int> {
let (_, dir) = self
let dir = self
.inner
.lock()?
.remove(&local_dir_fd)
.ok_or(Bypass::LocalDirStreamNotFound(local_dir_fd))?;

let mut guard = dir.lock().expect("lock poisoned");
guard.closed = true;
OPEN_FILES.remove(&guard.base_fd);
OPEN_FILES.lock()?.remove(&guard.base_fd);
common::make_proxy_request_no_response(CloseDirRequest {
remote_fd: guard.remote_fd,
})?;
Expand Down
13 changes: 9 additions & 4 deletions mirrord/layer/src/file/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ fn get_remote_fd(local_fd: RawFd) -> Detour<u64> {
// don't add a trace here since it causes deadlocks in some cases.
Detour::Success(
OPEN_FILES
.lock()?
.get(&local_fd)
.map(|remote_file| remote_file.fd)
// Bypass if we're not managing the relative part.
Expand Down Expand Up @@ -188,7 +189,7 @@ pub(crate) fn open(path: Detour<PathBuf>, open_options: OpenOptionsInternal) ->
// the fd to a string.
let local_file_fd = create_local_fake_file(remote_fd)?;

OPEN_FILES.insert(
OPEN_FILES.lock()?.insert(
local_file_fd,
Arc::new(RemoteFile::new(remote_fd, path.display().to_string())),
);
Expand All @@ -202,7 +203,11 @@ pub(crate) fn fdopendir(fd: RawFd) -> Detour<usize> {
// usize == ptr size
// we don't return a pointer to an address that contains DIR

let remote_file_fd = OPEN_FILES.get(&fd).ok_or(Bypass::LocalFdNotFound(fd))?.fd;
let remote_file_fd = OPEN_FILES
.lock()?
.get(&fd)
.ok_or(Bypass::LocalFdNotFound(fd))?
.fd;

let open_dir_request = FdOpenDirRequest {
remote_fd: remote_file_fd,
Expand All @@ -212,7 +217,7 @@ pub(crate) fn fdopendir(fd: RawFd) -> Detour<usize> {
common::make_proxy_request_with_response(open_dir_request)??;

let local_dir_fd = create_local_fake_file(remote_dir_fd)?;
OPEN_DIRS.insert(local_dir_fd as usize, remote_dir_fd, fd);
OPEN_DIRS.insert(local_dir_fd as usize, remote_dir_fd, fd)?;

// Let it stay in OPEN_FILES, as some functions might use it in comibination with dirfd

Expand Down Expand Up @@ -248,7 +253,7 @@ pub(crate) fn openat(

let local_file_fd = create_local_fake_file(remote_fd)?;

OPEN_FILES.insert(
OPEN_FILES.lock()?.insert(
local_file_fd,
Arc::new(RemoteFile::new(remote_fd, path.display().to_string())),
);
Expand Down
7 changes: 5 additions & 2 deletions mirrord/layer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,12 +573,15 @@ fn enable_hooks(state: &LayerSetup) {
#[mirrord_layer_macro::instrument(level = "trace", fields(pid = std::process::id()))]
pub(crate) fn close_layer_fd(fd: c_int) {
// Remove from sockets.
if let Some((_, socket)) = SOCKETS.remove(&fd) {
if let Some(socket) = SOCKETS.lock().expect("SOCKETS lock failed").remove(&fd) {
// Closed file is a socket, so if it's already bound to a port - notify agent to stop
// mirroring/stealing that port.
socket.close();
} else if setup().fs_config().is_active() {
OPEN_FILES.remove(&fd);
OPEN_FILES
.lock()
.expect("OPEN_FILES lock failed")
.remove(&fd);
}
}

Expand Down
27 changes: 15 additions & 12 deletions mirrord/layer/src/socket.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
//! We implement each hook function in a safe function as much as possible, having the unsafe do the
//! absolute minimum
use std::{
collections::HashMap,
net::{SocketAddr, ToSocketAddrs},
os::unix::io::RawFd,
str::FromStr,
sync::{Arc, LazyLock},
sync::{Arc, LazyLock, Mutex},
};

use base64::prelude::*;
use bincode::{Decode, Encode};
use dashmap::DashMap;
use hashbrown::hash_set::HashSet;
use hooks::FN_FCNTL;
use libc::{c_int, sockaddr, socklen_t};
Expand Down Expand Up @@ -53,7 +53,7 @@ pub(crate) const SHARED_SOCKETS_ENV_VAR: &str = "MIRRORD_SHARED_SOCKETS";
/// - [`libc::FD_CLOEXEC`] behaviour: While rebuilding sockets from the env var, we also
/// check if they're set with the cloexec flag, so that children processes don't end up using
/// sockets that are exclusive for their parents.
pub(crate) static SOCKETS: LazyLock<DashMap<RawFd, Arc<UserSocket>>> = LazyLock::new(|| {
pub(crate) static SOCKETS: LazyLock<Mutex<HashMap<RawFd, Arc<UserSocket>>>> = LazyLock::new(|| {
std::env::var(SHARED_SOCKETS_ENV_VAR)
.ok()
.and_then(|encoded| BASE64_URL_SAFE.decode(encoded.into_bytes()).ok())
Expand All @@ -65,14 +65,16 @@ pub(crate) static SOCKETS: LazyLock<DashMap<RawFd, Arc<UserSocket>>> = LazyLock:
.ok()
})
.map(|(fds_and_sockets, _)| {
DashMap::from_iter(fds_and_sockets.into_iter().filter_map(|(fd, socket)| {
// Do not inherit sockets that are `FD_CLOEXEC`.
if unsafe { FN_FCNTL(fd, libc::F_GETFD, 0) != -1 } {
Some((fd, Arc::new(socket)))
} else {
None
}
}))
Mutex::new(HashMap::from_iter(fds_and_sockets.into_iter().filter_map(
|(fd, socket)| {
// Do not inherit sockets that are `FD_CLOEXEC`.
if unsafe { FN_FCNTL(fd, libc::F_GETFD, 0) != -1 } {
Some((fd, Arc::new(socket)))
} else {
None
}
},
)))
})
.unwrap_or_default()
});
Expand Down Expand Up @@ -372,8 +374,9 @@ impl OutgoingSelector {
}

let cached = REMOTE_DNS_REVERSE_MAPPING
.lock()?
.get(&address.ip())
.map(|entry| entry.value().clone());
.cloned();
let Some(hostname) = cached else {
return Ok(address);
};
Expand Down
Loading

0 comments on commit 2071535

Please sign in to comment.