From 6f0bf209fe621869a2e18f3a078fb1c38d25e0f3 Mon Sep 17 00:00:00 2001 From: meowjesty <43983236+meowjesty@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:48:39 -0300 Subject: [PATCH] Add a tokio sleep after MirrordExecution::start so we can await on it and drive the runtime, preventing the websocket reset. (#2954) * Add a tokio sleep after MirrordExecution::start so we can await on it and drive the runtime, preventing the websocket reset. * Talk about 2 connections being made. * fix docs * changelog --- changelog.d/+79-bogus-websocket-reset.fixed.md | 1 + mirrord/cli/src/execution.rs | 10 ++++++++++ mirrord/cli/src/main.rs | 7 +++++++ mirrord/operator/src/client.rs | 6 ++++++ 4 files changed, 24 insertions(+) create mode 100644 changelog.d/+79-bogus-websocket-reset.fixed.md diff --git a/changelog.d/+79-bogus-websocket-reset.fixed.md b/changelog.d/+79-bogus-websocket-reset.fixed.md new file mode 100644 index 00000000000..bf72506ebbb --- /dev/null +++ b/changelog.d/+79-bogus-websocket-reset.fixed.md @@ -0,0 +1 @@ +Add a sleep and await on it after websocket connection to drive IO runtime and prevent websocket closing without handshake. diff --git a/mirrord/cli/src/execution.rs b/mirrord/cli/src/execution.rs index ec23590a9dc..412f2690aae 100644 --- a/mirrord/cli/src/execution.rs +++ b/mirrord/cli/src/execution.rs @@ -164,6 +164,16 @@ impl MirrordExecution { /// 1. Drop this struct when an error occurs, /// 2. Successfully `exec`, /// 3. Wait for intproxy exit with [`MirrordExecution::wait`]. + /// + /// # `async` shenanigans when using the mirrord operator. + /// + /// Here we connect a websocket to the operator created agent, and this connection + /// might get reset if we don't drive its IO (call `await` on the runtime after the + /// websocket is up). This is an issue because we start things with `execv`, so we're + /// kinda out of the whole Rust world of nicely dropping things. + /// + /// tl;dr: In `exec_process`, you need to call and `await` either + /// [`tokio::time::sleep`] or [`tokio::task::yield_now`] after calling this function. #[tracing::instrument(level = Level::TRACE, skip_all)] pub(crate) async fn start

( config: &LayerConfig, diff --git a/mirrord/cli/src/main.rs b/mirrord/cli/src/main.rs index e02b713ee64..bd9dc386714 100644 --- a/mirrord/cli/src/main.rs +++ b/mirrord/cli/src/main.rs @@ -90,6 +90,12 @@ where #[cfg(not(target_os = "macos"))] let execution_info = MirrordExecution::start(&config, &mut sub_progress, analytics).await?; + // This is not being yielded, as this is not proper async, something along those lines. + // We need an `await` somewhere in this function to drive our socket IO that happens + // in `MirrordExecution::start`. If we don't have this here, then the websocket + // connection resets, and in the operator you'll get a websocket error. + tokio::time::sleep(Duration::from_micros(1)).await; + #[cfg(target_os = "macos")] let (_did_sip_patch, binary) = match execution_info.patched_path { None => (false, args.binary.clone()), @@ -139,6 +145,7 @@ where .into_iter() .map(CString::new) .collect::, _>>()?; + // env vars should be formatted as "varname=value" CStrings let env = env_vars .into_iter() diff --git a/mirrord/operator/src/client.rs b/mirrord/operator/src/client.rs index 46c555ce49d..b92b3b1ea97 100644 --- a/mirrord/operator/src/client.rs +++ b/mirrord/operator/src/client.rs @@ -531,6 +531,12 @@ impl OperatorApi { /// Starts a new operator session and connects to the target. /// Returned [`OperatorSessionConnection::session`] can be later used to create another /// connection in the same session with [`OperatorApi::connect_in_existing_session`]. + /// + /// 2 connections are made to the operator (this means that we hit the target's + /// `connect_resource` twice): + /// + /// 1. The 1st one is here; + /// 2. The 2nd is on the `AgentConnection::new`; #[tracing::instrument( level = Level::TRACE, skip(layer_config, progress),