From 635cd6eb3224f6b06dd13a8a567d250b10bf576c Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 17 Dec 2024 16:44:38 -0300 Subject: [PATCH 01/85] Add metrics system (prometheus) to the agent. --- Cargo.lock | 114 ++++++++++++++++- mirrord/agent/Cargo.toml | 4 + mirrord/agent/README.md | 6 + mirrord/agent/src/entrypoint.rs | 37 ++++-- mirrord/agent/src/file.rs | 131 ++++++++++++------- mirrord/agent/src/main.rs | 2 + mirrord/agent/src/metrics.rs | 174 ++++++++++++++++++++++++++ mirrord/kube/src/api/container/job.rs | 5 + mirrord/kube/src/api/container/pod.rs | 4 + 9 files changed, 420 insertions(+), 57 deletions(-) create mode 100644 mirrord/agent/src/metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 008a42858c5..bc8172f72af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -946,6 +946,7 @@ checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ "async-trait", "axum-core", + "axum-macros", "base64 0.22.1", "bytes", "futures-util", @@ -996,6 +997,36 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum-macros" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d123550fa8d071b7255cb0cc04dc302baa6c8c4a79f55701552684d8399bce" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + +[[package]] +name = "axum-server" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56bac90848f6a9393ac03c63c640925c4b7c8ca21654de40d53f55964667c7d8" +dependencies = [ + "bytes", + "futures-util", + "http 1.2.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.5.1", + "hyper-util", + "pin-project-lite", + "tokio", + "tower 0.4.13", + "tower-service", +] + [[package]] name = "backoff" version = "0.4.0" @@ -3703,6 +3734,34 @@ dependencies = [ "serde_json", ] +[[package]] +name = "kameo" +version = "0.13.0" +source = "git+https://github.com/tqwewe/kameo?branch=main#fcd9987669d7530ec5853be8f05932b2d78c901d" +dependencies = [ + "dyn-clone", + 
"futures", + "itertools 0.13.0", + "kameo_macros", + "once_cell", + "serde", + "tokio", + "tokio-stream", + "tracing", +] + +[[package]] +name = "kameo_macros" +version = "0.13.0" +source = "git+https://github.com/tqwewe/kameo?branch=main#fcd9987669d7530ec5853be8f05932b2d78c901d" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.90", + "uuid", +] + [[package]] name = "konst" version = "0.3.15" @@ -4183,6 +4242,8 @@ version = "3.127.0" dependencies = [ "actix-codec", "async-trait", + "axum", + "axum-server", "bollard", "bytes", "clap", @@ -4201,13 +4262,15 @@ dependencies = [ "hyper-util", "iptables", "k8s-cri", + "kameo", "libc", "mirrord-protocol", "mockall", "nix 0.29.0", "oci-spec", "pnet", - "procfs", + "procfs 0.17.0", + "prometheus", "rand", "rawsocket", "rcgen", @@ -5436,6 +5499,19 @@ dependencies = [ "yansi", ] +[[package]] +name = "procfs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" +dependencies = [ + "bitflags 2.6.0", + "hex", + "lazy_static", + "procfs-core 0.16.0", + "rustix", +] + [[package]] name = "procfs" version = "0.17.0" @@ -5446,10 +5522,20 @@ dependencies = [ "chrono", "flate2", "hex", - "procfs-core", + "procfs-core 0.17.0", "rustix", ] +[[package]] +name = "procfs-core" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" +dependencies = [ + "bitflags 2.6.0", + "hex", +] + [[package]] name = "procfs-core" version = "0.17.0" @@ -5461,6 +5547,23 @@ dependencies = [ "hex", ] +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "libc", + "memchr", + "parking_lot", + "procfs 0.16.0", + 
"protobuf", + "thiserror 1.0.69", +] + [[package]] name = "prost" version = "0.13.3" @@ -5514,6 +5617,12 @@ dependencies = [ "prost", ] +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + [[package]] name = "quanta" version = "0.12.3" @@ -7144,6 +7253,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", + "tracing", "windows-sys 0.52.0", ] diff --git a/mirrord/agent/Cargo.toml b/mirrord/agent/Cargo.toml index b20c013a79f..e20d7991d82 100644 --- a/mirrord/agent/Cargo.toml +++ b/mirrord/agent/Cargo.toml @@ -70,6 +70,10 @@ x509-parser = "0.16" rustls.workspace = true envy = "0.4" socket2.workspace = true +prometheus = { version = "0.13", features = ["process"] } +kameo = { git = "https://github.com/tqwewe/kameo", branch = "main" } +axum = { version = "0.7", features = ["macros"] } +axum-server = "*" [target.'cfg(target_os = "linux")'.dependencies] iptables = { git = "https://github.com/metalbear-co/rust-iptables.git", rev = "e66c7332e361df3c61a194f08eefe3f40763d624" } diff --git a/mirrord/agent/README.md b/mirrord/agent/README.md index bf077b5fdcf..61d84351b4e 100644 --- a/mirrord/agent/README.md +++ b/mirrord/agent/README.md @@ -6,3 +6,9 @@ Agent part of [mirrord](https://github.com/metalbear-co/mirrord) responsible for mirrord-agent is written in Rust for safety, low memory consumption and performance. mirrord-agent is distributed as a container image (currently only x86) that is published on [GitHub Packages publicly](https://github.com/metalbear-co/mirrord-agent/pkgs/container/mirrord-agent). + +## Enabling prometheus metrics + +- If you make any changes to the 5-configmap.yaml file, remember to `kubectl apply` it + **before** restarting the `prometheus` deployment. 
+ diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 28eb5f26634..125ca57ff8a 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -12,6 +12,8 @@ use std::{ use client_connection::AgentTlsConnector; use dns::{DnsCommand, DnsWorker}; use futures::TryFutureExt; +use kameo::actor::ActorRef; +use metrics::{MetricsActor, MetricsIncrementFd}; use mirrord_protocol::{ClientMessage, DaemonMessage, GetEnvVarsRequest, LogMessage}; use sniffer::tcp_capture::RawSocketTcpCapture; use tokio::{ @@ -24,7 +26,7 @@ use tokio::{ time::{timeout, Duration}, }; use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, info, trace, warn, Level}; use tracing_subscriber::{fmt::format::FmtSpan, prelude::*}; use crate::{ @@ -68,11 +70,13 @@ struct State { ephemeral: bool, /// When present, it is used to secure incoming TCP connections. tls_connector: Option, + + metrics: Option>, } impl State { /// Return [`Err`] if container runtime operations failed. - pub async fn new(args: &Args) -> Result { + pub async fn new(args: &Args, metrics: Option>) -> Result { let tls_connector = args .operator_tls_cert_pem .clone() @@ -126,6 +130,7 @@ impl State { env: Arc::new(env), ephemeral, tls_connector, + metrics, }) } @@ -215,7 +220,10 @@ impl ClientConnectionHandler { ) -> Result { let pid = state.container_pid(); - let file_manager = FileManager::new(pid.or_else(|| state.ephemeral.then_some(1))); + let file_manager = FileManager::new( + pid.or_else(|| state.ephemeral.then_some(1)), + state.metrics.clone().unwrap(), + ); let tcp_sniffer_api = Self::create_sniffer_api(id, bg_tasks.sniffer, &mut connection).await; let tcp_stealer_api = @@ -396,11 +404,11 @@ impl ClientConnectionHandler { /// Handles incoming messages from the connected client (`mirrord-layer`). /// /// Returns `false` if the client disconnected. 
- #[tracing::instrument(level = "trace", skip(self))] + #[tracing::instrument(level = Level::TRACE, skip(self), err)] async fn handle_client_message(&mut self, message: ClientMessage) -> Result { match message { ClientMessage::FileRequest(req) => { - if let Some(response) = self.file_manager.handle_message(req)? { + if let Some(response) = self.file_manager.handle_message(req).await? { self.respond(DaemonMessage::File(response)) .await .inspect_err(|fail| { @@ -488,17 +496,19 @@ impl ClientConnectionHandler { } /// Initializes the agent's [`State`], channels, threads, and runs [`ClientConnectionHandler`]s. -#[tracing::instrument(level = "trace", ret)] +#[tracing::instrument(level = Level::TRACE, ret, err)] async fn start_agent(args: Args) -> Result<()> { trace!("start_agent -> Starting agent with args: {args:?}"); + let metrics = kameo::spawn(MetricsActor::default()); + let listener = TcpListener::bind(SocketAddrV4::new( Ipv4Addr::UNSPECIFIED, args.communicate_port, )) .await?; - let state = State::new(&args).await?; + let state = State::new(&args, Some(metrics)).await?; let cancellation_token = CancellationToken::new(); @@ -761,7 +771,7 @@ async fn run_child_agent() -> Result<()> { async fn start_iptable_guard(args: Args) -> Result<()> { debug!("start_iptable_guard -> Initializing iptable-guard."); - let state = State::new(&args).await?; + let state = State::new(&args, None).await?; let pid = state.container_pid(); std::env::set_var(IPTABLE_PREROUTING_ENV, IPTABLE_PREROUTING.as_str()); @@ -832,6 +842,17 @@ pub async fn main() -> Result<()> { let args = cli::parse_args(); + // TODO(alex) [high]: Could start metrics from here, as the agent itself has 2 + // different starting points. So start task here, and pass comms to both. + // + // CANNOT `bind` anything before `start_agent`, we might hit addrinuse. 
+ // let metrics = kameo::spawn(MetricsActor::default()); + // let listener = TcpListener::bind("0.0.0.0:0") + // .await + // .map_err(AgentError::from) + // .inspect_err(|fail| tracing::error!(?fail, "Generic listener!")) + // .inspect(|s| tracing::info!(?s, "Listening"))?; + let agent_result = if args.mode.is_targetless() || (std::env::var(IPTABLE_PREROUTING_ENV).is_ok() && std::env::var(IPTABLE_MESH_ENV).is_ok() diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 947fd8d2f78..f27713e454c 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -10,11 +10,15 @@ use std::{ }; use faccess::{AccessMode, PathExt}; +use kameo::actor::ActorRef; use libc::DT_DIR; use mirrord_protocol::{file::*, FileRequest, FileResponse, RemoteResult, ResponseError}; use tracing::{error, trace, Level}; -use crate::error::Result; +use crate::{ + error::Result, + metrics::{MetricsActor, MetricsDecrementFd, MetricsIncrementFd}, +}; #[derive(Debug)] pub enum RemoteFile { @@ -70,18 +74,7 @@ pub(crate) struct FileManager { dir_streams: HashMap>, getdents_streams: HashMap>, fds_iter: RangeInclusive, -} - -impl Default for FileManager { - fn default() -> Self { - Self { - root_path: Default::default(), - open_files: Default::default(), - dir_streams: Default::default(), - getdents_streams: Default::default(), - fds_iter: (0..=u64::MAX), - } - } + metrics: ActorRef, } pub fn get_root_path_from_optional_pid(pid: Option) -> PathBuf { @@ -147,8 +140,11 @@ pub fn resolve_path + std::fmt::Debug, R: AsRef + std::fmt: impl FileManager { /// Executes the request and returns the response. 
- #[tracing::instrument(level = "trace", skip(self))] - pub fn handle_message(&mut self, request: FileRequest) -> Result> { + #[tracing::instrument(level = Level::TRACE, skip(self), err)] + pub(crate) async fn handle_message( + &mut self, + request: FileRequest, + ) -> Result> { Ok(match request { FileRequest::Open(OpenFileRequest { path, open_options }) => { // TODO: maybe not agent error on this? @@ -156,7 +152,7 @@ impl FileManager { .strip_prefix("/") .inspect_err(|fail| error!("file_worker -> {:#?}", fail))?; - let open_result = self.open(path.into(), open_options); + let open_result = self.open(path.into(), open_options).await; Some(FileResponse::Open(open_result)) } FileRequest::OpenRelative(OpenRelativeFileRequest { @@ -164,7 +160,7 @@ impl FileManager { path, open_options, }) => { - let open_result = self.open_relative(relative_fd, path, open_options); + let open_result = self.open_relative(relative_fd, path, open_options).await; Some(FileResponse::Open(open_result)) } FileRequest::Read(ReadFileRequest { @@ -202,10 +198,7 @@ impl FileManager { let write_result = self.write_limited(remote_fd, start_from, write_bytes); Some(FileResponse::WriteLimited(write_result)) } - FileRequest::Close(CloseFileRequest { fd }) => { - self.close(fd); - None - } + FileRequest::Close(CloseFileRequest { fd }) => self.close(fd).await, FileRequest::Access(AccessFileRequest { pathname, mode }) => { let pathname = pathname .strip_prefix("/") @@ -229,7 +222,7 @@ impl FileManager { // dir operations FileRequest::FdOpenDir(FdOpenDirRequest { remote_fd }) => { - let open_dir_result = self.fdopen_dir(remote_fd); + let open_dir_result = self.fdopen_dir(remote_fd).await; Some(FileResponse::OpenDir(open_dir_result)) } FileRequest::ReadDir(ReadDirRequest { remote_fd }) => { @@ -240,10 +233,7 @@ impl FileManager { let read_dir_result = self.read_dir_batch(remote_fd, amount); Some(FileResponse::ReadDirBatch(read_dir_result)) } - FileRequest::CloseDir(CloseDirRequest { remote_fd }) => { - 
self.close_dir(remote_fd); - None - } + FileRequest::CloseDir(CloseDirRequest { remote_fd }) => self.close_dir(remote_fd).await, FileRequest::GetDEnts64(GetDEnts64Request { remote_fd, buffer_size, @@ -253,19 +243,28 @@ impl FileManager { }) } - #[tracing::instrument(level = "trace")] - pub fn new(pid: Option) -> Self { + #[tracing::instrument(level = Level::TRACE)] + pub fn new(pid: Option, metrics: ActorRef) -> Self { let root_path = get_root_path_from_optional_pid(pid); trace!("Agent root path >> {root_path:?}"); + Self { - open_files: HashMap::new(), + metrics, root_path, - ..Default::default() + open_files: Default::default(), + dir_streams: Default::default(), + getdents_streams: Default::default(), + fds_iter: (0..=u64::MAX), } } - #[tracing::instrument(level = "trace", skip(self))] - fn open( + // TODO(alex) [mid]: Fails with the wrong error? + /* + mirrord_agent::file: error: IO failed for remote operation with `Failed performing `getaddrinfo` with Some(2) and kind NotFound!! + at mirrord/agent/src/file.rs:261 + */ + #[tracing::instrument(level = Level::TRACE, skip(self), err(level = Level::DEBUG))] + async fn open( &mut self, path: PathBuf, open_options: OpenOptionsInternal, @@ -286,13 +285,19 @@ impl FileManager { RemoteFile::File(file) }; - self.open_files.insert(fd, remote_file); + if self.open_files.insert(fd, remote_file).is_none() { + let _ = self + .metrics + .tell(MetricsIncrementFd) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + } Ok(OpenFileResponse { fd }) } - #[tracing::instrument(level = "trace", skip(self))] - fn open_relative( + #[tracing::instrument(level = Level::TRACE, skip(self), err(level = Level::DEBUG))] + async fn open_relative( &mut self, relative_fd: u64, path: PathBuf, @@ -320,7 +325,13 @@ impl FileManager { RemoteFile::File(file) }; - self.open_files.insert(fd, remote_file); + if self.open_files.insert(fd, remote_file).is_none() { + let _ = self + .metrics + .tell(MetricsIncrementFd) + .await + 
.inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + } Ok(OpenFileResponse { fd }) } else { @@ -524,20 +535,38 @@ impl FileManager { }) } - pub(crate) fn close(&mut self, fd: u64) { - trace!("FileManager::close -> fd {:#?}", fd,); - + /// Always returns `None`, since we don't return any [`FileResponse`] back to mirrord + /// on `close` of an fd. + #[tracing::instrument(level = Level::TRACE, skip(self))] + pub(crate) async fn close(&mut self, fd: u64) -> Option { if self.open_files.remove(&fd).is_none() { - error!("FileManager::close -> fd {:#?} not found", fd); + error!(fd, "fd not found!"); + } else { + let _ = self + .metrics + .tell(MetricsDecrementFd) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } - } - pub(crate) fn close_dir(&mut self, fd: u64) { - trace!("FileManager::close_dir -> fd {:#?}", fd,); + None + } + /// Always returns `None`, since we don't return any [`FileResponse`] back to mirrord + /// on `close_dir` of an fd. + #[tracing::instrument(level = Level::TRACE, skip(self))] + pub(crate) async fn close_dir(&mut self, fd: u64) -> Option { if self.dir_streams.remove(&fd).is_none() && self.getdents_streams.remove(&fd).is_none() { error!("FileManager::close_dir -> fd {:#?} not found", fd); + } else { + let _ = self + .metrics + .tell(MetricsDecrementFd) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } + + None } pub(crate) fn access( @@ -641,8 +670,8 @@ impl FileManager { }) } - #[tracing::instrument(level = "trace", skip(self))] - pub(crate) fn fdopen_dir(&mut self, fd: u64) -> RemoteResult { + #[tracing::instrument(level = Level::TRACE, skip(self), err(level = Level::DEBUG))] + pub(crate) async fn fdopen_dir(&mut self, fd: u64) -> RemoteResult { let path = match self .open_files .get(&fd) @@ -658,7 +687,14 @@ impl FileManager { .ok_or_else(|| ResponseError::IdsExhausted("fdopen_dir".to_string()))?; let dir_stream = path.read_dir()?.enumerate(); - 
self.dir_streams.insert(fd, dir_stream); + + if self.dir_streams.insert(fd, dir_stream).is_none() { + let _ = self + .metrics + .tell(MetricsIncrementFd) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + } Ok(OpenDirResponse { fd }) } @@ -707,7 +743,7 @@ impl FileManager { /// The possible remote errors are: /// [`ResponseError::NotFound`] if there is not such fd here. /// [`ResponseError::NotDirectory`] if the fd points to a file with a non-directory file type. - #[tracing::instrument(level = "trace", skip(self))] + #[tracing::instrument(level = Level::TRACE, skip(self))] pub(crate) fn get_or_create_getdents64_stream( &mut self, fd: u64, @@ -720,6 +756,7 @@ impl FileManager { let current_and_parent = Self::get_current_and_parent_entries(dir); let stream = GetDEnts64Stream::new(dir.read_dir()?, current_and_parent).peekable(); + // TODO(alex) [mid]: Do we also want to count streams of stuffs? Ok(e.insert(stream)) } }, diff --git a/mirrord/agent/src/main.rs b/mirrord/agent/src/main.rs index c33a7cbe45e..777bdb8feb1 100644 --- a/mirrord/agent/src/main.rs +++ b/mirrord/agent/src/main.rs @@ -40,6 +40,8 @@ mod vpn; #[cfg(target_os = "linux")] mod watched_task; +mod metrics; + #[cfg(target_os = "linux")] #[tokio::main(flavor = "current_thread")] async fn main() -> crate::error::Result<()> { diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs new file mode 100644 index 00000000000..ce3da381613 --- /dev/null +++ b/mirrord/agent/src/metrics.rs @@ -0,0 +1,174 @@ +use axum::{response::IntoResponse, routing::get, Extension, Router}; +use kameo::{ + actor::ActorRef, + error::BoxError, + mailbox::unbounded::UnboundedMailbox, + message::{Context, Message}, + Actor, Reply, +}; +use serde::Serialize; +use thiserror::Error; +use tokio::net::TcpListener; +use tracing::Level; + +use crate::error::AgentError; + +#[derive(Error, Debug)] +pub(crate) enum MetricsError { + #[error(transparent)] + GetAll(#[from] kameo::error::SendError), 
+ + #[error(transparent)] + FromUtf8(#[from] std::string::FromUtf8Error), + + #[error(transparent)] + Prometheus(#[from] prometheus::Error), +} + +impl IntoResponse for MetricsError { + fn into_response(self) -> axum::response::Response { + (http::StatusCode::INTERNAL_SERVER_ERROR, self.to_string()).into_response() + } +} + +#[tracing::instrument(level = Level::INFO, ret, err)] +async fn get_metrics(metrics: Extension>) -> Result { + use prometheus::{register_int_gauge, Encoder, TextEncoder}; + + let MetricsGetAllReply { + open_fds_count, + connected_clients_count, + } = metrics.ask(MetricsGetAll).await?; + + register_int_gauge!( + "mirrord_agent_open_fds_count", + "amount of open fds in mirrord-agent" + )? + .set(open_fds_count as i64); + + register_int_gauge!( + "mirrord_agent_connected_clients_count", + "amount of connected clients in mirrord-agent" + )? + .set(connected_clients_count as i64); + + let metric_families = prometheus::gather(); + + let mut buffer = Vec::new(); + TextEncoder + .encode(&metric_families, &mut buffer) + .inspect_err(|error| tracing::error!(%error, "unable to encode prometheus metrics"))?; + + Ok(String::from_utf8(buffer)?) 
+} + +#[derive(Default)] +pub(crate) struct MetricsActor { + open_fds_count: u64, + connected_clients_count: u64, +} + +impl Actor for MetricsActor { + type Mailbox = UnboundedMailbox; + + #[tracing::instrument(level = Level::INFO, skip_all, ret ,err)] + async fn on_start(&mut self, metrics: ActorRef) -> Result<(), BoxError> { + let app = Router::new() + .route("/metrics", get(get_metrics)) + .layer(Extension(metrics)); + + let listener = TcpListener::bind("0.0.0.0:9000") + .await + .map_err(AgentError::from) + .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; + + tokio::spawn(async move { + axum::serve(listener, app).await.inspect_err(|fail| { + tracing::error!(%fail, "Could not start agent metrics + server!") + }) + }); + + Ok(()) + } +} + +pub(crate) struct MetricsIncrementFd; +pub(crate) struct MetricsDecrementFd; +pub(crate) struct MetricsClientConnected; +pub(crate) struct MetricsClientDisconnected; +pub(crate) struct MetricsGetAll; + +#[derive(Reply, Serialize)] +pub(crate) struct MetricsGetAllReply { + open_fds_count: u64, + connected_clients_count: u64, +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncrementFd, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.open_fds_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecrementFd, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.open_fds_count = self.open_fds_count.saturating_sub(1); + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsClientConnected, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.connected_clients_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + 
#[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsClientDisconnected, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.connected_clients_count = self.connected_clients_count.saturating_sub(1); + } +} + +impl Message for MetricsActor { + type Reply = MetricsGetAllReply; + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsGetAll, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + MetricsGetAllReply { + open_fds_count: self.open_fds_count, + connected_clients_count: self.connected_clients_count, + } + } +} diff --git a/mirrord/kube/src/api/container/job.rs b/mirrord/kube/src/api/container/job.rs index 907aefffeeb..b8d702f13f9 100644 --- a/mirrord/kube/src/api/container/job.rs +++ b/mirrord/kube/src/api/container/job.rs @@ -152,6 +152,8 @@ where "disabled".to_string(), ), ("app".to_string(), "mirrord".to_string()), + ("prometheus.io/scrape".to_string(), "true".to_string()), + ("prometheus.io/port".to_string(), "9000".to_string()), ])); let mut annotations = config @@ -163,6 +165,9 @@ where annotations.extend(BTreeMap::from([ ("sidecar.istio.io/inject".to_string(), "false".to_string()), ("linkerd.io/inject".to_string(), "disabled".to_string()), + ("prometheus.io/scrape".to_string(), "true".to_string()), + // ("prometheus.io/path".to_string(), "/metrics".to_string()), + ("prometheus.io/port".to_string(), "9000".to_string()), ])); pod.labels_mut().extend(labels.clone()); diff --git a/mirrord/kube/src/api/container/pod.rs b/mirrord/kube/src/api/container/pod.rs index f8461e8a002..9ecb9317cea 100644 --- a/mirrord/kube/src/api/container/pod.rs +++ b/mirrord/kube/src/api/container/pod.rs @@ -106,6 +106,8 @@ impl ContainerVariant for PodVariant<'_> { [ ("sidecar.istio.io/inject".to_string(), "false".to_string()), ("linkerd.io/inject".to_string(), "disabled".to_string()), + ("prometheus.io/scrape".to_string(), "true".to_string()), + 
("prometheus.io/port".to_string(), "9000".to_string()), ] .into(), ), @@ -116,6 +118,8 @@ impl ContainerVariant for PodVariant<'_> { "disabled".to_string(), ), ("app".to_string(), "mirrord".to_string()), + ("prometheus.io/scrape".to_string(), "true".to_string()), + ("prometheus.io/port".to_string(), "9000".to_string()), ] .into(), ), From 9c3c374bae3033f27a94bac807f6ddc9abacee80 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 17 Dec 2024 16:59:22 -0300 Subject: [PATCH 02/85] Always have metrics, just doesnt do anything. --- mirrord/agent/src/entrypoint.rs | 15 +++++++------ mirrord/agent/src/metrics.rs | 40 +++++++++++++++++++++------------ 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 125ca57ff8a..db3c0f2cb64 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -13,7 +13,7 @@ use client_connection::AgentTlsConnector; use dns::{DnsCommand, DnsWorker}; use futures::TryFutureExt; use kameo::actor::ActorRef; -use metrics::{MetricsActor, MetricsIncrementFd}; +use metrics::MetricsActor; use mirrord_protocol::{ClientMessage, DaemonMessage, GetEnvVarsRequest, LogMessage}; use sniffer::tcp_capture::RawSocketTcpCapture; use tokio::{ @@ -71,12 +71,12 @@ struct State { /// When present, it is used to secure incoming TCP connections. tls_connector: Option, - metrics: Option>, + metrics: ActorRef, } impl State { /// Return [`Err`] if container runtime operations failed. 
- pub async fn new(args: &Args, metrics: Option>) -> Result { + pub async fn new(args: &Args, metrics: ActorRef) -> Result { let tls_connector = args .operator_tls_cert_pem .clone() @@ -222,7 +222,7 @@ impl ClientConnectionHandler { let file_manager = FileManager::new( pid.or_else(|| state.ephemeral.then_some(1)), - state.metrics.clone().unwrap(), + state.metrics.clone(), ); let tcp_sniffer_api = Self::create_sniffer_api(id, bg_tasks.sniffer, &mut connection).await; @@ -500,7 +500,7 @@ impl ClientConnectionHandler { async fn start_agent(args: Args) -> Result<()> { trace!("start_agent -> Starting agent with args: {args:?}"); - let metrics = kameo::spawn(MetricsActor::default()); + let metrics = kameo::spawn(MetricsActor::new(true)); let listener = TcpListener::bind(SocketAddrV4::new( Ipv4Addr::UNSPECIFIED, @@ -508,7 +508,7 @@ async fn start_agent(args: Args) -> Result<()> { )) .await?; - let state = State::new(&args, Some(metrics)).await?; + let state = State::new(&args, metrics).await?; let cancellation_token = CancellationToken::new(); @@ -771,7 +771,8 @@ async fn run_child_agent() -> Result<()> { async fn start_iptable_guard(args: Args) -> Result<()> { debug!("start_iptable_guard -> Initializing iptable-guard."); - let state = State::new(&args, None).await?; + let metrics = kameo::spawn(MetricsActor::new(false)); + let state = State::new(&args, metrics).await?; let pid = state.container_pid(); std::env::set_var(IPTABLE_PREROUTING_ENV, IPTABLE_PREROUTING.as_str()); diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index ce3da381613..e57787b099f 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -64,30 +64,42 @@ async fn get_metrics(metrics: Extension>) -> Result Self { + Self { + enabled, + ..Default::default() + } + } +} + impl Actor for MetricsActor { type Mailbox = UnboundedMailbox; #[tracing::instrument(level = Level::INFO, skip_all, ret ,err)] async fn on_start(&mut self, metrics: ActorRef) -> Result<(), 
BoxError> { - let app = Router::new() - .route("/metrics", get(get_metrics)) - .layer(Extension(metrics)); - - let listener = TcpListener::bind("0.0.0.0:9000") - .await - .map_err(AgentError::from) - .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; - - tokio::spawn(async move { - axum::serve(listener, app).await.inspect_err(|fail| { - tracing::error!(%fail, "Could not start agent metrics + if self.enabled { + let app = Router::new() + .route("/metrics", get(get_metrics)) + .layer(Extension(metrics)); + + let listener = TcpListener::bind("0.0.0.0:9000") + .await + .map_err(AgentError::from) + .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; + + tokio::spawn(async move { + axum::serve(listener, app).await.inspect_err(|fail| { + tracing::error!(%fail, "Could not start agent metrics server!") - }) - }); + }) + }); + } Ok(()) } From 806b5ee3d8411f5da2650d69d5feac506ac08d0f Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 17 Dec 2024 18:18:49 -0300 Subject: [PATCH 03/85] Add metrics for sniffer. 
--- mirrord/agent/src/entrypoint.rs | 7 ++- mirrord/agent/src/file.rs | 12 ++-- mirrord/agent/src/metrics.rs | 102 +++++++++++++++++++++++++------ mirrord/agent/src/sniffer.rs | 19 ++++-- mirrord/agent/src/sniffer/api.rs | 37 ++++++++++- mirrord/protocol/src/codec.rs | 4 ++ mirrord/protocol/src/tcp.rs | 14 +++++ 7 files changed, 161 insertions(+), 34 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index db3c0f2cb64..e8f5d6c6e7a 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -225,7 +225,9 @@ impl ClientConnectionHandler { state.metrics.clone(), ); - let tcp_sniffer_api = Self::create_sniffer_api(id, bg_tasks.sniffer, &mut connection).await; + let tcp_sniffer_api = + Self::create_sniffer_api(id, bg_tasks.sniffer, &mut connection, state.metrics.clone()) + .await; let tcp_stealer_api = Self::create_stealer_api(id, bg_tasks.stealer, &mut connection).await?; let dns_api = Self::create_dns_api(bg_tasks.dns); @@ -253,9 +255,10 @@ impl ClientConnectionHandler { id: ClientId, task: BackgroundTask, connection: &mut ClientConnection, + metrics: ActorRef, ) -> Option { if let BackgroundTask::Running(sniffer_status, sniffer_sender) = task { - match TcpSnifferApi::new(id, sniffer_sender, sniffer_status).await { + match TcpSnifferApi::new(id, sniffer_sender, sniffer_status, metrics).await { Ok(api) => Some(api), Err(e) => { let message = format!( diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index f27713e454c..ab6b0407d75 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -17,7 +17,7 @@ use tracing::{error, trace, Level}; use crate::{ error::Result, - metrics::{MetricsActor, MetricsDecrementFd, MetricsIncrementFd}, + metrics::{MetricsActor, MetricsDecFd, MetricsIncFd}, }; #[derive(Debug)] @@ -288,7 +288,7 @@ impl FileManager { if self.open_files.insert(fd, remote_file).is_none() { let _ = self .metrics - .tell(MetricsIncrementFd) + .tell(MetricsIncFd) 
.await .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } @@ -328,7 +328,7 @@ impl FileManager { if self.open_files.insert(fd, remote_file).is_none() { let _ = self .metrics - .tell(MetricsIncrementFd) + .tell(MetricsIncFd) .await .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } @@ -544,7 +544,7 @@ impl FileManager { } else { let _ = self .metrics - .tell(MetricsDecrementFd) + .tell(MetricsDecFd) .await .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } @@ -561,7 +561,7 @@ impl FileManager { } else { let _ = self .metrics - .tell(MetricsDecrementFd) + .tell(MetricsDecFd) .await .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } @@ -691,7 +691,7 @@ impl FileManager { if self.dir_streams.insert(fd, dir_stream).is_none() { let _ = self .metrics - .tell(MetricsIncrementFd) + .tell(MetricsIncFd) .await .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index e57787b099f..c250ef0a555 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -65,8 +65,10 @@ async fn get_metrics(metrics: Extension>) -> Result for MetricsActor { +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncFd, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.open_fd_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecFd, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.open_fd_count = self.open_fd_count.saturating_sub(1); + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncClient, + _ctx: Context<'_, Self, Self::Reply>, + ) -> 
Self::Reply { + self.connected_client_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecClient, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.connected_client_count = self.connected_client_count.saturating_sub(1); + } +} + +impl Message for MetricsActor { type Reply = (); #[tracing::instrument(level = Level::INFO, skip_all)] async fn handle( &mut self, - _: MetricsIncrementFd, + _: MetricsIncPortSubscription, _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { - self.open_fds_count += 1; + self.port_subscription_count += 1; } } -impl Message for MetricsActor { +impl Message for MetricsActor { type Reply = (); #[tracing::instrument(level = Level::INFO, skip_all)] async fn handle( &mut self, - _: MetricsDecrementFd, + _: MetricsDecPortSubscription, _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { - self.open_fds_count = self.open_fds_count.saturating_sub(1); + self.port_subscription_count = self.port_subscription_count.saturating_sub(1); } } -impl Message for MetricsActor { +impl Message for MetricsActor { type Reply = (); #[tracing::instrument(level = Level::INFO, skip_all)] async fn handle( &mut self, - _: MetricsClientConnected, + _: MetricsIncConnectionSubscription, _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { - self.connected_clients_count += 1; + self.connection_subscription_count += 1; } } -impl Message for MetricsActor { +impl Message for MetricsActor { type Reply = (); #[tracing::instrument(level = Level::INFO, skip_all)] async fn handle( &mut self, - _: MetricsClientDisconnected, + _: MetricsDecConnectionSubscription, _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { - self.connected_clients_count = self.connected_clients_count.saturating_sub(1); + self.connection_subscription_count = self.connection_subscription_count.saturating_sub(1); } } @@ -179,8 +241,8 @@ impl Message for 
MetricsActor { _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { MetricsGetAllReply { - open_fds_count: self.open_fds_count, - connected_clients_count: self.connected_clients_count, + open_fds_count: self.open_fd_count, + connected_clients_count: self.connected_client_count, } } } diff --git a/mirrord/agent/src/sniffer.rs b/mirrord/agent/src/sniffer.rs index b1c232eae6d..ffb5f899ba0 100644 --- a/mirrord/agent/src/sniffer.rs +++ b/mirrord/agent/src/sniffer.rs @@ -434,7 +434,10 @@ mod test { use tokio::sync::mpsc; use super::*; - use crate::watched_task::{TaskStatus, WatchedTask}; + use crate::{ + metrics::MetricsActor, + watched_task::{TaskStatus, WatchedTask}, + }; struct TestSnifferSetup { command_tx: Sender, @@ -448,9 +451,17 @@ mod test { async fn get_api(&mut self) -> TcpSnifferApi { let client_id = self.next_client_id; self.next_client_id += 1; - TcpSnifferApi::new(client_id, self.command_tx.clone(), self.task_status.clone()) - .await - .unwrap() + + let metrics = kameo::spawn(MetricsActor::new(false)); + + TcpSnifferApi::new( + client_id, + self.command_tx.clone(), + self.task_status.clone(), + metrics, + ) + .await + .unwrap() } fn times_filter_changed(&self) -> usize { diff --git a/mirrord/agent/src/sniffer/api.rs b/mirrord/agent/src/sniffer/api.rs index 31ec4107f97..016ba22dd11 100644 --- a/mirrord/agent/src/sniffer/api.rs +++ b/mirrord/agent/src/sniffer/api.rs @@ -1,6 +1,7 @@ use std::ops::RangeInclusive; use futures::{stream::FuturesUnordered, StreamExt}; +use kameo::actor::ActorRef; use mirrord_protocol::{ tcp::{DaemonTcp, LayerTcp, NewTcpConnection, TcpClose, TcpData}, ConnectionId, LogMessage, Port, @@ -15,7 +16,15 @@ use tokio_stream::{ }; use super::messages::{SniffedConnection, SnifferCommand, SnifferCommandInner}; -use crate::{error::AgentError, util::ClientId, watched_task::TaskStatus}; +use crate::{ + error::AgentError, + metrics::{ + MetricsActor, MetricsDecConnectionSubscription, MetricsDecPortSubscription, + MetricsIncPortSubscription, 
+ }, + util::ClientId, + watched_task::TaskStatus, +}; /// Interface used by clients to interact with the /// [`TcpConnectionSniffer`](super::TcpConnectionSniffer). Multiple instances of this struct operate @@ -36,6 +45,7 @@ pub(crate) struct TcpSnifferApi { connection_ids_iter: RangeInclusive, /// [`LayerTcp::PortSubscribe`] requests in progress. subscriptions_in_progress: FuturesUnordered>, + metrics: ActorRef, } impl TcpSnifferApi { @@ -51,10 +61,12 @@ impl TcpSnifferApi { /// [`TcpConnectionSniffer`](super::TcpConnectionSniffer) /// * `task_status` - handle to the [`TcpConnectionSniffer`](super::TcpConnectionSniffer) exit /// status + /// * `metrics` - used to send agent metrics messages to our metrics actor; pub async fn new( client_id: ClientId, sniffer_sender: Sender, mut task_status: TaskStatus, + metrics: ActorRef, ) -> Result { let (sender, receiver) = mpsc::channel(Self::CONNECTION_CHANNEL_SIZE); @@ -74,6 +86,7 @@ impl TcpSnifferApi { connections: Default::default(), connection_ids_iter: (0..=ConnectionId::MAX), subscriptions_in_progress: Default::default(), + metrics, }) } @@ -158,7 +171,7 @@ impl TcpSnifferApi { } } - /// Tansform the given message into a [`SnifferCommand`] and pass it to the connected + /// Tansforms a [`LayerTcp`] message into a [`SnifferCommand`] and passes it to the connected /// [`TcpConnectionSniffer`](super::TcpConnectionSniffer). 
pub async fn handle_client_message(&mut self, message: LayerTcp) -> Result<(), AgentError> { match message { @@ -168,17 +181,37 @@ impl TcpSnifferApi { .await?; self.subscriptions_in_progress.push(rx); + let _ = self + .metrics + .tell(MetricsIncPortSubscription) + .await + .inspect_err(|fail| tracing::trace!(?fail)); + Ok(()) } LayerTcp::PortUnsubscribe(port) => { self.send_command(SnifferCommandInner::UnsubscribePort(port)) + .await?; + + let _ = self + .metrics + .tell(MetricsDecPortSubscription) .await + .inspect_err(|fail| tracing::trace!(?fail)); + + Ok(()) } LayerTcp::ConnectionUnsubscribe(connection_id) => { self.connections.remove(&connection_id); + let _ = self + .metrics + .tell(MetricsDecConnectionSubscription) + .await + .inspect_err(|fail| tracing::trace!(?fail)); + Ok(()) } } diff --git a/mirrord/protocol/src/codec.rs b/mirrord/protocol/src/codec.rs index 06957e562ef..bb3992215cc 100644 --- a/mirrord/protocol/src/codec.rs +++ b/mirrord/protocol/src/codec.rs @@ -93,6 +93,10 @@ pub static CLIENT_READY_FOR_LOGS: LazyLock = #[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] pub enum ClientMessage { Close, + /// TCP sniffer message. + /// + /// These are the messages used by the `mirror` feature, and handled by the + /// `TcpSnifferApi` in the agent. Tcp(LayerTcp), TcpSteal(LayerTcpSteal), TcpOutgoing(LayerTcpOutgoing), diff --git a/mirrord/protocol/src/tcp.rs b/mirrord/protocol/src/tcp.rs index 023369129ad..d4fedd2ff85 100644 --- a/mirrord/protocol/src/tcp.rs +++ b/mirrord/protocol/src/tcp.rs @@ -57,10 +57,24 @@ pub struct TcpClose { } /// Messages related to Tcp handler from client. +/// +/// Part of the `mirror` feature. #[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] pub enum LayerTcp { + /// User is interested in mirroring traffic on this `Port`, so add it to the list of + /// ports that the sniffer is filtering. PortSubscribe(Port), + + /// User is not interested in the connection with `ConnectionId` anymore. 
+ /// + /// This means that their app has closed the connection they were `listen`ning on. + /// + /// There is no `ConnectionSubscribe` counter-part of this variant, the subscription + /// happens when the sniffer receives an (agent) internal `SniffedConnection`. ConnectionUnsubscribe(ConnectionId), + + /// Removes this `Port` from the sniffer's filter, the traffic won't be cloned to mirrord + /// anymore. PortUnsubscribe(Port), } From c326d02f5410be1d6093ce1760af34e856405b06 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 18 Dec 2024 15:42:10 -0300 Subject: [PATCH 04/85] Add metrics for stealer. --- mirrord/agent/src/entrypoint.rs | 16 +-- mirrord/agent/src/metrics.rs | 125 +++++++++++++++++++++-- mirrord/agent/src/sniffer/api.rs | 10 +- mirrord/agent/src/steal/connection.rs | 84 ++++++++++++--- mirrord/agent/src/steal/http/filter.rs | 2 +- mirrord/agent/src/steal/subscriptions.rs | 34 ++++-- mirrord/protocol/src/codec.rs | 5 + mirrord/protocol/src/tcp.rs | 28 +++++ 8 files changed, 258 insertions(+), 46 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index e8f5d6c6e7a..a7d1909438a 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -579,13 +579,15 @@ async fn start_agent(args: Args) -> Result<()> { let cancellation_token = cancellation_token.clone(); let watched_task = WatchedTask::new( TcpConnectionStealer::TASK_NAME, - TcpConnectionStealer::new(stealer_command_rx).and_then(|stealer| async move { - let res = stealer.start(cancellation_token).await; - if let Err(err) = res.as_ref() { - error!("Stealer failed: {err}"); - } - res - }), + TcpConnectionStealer::new(stealer_command_rx, state.metrics.clone()).and_then( + |stealer| async move { + let res = stealer.start(cancellation_token).await; + if let Err(err) = res.as_ref() { + error!("Stealer failed: {err}"); + } + res + }, + ), ); let status = watched_task.status(); let task = run_thread_in_namespace( diff --git 
a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index c250ef0a555..6152f6be85b 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -67,7 +67,10 @@ pub(crate) struct MetricsActor { enabled: bool, open_fd_count: u64, connected_client_count: u64, - port_subscription_count: u64, + mirror_port_subscription_count: u64, + steal_filtered_port_subscription_count: u64, + steal_unfiltered_port_subscription_count: u64, + steal_connection_subscription_count: u64, connection_subscription_count: u64, } @@ -113,8 +116,22 @@ pub(crate) struct MetricsDecFd; pub(crate) struct MetricsIncClient; pub(crate) struct MetricsDecClient; -pub(crate) struct MetricsIncPortSubscription; -pub(crate) struct MetricsDecPortSubscription; +pub(crate) struct MetricsIncMirrorPortSubscription; +pub(crate) struct MetricsDecMirrorPortSubscription; + +pub(crate) struct MetricsIncStealPortSubscription { + pub(crate) filtered: bool, +} +pub(crate) struct MetricsDecStealPortSubscription { + pub(crate) filtered: bool, +} + +pub(crate) struct MetricsDecStealPortSubscriptionMany { + pub(crate) removed_subscriptions: Vec, +} + +pub(crate) struct MetricsIncStealConnectionSubscription; +pub(crate) struct MetricsDecStealConnectionSubscription; pub(crate) struct MetricsIncConnectionSubscription; pub(crate) struct MetricsDecConnectionSubscription; @@ -179,29 +196,119 @@ impl Message for MetricsActor { } } -impl Message for MetricsActor { +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncMirrorPortSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.mirror_port_subscription_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecMirrorPortSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + 
self.mirror_port_subscription_count = self.mirror_port_subscription_count.saturating_sub(1); + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + MetricsIncStealPortSubscription { filtered }: MetricsIncStealPortSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + if filtered { + self.steal_filtered_port_subscription_count += 1; + } else { + self.steal_unfiltered_port_subscription_count += 1; + } + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + MetricsDecStealPortSubscription { filtered }: MetricsDecStealPortSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + if filtered { + self.steal_filtered_port_subscription_count = self + .steal_filtered_port_subscription_count + .saturating_sub(1); + } else { + self.steal_unfiltered_port_subscription_count = self + .steal_unfiltered_port_subscription_count + .saturating_sub(1); + } + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + MetricsDecStealPortSubscriptionMany { + removed_subscriptions, + }: MetricsDecStealPortSubscriptionMany, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + for filtered in removed_subscriptions { + if filtered { + self.steal_filtered_port_subscription_count = self + .steal_filtered_port_subscription_count + .saturating_sub(1); + } else { + self.steal_unfiltered_port_subscription_count = self + .steal_unfiltered_port_subscription_count + .saturating_sub(1); + } + } + } +} + +impl Message for MetricsActor { type Reply = (); #[tracing::instrument(level = Level::INFO, skip_all)] async fn handle( &mut self, - _: MetricsIncPortSubscription, + _: MetricsIncStealConnectionSubscription, _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { - 
self.port_subscription_count += 1; + self.steal_connection_subscription_count += 1; } } -impl Message for MetricsActor { +impl Message for MetricsActor { type Reply = (); #[tracing::instrument(level = Level::INFO, skip_all)] async fn handle( &mut self, - _: MetricsDecPortSubscription, + _: MetricsDecStealConnectionSubscription, _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { - self.port_subscription_count = self.port_subscription_count.saturating_sub(1); + self.steal_connection_subscription_count = + self.steal_connection_subscription_count.saturating_sub(1); } } diff --git a/mirrord/agent/src/sniffer/api.rs b/mirrord/agent/src/sniffer/api.rs index 016ba22dd11..059deeb13f9 100644 --- a/mirrord/agent/src/sniffer/api.rs +++ b/mirrord/agent/src/sniffer/api.rs @@ -19,8 +19,8 @@ use super::messages::{SniffedConnection, SnifferCommand, SnifferCommandInner}; use crate::{ error::AgentError, metrics::{ - MetricsActor, MetricsDecConnectionSubscription, MetricsDecPortSubscription, - MetricsIncPortSubscription, + MetricsActor, MetricsDecConnectionSubscription, MetricsDecMirrorPortSubscription, + MetricsIncMirrorPortSubscription, }, util::ClientId, watched_task::TaskStatus, @@ -29,6 +29,8 @@ use crate::{ /// Interface used by clients to interact with the /// [`TcpConnectionSniffer`](super::TcpConnectionSniffer). Multiple instances of this struct operate /// on a single sniffer instance. +/// +/// Enabled by the `mirror` feature for incoming traffic. pub(crate) struct TcpSnifferApi { /// Id of the client using this struct. 
client_id: ClientId, @@ -183,7 +185,7 @@ impl TcpSnifferApi { let _ = self .metrics - .tell(MetricsIncPortSubscription) + .tell(MetricsIncMirrorPortSubscription) .await .inspect_err(|fail| tracing::trace!(?fail)); @@ -196,7 +198,7 @@ impl TcpSnifferApi { let _ = self .metrics - .tell(MetricsDecPortSubscription) + .tell(MetricsDecMirrorPortSubscription) .await .inspect_err(|fail| tracing::trace!(?fail)); diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index 463c61f88d0..ea695c5bcfc 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -11,6 +11,7 @@ use hyper::{ body::Incoming, http::{header::UPGRADE, request::Parts}, }; +use kameo::actor::ActorRef; use mirrord_protocol::{ body_chunks::{BodyExt as _, Frames}, tcp::{ @@ -29,10 +30,15 @@ use tokio::{ sync::mpsc::{Receiver, Sender}, }; use tokio_util::sync::CancellationToken; -use tracing::warn; +use tracing::{trace, warn, Level}; use crate::{ error::{AgentError, Result}, + metrics::{ + MetricsActor, MetricsDecStealConnectionSubscription, MetricsDecStealPortSubscription, + MetricsDecStealPortSubscriptionMany, MetricsIncStealConnectionSubscription, + MetricsIncStealPortSubscription, + }, steal::{ connections::{ ConnectionMessageIn, ConnectionMessageOut, StolenConnection, StolenConnections, @@ -273,6 +279,8 @@ struct TcpStealerConfig { /// Meant to be run (see [`TcpConnectionStealer::start`]) in a separate thread while the agent /// lives. When handling port subscription requests, this struct manipulates iptables, so it should /// run in the same network namespace as the agent's target. +/// +/// Enabled by the `steal` feature for incoming traffic. pub(crate) struct TcpConnectionStealer { /// For managing active subscriptions and port redirections. port_subscriptions: PortSubscriptions, @@ -289,6 +297,8 @@ pub(crate) struct TcpConnectionStealer { /// Set of active connections stolen by [`Self::port_subscriptions`]. 
connections: StolenConnections, + + metrics: ActorRef, } impl TcpConnectionStealer { @@ -296,8 +306,11 @@ impl TcpConnectionStealer { /// Initializes a new [`TcpConnectionStealer`], but doesn't start the actual work. /// You need to call [`TcpConnectionStealer::start`] to do so. - #[tracing::instrument(level = "trace")] - pub(crate) async fn new(command_rx: Receiver) -> Result { + #[tracing::instrument(level = Level::TRACE, err)] + pub(crate) async fn new( + command_rx: Receiver, + metrics: ActorRef, + ) -> Result { let config = envy::prefixed("MIRRORD_AGENT_") .from_env::() .unwrap_or_default(); @@ -315,6 +328,7 @@ impl TcpConnectionStealer { clients: HashMap::with_capacity(8), clients_closed: Default::default(), connections: StolenConnections::with_capacity(8), + metrics, }) } @@ -351,7 +365,15 @@ impl TcpConnectionStealer { }, accept = self.port_subscriptions.next_connection() => match accept { - Ok((stream, peer)) => self.incoming_connection(stream, peer).await?, + Ok((stream, peer)) => { + self.incoming_connection(stream, peer).await?; + + let _ = self + .metrics + .tell(MetricsIncStealConnectionSubscription) + .await + .inspect_err(|fail| trace!(?fail)); + } Err(error) => { tracing::error!(?error, "Failed to accept a stolen connection"); break Err(error); @@ -534,11 +556,14 @@ impl TcpConnectionStealer { Ok(()) } - /// Helper function to handle [`Command::PortSubscribe`] messages. + /// Helper function to handle [`Command::PortSubscribe`] messages for the `TcpStealer`. /// - /// Inserts a subscription into [`Self::port_subscriptions`]. - #[tracing::instrument(level = "trace", skip(self))] - async fn port_subscribe(&mut self, client_id: ClientId, port_steal: StealType) -> Result<()> { + /// Checks if [`StealType`] is a valid [`HttpFilter`], then inserts a subscription into + /// [`Self::port_subscriptions`]. + /// + /// - Returns: `true` if this is an HTTP filtered subscription. 
+ #[tracing::instrument(level = Level::TRACE, skip(self), err)] + async fn port_subscribe(&mut self, client_id: ClientId, port_steal: StealType) -> Result { let spec = match port_steal { StealType::All(port) => Ok((port, None)), StealType::FilteredHttp(port, filter) => Regex::new(&format!("(?i){filter}")) @@ -549,6 +574,11 @@ impl TcpConnectionStealer { .map_err(|err| BadHttpFilterExRegex(filter, err.to_string())), }; + let filtered = spec + .as_ref() + .map(|(_, filter)| filter.is_some()) + .unwrap_or_default(); + let res = match spec { Ok((port, filter)) => self.port_subscriptions.add(client_id, port, filter).await?, Err(e) => Err(e.into()), @@ -557,7 +587,7 @@ impl TcpConnectionStealer { let client = self.clients.get(&client_id).expect("client not found"); let _ = client.tx.send(DaemonTcp::SubscribeResult(res)).await; - Ok(()) + Ok(filtered) } /// Removes the client with `client_id` from our list of clients (layers), and also removes @@ -565,10 +595,18 @@ impl TcpConnectionStealer { /// connections. #[tracing::instrument(level = "trace", skip(self))] async fn close_client(&mut self, client_id: ClientId) -> Result<(), AgentError> { - self.port_subscriptions.remove_all(client_id).await?; + let removed_subscriptions = self.port_subscriptions.remove_all(client_id).await?; + + let _ = self + .metrics + .tell(MetricsDecStealPortSubscriptionMany { + removed_subscriptions, + }) + .await + .inspect_err(|fail| trace!(?fail)); let client = self.clients.remove(&client_id).expect("client not found"); - for connection in client.subscribed_connections.into_iter() { + for connection in client.subscribed_connections { self.connections .send(connection, ConnectionMessageIn::Unsubscribed { client_id }) .await; @@ -620,7 +658,7 @@ impl TcpConnectionStealer { } /// Handles [`Command`]s that were received by [`TcpConnectionStealer::command_rx`]. 
- #[tracing::instrument(level = "trace", skip(self))] + #[tracing::instrument(level = Level::TRACE, skip(self), err)] async fn handle_command(&mut self, command: StealerCommand) -> Result<(), AgentError> { let StealerCommand { client_id, command } = command; @@ -649,14 +687,32 @@ impl TcpConnectionStealer { ConnectionMessageIn::Unsubscribed { client_id }, ) .await; + + let _ = self + .metrics + .tell(MetricsDecStealConnectionSubscription) + .await + .inspect_err(|fail| trace!(?fail)); } Command::PortSubscribe(port_steal) => { - self.port_subscribe(client_id, port_steal).await? + let filtered = self.port_subscribe(client_id, port_steal).await?; + + let _ = self + .metrics + .tell(MetricsIncStealPortSubscription { filtered }) + .await + .inspect_err(|fail| trace!(?fail)); } Command::PortUnsubscribe(port) => { - self.port_subscriptions.remove(client_id, port).await?; + if let Some(filtered) = self.port_subscriptions.remove(client_id, port).await? { + let _ = self + .metrics + .tell(MetricsDecStealPortSubscription { filtered }) + .await + .inspect_err(|fail| trace!(?fail)); + } } Command::ResponseData(TcpData { diff --git a/mirrord/agent/src/steal/http/filter.rs b/mirrord/agent/src/steal/http/filter.rs index 8afcbc85f25..caba22302b1 100644 --- a/mirrord/agent/src/steal/http/filter.rs +++ b/mirrord/agent/src/steal/http/filter.rs @@ -3,7 +3,7 @@ use hyper::Request; use tracing::Level; /// Currently supported filtering criterias. -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum HttpFilter { /// Header based filter. /// This [`Regex`] should be used against each header after transforming it to `k: v` format. 
diff --git a/mirrord/agent/src/steal/subscriptions.rs b/mirrord/agent/src/steal/subscriptions.rs index 0ff0e1fa8ea..0fcf313d7e6 100644 --- a/mirrord/agent/src/steal/subscriptions.rs +++ b/mirrord/agent/src/steal/subscriptions.rs @@ -215,30 +215,39 @@ impl PortSubscriptions { /// * `client_id` - identifier of the client that issued the subscription /// * `port` - number of the subscription port /// + /// # Returns + /// + /// `Some(true)` if the subscprition has an HTTP filter, `Some(false)` if it's unfiltered, and + /// `None` if we could not find the [`PortSubscription`]. + /// /// # Warning /// /// If this method returns an [`Err`], it means that this set is out of sync with the inner /// [`PortRedirector`] and it is no longer usable. It is a caller's responsibility to clean /// up any external state. - pub async fn remove(&mut self, client_id: ClientId, port: Port) -> Result<(), R::Error> { + pub async fn remove( + &mut self, + client_id: ClientId, + port: Port, + ) -> Result, R::Error> { let Entry::Occupied(mut e) = self.subscriptions.entry(port) else { - return Ok(()); + return Ok(None); }; - let remove_redirect = match e.get_mut() { + let (remove_redirect, filtered) = match e.get_mut() { PortSubscription::Unfiltered(subscribed_client) if *subscribed_client == client_id => { e.remove(); - true + (true, Some(false)) } - PortSubscription::Unfiltered(..) => false, + PortSubscription::Unfiltered(..) => (false, Some(false)), PortSubscription::Filtered(filters) => { filters.remove(&client_id); if filters.is_empty() { e.remove(); - true + (true, Some(true)) } else { - false + (false, Some(true)) } } }; @@ -251,7 +260,7 @@ impl PortSubscriptions { } } - Ok(()) + Ok(filtered) } /// Remove all client subscriptions from this set. @@ -265,18 +274,21 @@ impl PortSubscriptions { /// If this method returns an [`Err`], it means that this set is out of sync with the inner /// [`PortRedirector`] and it is no longer usable. 
It is a caller's responsibility to clean /// up any external state. - pub async fn remove_all(&mut self, client_id: ClientId) -> Result<(), R::Error> { + pub async fn remove_all(&mut self, client_id: ClientId) -> Result, R::Error> { let ports = self .subscriptions .iter() .filter_map(|(k, v)| v.has_client(client_id).then_some(*k)) .collect::>(); + let mut all_removed = Vec::new(); for port in ports { - self.remove(client_id, port).await?; + if let Some(removed) = self.remove(client_id, port).await? { + all_removed.push(removed); + } } - Ok(()) + Ok(all_removed) } /// Return a subscription for the given `port`. diff --git a/mirrord/protocol/src/codec.rs b/mirrord/protocol/src/codec.rs index bb3992215cc..ce77a8a41a8 100644 --- a/mirrord/protocol/src/codec.rs +++ b/mirrord/protocol/src/codec.rs @@ -98,6 +98,11 @@ pub enum ClientMessage { /// These are the messages used by the `mirror` feature, and handled by the /// `TcpSnifferApi` in the agent. Tcp(LayerTcp), + + /// TCP stealer message. + /// + /// These are the messages used by the `steal` feature, and handled by the `TcpStealerApi` in + /// the agent. TcpSteal(LayerTcpSteal), TcpOutgoing(LayerTcpOutgoing), UdpOutgoing(LayerUdpOutgoing), diff --git a/mirrord/protocol/src/tcp.rs b/mirrord/protocol/src/tcp.rs index d4fedd2ff85..04232f3aed3 100644 --- a/mirrord/protocol/src/tcp.rs +++ b/mirrord/protocol/src/tcp.rs @@ -233,10 +233,38 @@ impl StealType { } /// Messages related to Steal Tcp handler from client. +/// +/// `PortSubscribe`, `PortUnsubscribe`, and `ConnectionUnsubscribe` variants are similar +/// to what you'll find in the [`LayerTcp`], but they're handled by different tasks in +/// the agent. +/// +/// Stolen traffic might have an additional overhead when compared to mirrored traffic, as +/// we have an intermmediate HTTP server to handle filtering (based on HTTP headers, etc). 
#[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] pub enum LayerTcpSteal { + /// User is interested in stealing traffic on this `Port`, so add it to the list of + /// ports that the stealer is filtering. + /// + /// The `TcpConnectionStealer` supports an [`HttpFilter`] granting the ability to steal + /// only traffic that matches the user configured filter. It's also possible to just steal + /// all traffic (which we refer as `Unfiltered`). For more info see [`StealType`]. + /// + /// This variant is somewhat related to [`LayerTcpSteal::ConnectionUnsubscribe`], since + /// we don't have a `ConnectionSubscribe` message anywhere, instead what we do is: when + /// a new connection comes in one of the ports we are subscribed to, we consider it a + /// connection subscription (so this mechanism represents the **non-existing** + /// `ConnectionSubscribe` variant). PortSubscribe(StealType), + + /// User has stopped stealing from this connection with [`ConnectionId`]. + /// + /// We do **not** have a `ConnectionSubscribe` variant/message. What happens instead is that we + /// call a _connection subscription_ the act of `accept`ing a new connection on one of the + /// ports we are subscribed to. See the [`LayerTcpSteal::PortSubscribe`] for more info. ConnectionUnsubscribe(ConnectionId), + + /// Removes this `Port` from the stealers's filter, the traffic won't be stolen by mirrord + /// anymore. 
PortUnsubscribe(Port), Data(TcpData), HttpResponse(HttpResponse>), From cbc66cbfd2601eb3be631baa4255047a149ba04e Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 18 Dec 2024 17:43:44 -0300 Subject: [PATCH 05/85] How to install prometheus // Add more metrics to the reply --- mirrord/agent/README.md | 214 +++++++++++++++++++++++++++++++++++ mirrord/agent/src/metrics.rs | 83 +++++++------- 2 files changed, 257 insertions(+), 40 deletions(-) diff --git a/mirrord/agent/README.md b/mirrord/agent/README.md index 61d84351b4e..8b5fa759232 100644 --- a/mirrord/agent/README.md +++ b/mirrord/agent/README.md @@ -9,6 +9,220 @@ mirrord-agent is distributed as a container image (currently only x86) that is p ## Enabling prometheus metrics +TODO(alex) [mid]: Talk how to enable it from env whatever. + +### Installing prometheus + +Run `kubectl apply -f {file-name}.yaml` on these sequences of `yaml` files and you should +get prometheus running in your cluster. You can access the dashboard from your browser at +`http://{cluster-ip}:30909`, if you're using minikube it might be +`http://192.168.49.2:30909`. + +You'll get prometheus running under the `monitoring` namespace, but it'll be able to look +into resources from all namespaces. The config in `configmap.yaml` sets prometheus to look +at pods only, if you want to use it to scrape other stuff, check +[this example](https://github.com/prometheus/prometheus/blob/main/documentation/examples/prometheus-kubernetes.yml). + +1. `create-namespace.yaml` + +```yaml +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring +``` + +2. `cluster-role.yaml` + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus +rules: +- apiGroups: [""] + resources: + - nodes + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] +- apiGroups: + - extensions + resources: + - ingresses + verbs: ["get", "list", "watch"] +``` + +3. 
`service-account.yaml` + +```yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus + namespace: monitoring +``` + +4. `cluster-role-binding.yaml` + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: +- kind: ServiceAccount + name: prometheus + namespace: monitoring +``` + +5. `configmap.yaml` + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-config + namespace: monitoring +data: + prometheus.yml: | + # A scrape configuration for running Prometheus on a Kubernetes cluster. + # This uses separate scrape configs for cluster components (i.e. API server, node) + # and services to allow each to use different authentication configs. + # + # Kubernetes labels will be added as Prometheus labels on metrics via the + # `labelmap` relabeling action. + # + # If you are using Kubernetes 1.7.2 or earlier, please take note of the comments + # for the kubernetes-cadvisor job; you will need to edit or remove this job. + + # Keep at most 100 sets of details of targets dropped by relabeling. + # This information is used to display in the UI for troubleshooting. + global: + keep_dropped_targets: 100 + + # Scrape config for API servers. + # + # Kubernetes exposes API servers as endpoints to the default/kubernetes + # service so this uses `endpoints` role and uses relabelling to only keep + # the endpoints associated with the default/kubernetes service using the + # default named port `https`. This works for single API server deployments as + # well as HA API server deployments. + scrape_configs: + # Example scrape config for pods + # + # The relabeling allows the actual pod scrape to be configured + # for all the declared ports (or port-free target if none is declared) + # or only some ports. 
+ - job_name: "kubernetes-pods" + + kubernetes_sd_configs: + - role: pod + + relabel_configs: + # Example relabel to scrape only pods that have + # "example.io/should_be_scraped = true" annotation. + # - source_labels: [__meta_kubernetes_pod_annotation_example_io_should_be_scraped] + # action: keep + # regex: true + # + # Example relabel to customize metric path based on pod + # "example.io/metric_path = " annotation. + # - source_labels: [__meta_kubernetes_pod_annotation_example_io_metric_path] + # action: replace + # target_label: __metrics_path__ + # regex: (.+) + # + # Example relabel to scrape only single, desired port for the pod + # based on pod "example.io/scrape_port = " annotation. + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod +``` + - If you make any changes to the 5-configmap.yaml file, remember to `kubectl apply` it **before** restarting the `prometheus` deployment. +6. 
`deployment.yaml` + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus + namespace: monitoring + labels: + app: prometheus +spec: + replicas: 1 + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + type: RollingUpdate + selector: + matchLabels: + app: prometheus + template: + metadata: + labels: + app: prometheus + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + spec: + serviceAccountName: prometheus + containers: + - name: prometheus + image: prom/prometheus + args: + - '--config.file=/etc/prometheus/prometheus.yml' + ports: + - name: web + containerPort: 9090 + volumeMounts: + - name: prometheus-config-volume + mountPath: /etc/prometheus + restartPolicy: Always + volumes: + - name: prometheus-config-volume + configMap: + defaultMode: 420 + name: prometheus-config +``` + +7. `service.yaml` + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: prometheus-service + namespace: monitoring + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '9090' +spec: + selector: + app: prometheus + type: NodePort + ports: + - port: 8080 + targetPort: 9090 + nodePort: 30909 +``` diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 6152f6be85b..8a7e05a4cfd 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -36,21 +36,46 @@ async fn get_metrics(metrics: Extension>) -> Result>) -> Result for MetricsActor { @@ -170,32 +195,6 @@ impl Message for MetricsActor { } } -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsIncClient, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.connected_client_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsDecClient, - _ctx: Context<'_, Self, Self::Reply>, - ) -> 
Self::Reply { - self.connected_client_count = self.connected_client_count.saturating_sub(1); - } -} - impl Message for MetricsActor { type Reply = (); @@ -348,8 +347,12 @@ impl Message for MetricsActor { _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { MetricsGetAllReply { - open_fds_count: self.open_fd_count, - connected_clients_count: self.connected_client_count, + open_fd_count: self.open_fd_count, + mirror_port_subscription_count: self.mirror_port_subscription_count, + steal_filtered_port_subscription_count: self.steal_filtered_port_subscription_count, + steal_unfiltered_port_subscription_count: self.steal_unfiltered_port_subscription_count, + steal_connection_subscription_count: self.steal_connection_subscription_count, + connection_subscription_count: self.connection_subscription_count, } } } From 8b8d4ffd25967ef28a09a784ae1d0947c362de81 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 18 Dec 2024 18:39:04 -0300 Subject: [PATCH 06/85] Do not register the gauge multiple times. 
--- mirrord/agent/src/metrics.rs | 135 ++++++++++++++++++++----------- mirrord/agent/src/sniffer/api.rs | 4 +- 2 files changed, 91 insertions(+), 48 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 8a7e05a4cfd..be0e77137b0 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -6,6 +6,7 @@ use kameo::{ message::{Context, Message}, Actor, Reply, }; +use prometheus::core::{AtomicI64, GenericGauge}; use serde::Serialize; use thiserror::Error; use tokio::net::TcpListener; @@ -25,57 +26,52 @@ pub(crate) enum MetricsError { Prometheus(#[from] prometheus::Error), } +unsafe impl Send for MetricsError {} + impl IntoResponse for MetricsError { fn into_response(self) -> axum::response::Response { (http::StatusCode::INTERNAL_SERVER_ERROR, self.to_string()).into_response() } } -#[tracing::instrument(level = Level::INFO, ret, err)] -async fn get_metrics(metrics: Extension>) -> Result { - use prometheus::{register_int_gauge, Encoder, TextEncoder}; +#[tracing::instrument(level = Level::INFO, skip(prometheus_metrics), ret, err)] +#[axum::debug_handler] +async fn get_metrics( + metrics: Extension>, + prometheus_metrics: Extension, +) -> Result { + use prometheus::{Encoder, TextEncoder}; let MetricsGetAllReply { open_fd_count, mirror_port_subscription_count, + mirror_connection_subscription_count, steal_filtered_port_subscription_count, steal_unfiltered_port_subscription_count, steal_connection_subscription_count, - connection_subscription_count, } = metrics.ask(MetricsGetAll).await?; - register_int_gauge!( - "mirrord_agent_open_fd_count", - "amount of open fds in mirrord-agent" - )? - .set(open_fd_count as i64); + prometheus_metrics.open_fd_count.set(open_fd_count as i64); - register_int_gauge!("mirrord_agent_mirror_port_subscription_count", "")? 
+ prometheus_metrics + .mirror_port_subscription_count .set(mirror_port_subscription_count as i64); - register_int_gauge!( - "mirrord_agent_steal_filtered_port_subscription_count", - "amount of connected clients in mirrord-agent" - )? - .set(steal_filtered_port_subscription_count as i64); - - register_int_gauge!( - "mirrord_agent_steal_unfiltered_port_subscription_count", - "amount of connected clients in mirrord-agent" - )? - .set(steal_unfiltered_port_subscription_count as i64); - - register_int_gauge!( - "mirrord_agent_steal_connection_subscription_count", - "amount of connected clients in mirrord-agent" - )? - .set(steal_connection_subscription_count as i64); - - register_int_gauge!( - "mirrord_agent_connection_subscription_count", - "amount of connected clients in mirrord-agent" - )? - .set(connection_subscription_count as i64); + prometheus_metrics + .mirror_connection_subscription_count + .set(mirror_connection_subscription_count as i64); + + prometheus_metrics + .steal_filtered_port_subscription_count + .set(steal_filtered_port_subscription_count as i64); + + prometheus_metrics + .steal_unfiltered_port_subscription_count + .set(steal_unfiltered_port_subscription_count as i64); + + prometheus_metrics + .steal_connection_subscription_count + .set(steal_connection_subscription_count as i64); let metric_families = prometheus::gather(); @@ -87,15 +83,58 @@ async fn get_metrics(metrics: Extension>) -> Result, + mirror_port_subscription_count: GenericGauge, + mirror_connection_subscription_count: GenericGauge, + steal_filtered_port_subscription_count: GenericGauge, + steal_unfiltered_port_subscription_count: GenericGauge, + steal_connection_subscription_count: GenericGauge, +} + +impl PrometheusMetrics { + fn new() -> Result { + use prometheus::register_int_gauge; + + Ok(Self { + open_fd_count: register_int_gauge!( + "mirrord_agent_open_fd_count", + "amount of open fds in mirrord-agent" + )?, + mirror_port_subscription_count: register_int_gauge!( + 
"mirrord_agent_mirror_port_subscription_count", + "amount of mirror port subscriptions in mirror-agent" + )?, + mirror_connection_subscription_count: register_int_gauge!( + "mirrord_agent_mirror_connection_subscription_count", + "amount of connections in steal mode in mirrord-agent" + )?, + steal_filtered_port_subscription_count: register_int_gauge!( + "mirrord_agent_steal_filtered_port_subscription_count", + "amount of filtered steal port subscriptions in mirrord-agent" + )?, + steal_unfiltered_port_subscription_count: register_int_gauge!( + "mirrord_agent_steal_unfiltered_port_subscription_count", + "amount of unfiltered steal port subscriptions in mirrord-agent" + )?, + steal_connection_subscription_count: register_int_gauge!( + "mirrord_agent_steal_connection_subscription_count", + "amount of connections in steal mode in mirrord-agent" + )?, + }) + } +} + #[derive(Default)] pub(crate) struct MetricsActor { enabled: bool, open_fd_count: u64, mirror_port_subscription_count: u64, + mirror_connection_subscription_count: u64, steal_filtered_port_subscription_count: u64, steal_unfiltered_port_subscription_count: u64, steal_connection_subscription_count: u64, - connection_subscription_count: u64, } impl MetricsActor { @@ -113,9 +152,12 @@ impl Actor for MetricsActor { #[tracing::instrument(level = Level::INFO, skip_all, ret ,err)] async fn on_start(&mut self, metrics: ActorRef) -> Result<(), BoxError> { if self.enabled { + let prometheus_metrics = PrometheusMetrics::new()?; + let app = Router::new() .route("/metrics", get(get_metrics)) - .layer(Extension(metrics)); + .layer(Extension(metrics)) + .layer(Extension(prometheus_metrics)); let listener = TcpListener::bind("0.0.0.0:9000") .await @@ -140,6 +182,9 @@ pub(crate) struct MetricsDecFd; pub(crate) struct MetricsIncMirrorPortSubscription; pub(crate) struct MetricsDecMirrorPortSubscription; +pub(crate) struct MetricsIncMirrorConnectionSubscription; +pub(crate) struct MetricsDecMirrorConnectionSubscription; + 
pub(crate) struct MetricsIncStealPortSubscription { pub(crate) filtered: bool, } @@ -154,19 +199,16 @@ pub(crate) struct MetricsDecStealPortSubscriptionMany { pub(crate) struct MetricsIncStealConnectionSubscription; pub(crate) struct MetricsDecStealConnectionSubscription; -pub(crate) struct MetricsIncConnectionSubscription; -pub(crate) struct MetricsDecConnectionSubscription; - pub(crate) struct MetricsGetAll; #[derive(Reply, Serialize)] pub(crate) struct MetricsGetAllReply { open_fd_count: u64, mirror_port_subscription_count: u64, + mirror_connection_subscription_count: u64, steal_filtered_port_subscription_count: u64, steal_unfiltered_port_subscription_count: u64, steal_connection_subscription_count: u64, - connection_subscription_count: u64, } impl Message for MetricsActor { @@ -311,29 +353,30 @@ impl Message for MetricsActor { } } -impl Message for MetricsActor { +impl Message for MetricsActor { type Reply = (); #[tracing::instrument(level = Level::INFO, skip_all)] async fn handle( &mut self, - _: MetricsIncConnectionSubscription, + _: MetricsIncMirrorConnectionSubscription, _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { - self.connection_subscription_count += 1; + self.mirror_connection_subscription_count += 1; } } -impl Message for MetricsActor { +impl Message for MetricsActor { type Reply = (); #[tracing::instrument(level = Level::INFO, skip_all)] async fn handle( &mut self, - _: MetricsDecConnectionSubscription, + _: MetricsDecMirrorConnectionSubscription, _ctx: Context<'_, Self, Self::Reply>, ) -> Self::Reply { - self.connection_subscription_count = self.connection_subscription_count.saturating_sub(1); + self.mirror_connection_subscription_count = + self.mirror_connection_subscription_count.saturating_sub(1); } } @@ -349,10 +392,10 @@ impl Message for MetricsActor { MetricsGetAllReply { open_fd_count: self.open_fd_count, mirror_port_subscription_count: self.mirror_port_subscription_count, + mirror_connection_subscription_count: 
self.mirror_connection_subscription_count, steal_filtered_port_subscription_count: self.steal_filtered_port_subscription_count, steal_unfiltered_port_subscription_count: self.steal_unfiltered_port_subscription_count, steal_connection_subscription_count: self.steal_connection_subscription_count, - connection_subscription_count: self.connection_subscription_count, } } } diff --git a/mirrord/agent/src/sniffer/api.rs b/mirrord/agent/src/sniffer/api.rs index 059deeb13f9..5cd00639a7a 100644 --- a/mirrord/agent/src/sniffer/api.rs +++ b/mirrord/agent/src/sniffer/api.rs @@ -19,7 +19,7 @@ use super::messages::{SniffedConnection, SnifferCommand, SnifferCommandInner}; use crate::{ error::AgentError, metrics::{ - MetricsActor, MetricsDecConnectionSubscription, MetricsDecMirrorPortSubscription, + MetricsActor, MetricsDecMirrorConnectionSubscription, MetricsDecMirrorPortSubscription, MetricsIncMirrorPortSubscription, }, util::ClientId, @@ -210,7 +210,7 @@ impl TcpSnifferApi { let _ = self .metrics - .tell(MetricsDecConnectionSubscription) + .tell(MetricsDecMirrorConnectionSubscription) .await .inspect_err(|fail| tracing::trace!(?fail)); From 1ebc509b65f794b2f96459fb41a96702cafbe337 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Thu, 19 Dec 2024 17:25:28 -0300 Subject: [PATCH 07/85] tcpoutgoing metrics --- mirrord/agent/src/entrypoint.rs | 2 +- mirrord/agent/src/metrics.rs | 42 +++++++++++++++++++++ mirrord/agent/src/outgoing.rs | 55 +++++++++++++++++++++++++++- mirrord/protocol/src/codec.rs | 5 +++ mirrord/protocol/src/outgoing/tcp.rs | 31 ++++++++++++++++ 5 files changed, 132 insertions(+), 3 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index a7d1909438a..52355b55abb 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -232,7 +232,7 @@ impl ClientConnectionHandler { Self::create_stealer_api(id, bg_tasks.stealer, &mut connection).await?; let dns_api = Self::create_dns_api(bg_tasks.dns); - let 
tcp_outgoing_api = TcpOutgoingApi::new(pid); + let tcp_outgoing_api = TcpOutgoingApi::new(pid, state.metrics.clone()); let udp_outgoing_api = UdpOutgoingApi::new(pid); let client_handler = Self { diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index be0e77137b0..abc579dcc9f 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -49,6 +49,7 @@ async fn get_metrics( steal_filtered_port_subscription_count, steal_unfiltered_port_subscription_count, steal_connection_subscription_count, + tcp_outgoing_connection_count, } = metrics.ask(MetricsGetAll).await?; prometheus_metrics.open_fd_count.set(open_fd_count as i64); @@ -73,6 +74,10 @@ async fn get_metrics( .steal_connection_subscription_count .set(steal_connection_subscription_count as i64); + prometheus_metrics + .tcp_outgoing_connection_count + .set(tcp_outgoing_connection_count as i64); + let metric_families = prometheus::gather(); let mut buffer = Vec::new(); @@ -91,6 +96,7 @@ struct PrometheusMetrics { steal_filtered_port_subscription_count: GenericGauge, steal_unfiltered_port_subscription_count: GenericGauge, steal_connection_subscription_count: GenericGauge, + tcp_outgoing_connection_count: GenericGauge, } impl PrometheusMetrics { @@ -122,6 +128,10 @@ impl PrometheusMetrics { "mirrord_agent_steal_connection_subscription_count", "amount of connections in steal mode in mirrord-agent" )?, + tcp_outgoing_connection_count: register_int_gauge!( + "mirrord_agent_tcp_outgoing_connection_count", + "amount of tcp outgoing connections in mirrord-agent" + )?, }) } } @@ -135,6 +145,7 @@ pub(crate) struct MetricsActor { steal_filtered_port_subscription_count: u64, steal_unfiltered_port_subscription_count: u64, steal_connection_subscription_count: u64, + tcp_outgoing_connection_count: u64, } impl MetricsActor { @@ -199,6 +210,9 @@ pub(crate) struct MetricsDecStealPortSubscriptionMany { pub(crate) struct MetricsIncStealConnectionSubscription; pub(crate) struct 
MetricsDecStealConnectionSubscription; +pub(crate) struct MetricsIncTcpOutgoingConnection; +pub(crate) struct MetricsDecTcpOutgoingConnection; + pub(crate) struct MetricsGetAll; #[derive(Reply, Serialize)] @@ -209,6 +223,7 @@ pub(crate) struct MetricsGetAllReply { steal_filtered_port_subscription_count: u64, steal_unfiltered_port_subscription_count: u64, steal_connection_subscription_count: u64, + tcp_outgoing_connection_count: u64, } impl Message for MetricsActor { @@ -380,6 +395,32 @@ impl Message for MetricsActor { } } +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncTcpOutgoingConnection, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.tcp_outgoing_connection_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecTcpOutgoingConnection, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.tcp_outgoing_connection_count = self.tcp_outgoing_connection_count.saturating_sub(1); + } +} + impl Message for MetricsActor { type Reply = MetricsGetAllReply; @@ -396,6 +437,7 @@ impl Message for MetricsActor { steal_filtered_port_subscription_count: self.steal_filtered_port_subscription_count, steal_unfiltered_port_subscription_count: self.steal_unfiltered_port_subscription_count, steal_connection_subscription_count: self.steal_connection_subscription_count, + tcp_outgoing_connection_count: self.tcp_outgoing_connection_count, } } } diff --git a/mirrord/agent/src/outgoing.rs b/mirrord/agent/src/outgoing.rs index 13e3a9e1e06..7e171fd1126 100644 --- a/mirrord/agent/src/outgoing.rs +++ b/mirrord/agent/src/outgoing.rs @@ -1,6 +1,7 @@ use std::{collections::HashMap, fmt, thread, time::Duration}; use bytes::Bytes; +use kameo::actor::ActorRef; use mirrord_protocol::{ outgoing::{tcp::*, *}, ConnectionId, RemoteError, 
ResponseError, @@ -19,6 +20,7 @@ use tracing::Level; use crate::{ error::Result, + metrics::{MetricsActor, MetricsDecTcpOutgoingConnection, MetricsIncTcpOutgoingConnection}, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, }; @@ -55,13 +57,13 @@ impl TcpOutgoingApi { /// /// * `pid` - process id of the agent's target container #[tracing::instrument(level = Level::TRACE)] - pub(crate) fn new(pid: Option) -> Self { + pub(crate) fn new(pid: Option, metrics: ActorRef) -> Self { let (layer_tx, layer_rx) = mpsc::channel(1000); let (daemon_tx, daemon_rx) = mpsc::channel(1000); let watched_task = WatchedTask::new( Self::TASK_NAME, - TcpOutgoingTask::new(pid, layer_rx, daemon_tx).run(), + TcpOutgoingTask::new(pid, layer_rx, daemon_tx, metrics).run(), ); let task_status = watched_task.status(); let task = run_thread_in_namespace( @@ -110,6 +112,7 @@ struct TcpOutgoingTask { pid: Option, layer_rx: Receiver, daemon_tx: Sender, + metrics: ActorRef, } impl fmt::Debug for TcpOutgoingTask { @@ -138,6 +141,7 @@ impl TcpOutgoingTask { pid: Option, layer_rx: Receiver, daemon_tx: Sender, + metrics: ActorRef, ) -> Self { Self { next_connection_id: 0, @@ -146,6 +150,7 @@ impl TcpOutgoingTask { pid, layer_rx, daemon_tx, + metrics, } } @@ -219,6 +224,12 @@ impl TcpOutgoingTask { let daemon_message = DaemonTcpOutgoing::Close(connection_id); self.daemon_tx.send(daemon_message).await?; + + let _ = self + .metrics + .tell(MetricsDecTcpOutgoingConnection) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } // EOF occurred in one of peer connections. 
@@ -249,6 +260,12 @@ impl TcpOutgoingTask { self.daemon_tx .send(DaemonTcpOutgoing::Close(connection_id)) .await?; + + let _ = self + .metrics + .tell(MetricsDecTcpOutgoingConnection) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } } } @@ -299,9 +316,18 @@ impl TcpOutgoingTask { result = ?daemon_connect, "Connection attempt finished.", ); + self.daemon_tx .send(DaemonTcpOutgoing::Connect(daemon_connect)) + .await?; + + let _ = self + .metrics + .tell(MetricsIncTcpOutgoingConnection) .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + + Ok(()) } // This message handles two cases: @@ -341,9 +367,20 @@ impl TcpOutgoingTask { connection_id, "Peer connection is shut down as well, sending close message to the client.", ); + self.daemon_tx .send(DaemonTcpOutgoing::Close(connection_id)) + .await?; + + let _ = self + .metrics + .tell(MetricsDecTcpOutgoingConnection) .await + .inspect_err( + |fail| tracing::warn!(%fail, "agent metrics failure!"), + ); + + Ok(()) } } @@ -360,7 +397,15 @@ impl TcpOutgoingTask { ); self.daemon_tx .send(DaemonTcpOutgoing::Close(connection_id)) + .await?; + + let _ = self + .metrics + .tell(MetricsDecTcpOutgoingConnection) .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + + Ok(()) } } } @@ -371,6 +416,12 @@ impl TcpOutgoingTask { self.writers.remove(&connection_id); self.readers.remove(&connection_id); + let _ = self + .metrics + .tell(MetricsDecTcpOutgoingConnection) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + Ok(()) } } diff --git a/mirrord/protocol/src/codec.rs b/mirrord/protocol/src/codec.rs index ce77a8a41a8..23b90230d35 100644 --- a/mirrord/protocol/src/codec.rs +++ b/mirrord/protocol/src/codec.rs @@ -104,6 +104,11 @@ pub enum ClientMessage { /// These are the messages used by the `steal` feature, and handled by the `TcpStealerApi` in /// the agent. 
TcpSteal(LayerTcpSteal), + // TODO(alex) [high]: Outgoing is next! + /// TCP outgoing message. + /// + /// These are the messages used by the `outgoing` feature (tcp), and handled by the + /// `TcpOutgoingApi` in the agent. TcpOutgoing(LayerTcpOutgoing), UdpOutgoing(LayerUdpOutgoing), FileRequest(FileRequest), diff --git a/mirrord/protocol/src/outgoing/tcp.rs b/mirrord/protocol/src/outgoing/tcp.rs index e38fa0c44d0..ce00d944f65 100644 --- a/mirrord/protocol/src/outgoing/tcp.rs +++ b/mirrord/protocol/src/outgoing/tcp.rs @@ -3,14 +3,45 @@ use crate::RemoteResult; #[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] pub enum LayerTcpOutgoing { + /// User is interested in connecting via tcp to some remote address, specified in + /// [`LayerConnect`]. + /// + /// The layer will get a mirrord managed address that it'll `connect` to, meanwhile + /// in the agent we `connect` to the actual remote address. Connect(LayerConnect), + + /// Write data to the remote address the agent is `connect`ed to. + /// + /// There's no `Read` message, as we're calling `read` in the agent, and we send + /// a [`DaemonTcpOutgoing::Read`] message in case we get some data from this connection. Write(LayerWrite), + + /// The layer closed the connection, this message syncs up the agent, closing it + /// over there as well. + /// + /// Connections in the agent may be closed in other ways, such as when an error happens + /// when reading or writing. Which means that this message is not the only way of + /// closing outgoing tcp connections. Close(LayerClose), } #[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] pub enum DaemonTcpOutgoing { + /// The agent attempted a connection to the remote address specified by + /// [`LayerTcpOutgoing::Connect`], and it might've been successful or not. Connect(RemoteResult), + + /// Read data from the connection. + /// + /// There's no `Write` message, as `write`s come from the user (layer). 
The agent sending + /// a `write` to the layer like this would make no sense, since it could just `write` it + /// to the remote connection itself. Read(RemoteResult), + + // TODO(alex) [high]: For other connections, check places where we `DaemonClose` to + // dec their counters. + /// Tell the layer that this connection has been `close`d, either by a request from + /// the user with [`LayerTcpOutgoing::Close`], or from some error in the agent when + /// writing or reading from the connection. Close(ConnectionId), } From df0a00727d5b65e34d0d591536abc1228b832a98 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Thu, 19 Dec 2024 17:45:47 -0300 Subject: [PATCH 08/85] udpoutgoing metrics --- mirrord/agent/src/entrypoint.rs | 2 +- mirrord/agent/src/metrics.rs | 42 ++++++++++++++++++++++++++++ mirrord/agent/src/outgoing/udp.rs | 42 ++++++++++++++++++++++++---- mirrord/protocol/src/codec.rs | 6 +++- mirrord/protocol/src/outgoing/udp.rs | 36 ++++++++++++++++++++++++ 5 files changed, 120 insertions(+), 8 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 52355b55abb..0b8b5d8573e 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -233,7 +233,7 @@ impl ClientConnectionHandler { let dns_api = Self::create_dns_api(bg_tasks.dns); let tcp_outgoing_api = TcpOutgoingApi::new(pid, state.metrics.clone()); - let udp_outgoing_api = UdpOutgoingApi::new(pid); + let udp_outgoing_api = UdpOutgoingApi::new(pid, state.metrics.clone()); let client_handler = Self { id, diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index abc579dcc9f..a00736ef7ee 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -50,6 +50,7 @@ async fn get_metrics( steal_unfiltered_port_subscription_count, steal_connection_subscription_count, tcp_outgoing_connection_count, + udp_outgoing_connection_count, } = metrics.ask(MetricsGetAll).await?; prometheus_metrics.open_fd_count.set(open_fd_count 
as i64); @@ -78,6 +79,10 @@ async fn get_metrics( .tcp_outgoing_connection_count .set(tcp_outgoing_connection_count as i64); + prometheus_metrics + .udp_outgoing_connection_count + .set(udp_outgoing_connection_count as i64); + let metric_families = prometheus::gather(); let mut buffer = Vec::new(); @@ -97,6 +102,7 @@ struct PrometheusMetrics { steal_unfiltered_port_subscription_count: GenericGauge, steal_connection_subscription_count: GenericGauge, tcp_outgoing_connection_count: GenericGauge, + udp_outgoing_connection_count: GenericGauge, } impl PrometheusMetrics { @@ -132,6 +138,10 @@ impl PrometheusMetrics { "mirrord_agent_tcp_outgoing_connection_count", "amount of tcp outgoing connections in mirrord-agent" )?, + udp_outgoing_connection_count: register_int_gauge!( + "mirrord_agent_udp_outgoing_connection_count", + "amount of udp outgoing connections in mirrord-agent" + )?, }) } } @@ -146,6 +156,7 @@ pub(crate) struct MetricsActor { steal_unfiltered_port_subscription_count: u64, steal_connection_subscription_count: u64, tcp_outgoing_connection_count: u64, + udp_outgoing_connection_count: u64, } impl MetricsActor { @@ -213,6 +224,9 @@ pub(crate) struct MetricsDecStealConnectionSubscription; pub(crate) struct MetricsIncTcpOutgoingConnection; pub(crate) struct MetricsDecTcpOutgoingConnection; +pub(crate) struct MetricsIncUdpOutgoingConnection; +pub(crate) struct MetricsDecUdpOutgoingConnection; + pub(crate) struct MetricsGetAll; #[derive(Reply, Serialize)] @@ -224,6 +238,7 @@ pub(crate) struct MetricsGetAllReply { steal_unfiltered_port_subscription_count: u64, steal_connection_subscription_count: u64, tcp_outgoing_connection_count: u64, + udp_outgoing_connection_count: u64, } impl Message for MetricsActor { @@ -421,6 +436,32 @@ impl Message for MetricsActor { } } +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncUdpOutgoingConnection, + _ctx: Context<'_, Self, 
Self::Reply>, + ) -> Self::Reply { + self.udp_outgoing_connection_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecUdpOutgoingConnection, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.udp_outgoing_connection_count = self.udp_outgoing_connection_count.saturating_sub(1); + } +} + impl Message for MetricsActor { type Reply = MetricsGetAllReply; @@ -438,6 +479,7 @@ impl Message for MetricsActor { steal_unfiltered_port_subscription_count: self.steal_unfiltered_port_subscription_count, steal_connection_subscription_count: self.steal_connection_subscription_count, tcp_outgoing_connection_count: self.tcp_outgoing_connection_count, + udp_outgoing_connection_count: self.udp_outgoing_connection_count, } } } diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index b6baa5e537e..f191bc63911 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -9,6 +9,7 @@ use futures::{ prelude::*, stream::{SplitSink, SplitStream}, }; +use kameo::actor::ActorRef; use mirrord_protocol::{ outgoing::{udp::*, *}, ConnectionId, ResponseError, @@ -22,8 +23,10 @@ use tokio::{ use tokio_util::{codec::BytesCodec, udp::UdpFramed}; use tracing::{debug, trace, warn}; +use super::MetricsActor; use crate::{ error::Result, + metrics::{MetricsDecUdpOutgoingConnection, MetricsIncUdpOutgoingConnection}, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, }; @@ -71,12 +74,14 @@ async fn connect(remote_address: SocketAddr) -> Result impl UdpOutgoingApi { const TASK_NAME: &'static str = "UdpOutgoing"; - pub(crate) fn new(pid: Option) -> Self { + pub(crate) fn new(pid: Option, metrics: ActorRef) -> Self { let (layer_tx, layer_rx) = mpsc::channel(1000); let (daemon_tx, daemon_rx) = mpsc::channel(1000); - let watched_task = - WatchedTask::new(Self::TASK_NAME, 
Self::interceptor_task(layer_rx, daemon_tx)); + let watched_task = WatchedTask::new( + Self::TASK_NAME, + Self::interceptor_task(layer_rx, daemon_tx, metrics), + ); let task_status = watched_task.status(); let task = run_thread_in_namespace( @@ -101,6 +106,7 @@ impl UdpOutgoingApi { async fn interceptor_task( mut layer_rx: Receiver, daemon_tx: Sender, + metrics: ActorRef, ) -> Result<()> { let mut connection_ids = 0..=ConnectionId::MAX; @@ -136,11 +142,14 @@ impl UdpOutgoingApi { .ok_or_else(|| ResponseError::IdsExhausted("connect".into()))?; debug!("interceptor_task -> mirror_socket {:#?}", mirror_socket); + let peer_address = mirror_socket.peer_addr()?; let local_address = mirror_socket.local_addr()?; let local_address = SocketAddress::Ip(local_address); + let framed = UdpFramed::new(mirror_socket, BytesCodec::new()); debug!("interceptor_task -> framed {:#?}", framed); + let (sink, stream): ( SplitSink, (BytesMut, SocketAddr)>, SplitStream>, @@ -158,7 +167,13 @@ impl UdpOutgoingApi { let daemon_message = DaemonUdpOutgoing::Connect(daemon_connect); debug!("interceptor_task -> daemon_message {:#?}", daemon_message); - daemon_tx.send(daemon_message).await? + + daemon_tx.send(daemon_message).await?; + + let _ = metrics + .tell(MetricsIncUdpOutgoingConnection) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } // [user] -> [layer] -> [agent] -> [remote] // `user` wrote some message to the remote host. @@ -183,7 +198,12 @@ impl UdpOutgoingApi { readers.remove(&connection_id); let daemon_message = DaemonUdpOutgoing::Close(connection_id); - daemon_tx.send(daemon_message).await? 
+ daemon_tx.send(daemon_message).await?; + + let _ = metrics + .tell(MetricsDecUdpOutgoingConnection) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } } // [layer] -> [agent] @@ -191,6 +211,11 @@ impl UdpOutgoingApi { LayerUdpOutgoing::Close(LayerClose { ref connection_id }) => { writers.remove(connection_id); readers.remove(connection_id); + + let _ = metrics + .tell(MetricsDecUdpOutgoingConnection) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } } } @@ -216,7 +241,12 @@ impl UdpOutgoingApi { readers.remove(&connection_id); let daemon_message = DaemonUdpOutgoing::Close(connection_id); - daemon_tx.send(daemon_message).await? + daemon_tx.send(daemon_message).await?; + + let _ = metrics + .tell(MetricsDecUdpOutgoingConnection) + .await + .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); } } } diff --git a/mirrord/protocol/src/codec.rs b/mirrord/protocol/src/codec.rs index 23b90230d35..e2ed6532e3b 100644 --- a/mirrord/protocol/src/codec.rs +++ b/mirrord/protocol/src/codec.rs @@ -104,12 +104,16 @@ pub enum ClientMessage { /// These are the messages used by the `steal` feature, and handled by the `TcpStealerApi` in /// the agent. TcpSteal(LayerTcpSteal), - // TODO(alex) [high]: Outgoing is next! /// TCP outgoing message. /// /// These are the messages used by the `outgoing` feature (tcp), and handled by the /// `TcpOutgoingApi` in the agent. TcpOutgoing(LayerTcpOutgoing), + + /// UDP outgoing message. + /// + /// These are the messages used by the `outgoing` feature (udp), and handled by the + /// `UdpOutgoingApi` in the agent. 
UdpOutgoing(LayerUdpOutgoing), FileRequest(FileRequest), GetEnvVarsRequest(GetEnvVarsRequest), diff --git a/mirrord/protocol/src/outgoing/udp.rs b/mirrord/protocol/src/outgoing/udp.rs index 02b4d97f830..f58378beeea 100644 --- a/mirrord/protocol/src/outgoing/udp.rs +++ b/mirrord/protocol/src/outgoing/udp.rs @@ -3,14 +3,50 @@ use crate::RemoteResult; #[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] pub enum LayerUdpOutgoing { + /// User is interested in connecting via udp to some remote address, specified in + /// [`LayerConnect`]. + /// + /// The layer will get a mirrord managed address that it'll `connect` to, meanwhile + /// in the agent we `connect` to the actual remote address. + /// + /// Saying that we have an _udp connection_ is a bit weird, considering it's a + /// _connectionless_ protocol, but in mirrord we use a _fakeish_ connection mechanism + /// when dealing with outgoing udp traffic. Connect(LayerConnect), + + /// Write data to the remote address the agent is `connect`ed to. + /// + /// There's no `Read` message, as we're calling `read` in the agent, and we send + /// a [`DaemonUdpOutgoing::Read`] message in case we get some data from this connection. Write(LayerWrite), + + /// The layer closed the connection, this message syncs up the agent, closing it + /// over there as well. + /// + /// Connections in the agent may be closed in other ways, such as when an error happens + /// when reading or writing. Which means that this message is not the only way of + /// closing outgoing udp connections. Close(LayerClose), } #[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] pub enum DaemonUdpOutgoing { + /// The agent attempted a connection to the remote address specified by + /// [`LayerUdpOutgoing::Connect`], and it might've been successful or not. + /// + /// See the docs for [`LayerUdpOutgoing::Connect`] for a bit more information on the + /// weird idea of `connect` and udp in mirrord. 
Connect(RemoteResult), + + /// Read data from the connection. + /// + /// There's no `Write` message, as `write`s come from the user (layer). The agent sending + /// a `write` to the layer like this would make no sense, since it could just `write` it + /// to the remote connection itself. Read(RemoteResult), + + /// Tell the layer that this connection has been `close`d, either by a request from + /// the user with [`LayerUdpOutgoing::Close`], or from some error in the agent when + /// writing or reading from the connection. Close(ConnectionId), } From 0c91441db26d042e0bd0a2d409b728ffb8e1080f Mon Sep 17 00:00:00 2001 From: meowjesty Date: Thu, 19 Dec 2024 18:25:35 -0300 Subject: [PATCH 09/85] docs --- mirrord/agent/src/entrypoint.rs | 11 +++++++++++ mirrord/protocol/src/outgoing/tcp.rs | 2 -- mirrord/protocol/src/tcp.rs | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 0b8b5d8573e..218ad922c29 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -811,6 +811,17 @@ async fn start_iptable_guard(args: Args) -> Result<()> { result } +/// The agent is somewhat started twice, first with [`start_iptable_guard`], and then the +/// proper agent with [`start_agent`]. +/// +/// ## Things to keep in mind due to the double initialization +/// +/// Since the _second_ agent gets spawned as a child of the _first_, they share resources, +/// like the `namespace`, which means: +/// +/// 1. If you try to `bind` a socket to some address before [`start_agent`], it'll actually +/// be bound **twice**, which incurs an error (address already in use). You could get around +/// this by `bind`ing on `0.0.0.0:0`, but this is most likely **not** what you want. 
pub async fn main() -> Result<()> { rustls::crypto::CryptoProvider::install_default(rustls::crypto::aws_lc_rs::default_provider()) .expect("Failed to install crypto provider"); diff --git a/mirrord/protocol/src/outgoing/tcp.rs b/mirrord/protocol/src/outgoing/tcp.rs index ce00d944f65..877e0d2f6c0 100644 --- a/mirrord/protocol/src/outgoing/tcp.rs +++ b/mirrord/protocol/src/outgoing/tcp.rs @@ -38,8 +38,6 @@ pub enum DaemonTcpOutgoing { /// to the remote connection itself. Read(RemoteResult), - // TODO(alex) [high]: For other connections, check places where we `DaemonClose` to - // dec their counters. /// Tell the layer that this connection has been `close`d, either by a request from /// the user with [`LayerTcpOutgoing::Close`], or from some error in the agent when /// writing or reading from the connection. diff --git a/mirrord/protocol/src/tcp.rs b/mirrord/protocol/src/tcp.rs index 04232f3aed3..46946188f7a 100644 --- a/mirrord/protocol/src/tcp.rs +++ b/mirrord/protocol/src/tcp.rs @@ -79,6 +79,9 @@ pub enum LayerTcp { } /// Messages related to Tcp handler from server. +/// +/// They are the same for both `steal` and `mirror` modes, even though their layer +/// counterparts ([`LayerTcpSteal`] and [`LayerTcp`]) are different. 
#[derive(Encode, Decode, Debug, PartialEq, Eq, Clone)] pub enum DaemonTcp { NewConnection(NewTcpConnection), From c17482cc9a6872ae016abe01d9fa45ffc680e064 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Thu, 19 Dec 2024 18:38:22 -0300 Subject: [PATCH 10/85] move metrics to modules --- mirrord/agent/src/entrypoint.rs | 11 - mirrord/agent/src/file.rs | 5 +- mirrord/agent/src/metrics.rs | 262 +----------------- mirrord/agent/src/metrics/file_ops.rs | 33 +++ mirrord/agent/src/metrics/incoming_traffic.rs | 167 +++++++++++ mirrord/agent/src/metrics/outgoing_traffic.rs | 62 +++++ mirrord/agent/src/outgoing.rs | 5 +- mirrord/agent/src/outgoing/udp.rs | 2 +- mirrord/agent/src/sniffer/api.rs | 7 +- mirrord/agent/src/steal/connection.rs | 9 +- 10 files changed, 289 insertions(+), 274 deletions(-) create mode 100644 mirrord/agent/src/metrics/file_ops.rs create mode 100644 mirrord/agent/src/metrics/incoming_traffic.rs create mode 100644 mirrord/agent/src/metrics/outgoing_traffic.rs diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 218ad922c29..8bbdaa0bd62 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -859,17 +859,6 @@ pub async fn main() -> Result<()> { let args = cli::parse_args(); - // TODO(alex) [high]: Could start metrics from here, as the agent itself has 2 - // different starting points. So start task here, and pass comms to both. - // - // CANNOT `bind` anything before `start_agent`, we might hit addrinuse. 
- // let metrics = kameo::spawn(MetricsActor::default()); - // let listener = TcpListener::bind("0.0.0.0:0") - // .await - // .map_err(AgentError::from) - // .inspect_err(|fail| tracing::error!(?fail, "Generic listener!")) - // .inspect(|s| tracing::info!(?s, "Listening"))?; - let agent_result = if args.mode.is_targetless() || (std::env::var(IPTABLE_PREROUTING_ENV).is_ok() && std::env::var(IPTABLE_MESH_ENV).is_ok() diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index ab6b0407d75..0808cc1d382 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -17,7 +17,10 @@ use tracing::{error, trace, Level}; use crate::{ error::Result, - metrics::{MetricsActor, MetricsDecFd, MetricsIncFd}, + metrics::{ + file_ops::{MetricsDecFd, MetricsIncFd}, + MetricsActor, + }, }; #[derive(Debug)] diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index a00736ef7ee..babba1aa245 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -14,6 +14,10 @@ use tracing::Level; use crate::error::AgentError; +pub(crate) mod file_ops; +pub(crate) mod incoming_traffic; +pub(crate) mod outgoing_traffic; + #[derive(Error, Debug)] pub(crate) enum MetricsError { #[error(transparent)] @@ -34,8 +38,7 @@ impl IntoResponse for MetricsError { } } -#[tracing::instrument(level = Level::INFO, skip(prometheus_metrics), ret, err)] -#[axum::debug_handler] +#[tracing::instrument(level = Level::TRACE, skip(prometheus_metrics), ret, err)] async fn get_metrics( metrics: Extension>, prometheus_metrics: Extension, @@ -171,7 +174,7 @@ impl MetricsActor { impl Actor for MetricsActor { type Mailbox = UnboundedMailbox; - #[tracing::instrument(level = Level::INFO, skip_all, ret ,err)] + #[tracing::instrument(level = Level::TRACE, skip_all, ret ,err)] async fn on_start(&mut self, metrics: ActorRef) -> Result<(), BoxError> { if self.enabled { let prometheus_metrics = PrometheusMetrics::new()?; @@ -198,35 +201,6 @@ impl Actor for MetricsActor { } } 
-pub(crate) struct MetricsIncFd; -pub(crate) struct MetricsDecFd; - -pub(crate) struct MetricsIncMirrorPortSubscription; -pub(crate) struct MetricsDecMirrorPortSubscription; - -pub(crate) struct MetricsIncMirrorConnectionSubscription; -pub(crate) struct MetricsDecMirrorConnectionSubscription; - -pub(crate) struct MetricsIncStealPortSubscription { - pub(crate) filtered: bool, -} -pub(crate) struct MetricsDecStealPortSubscription { - pub(crate) filtered: bool, -} - -pub(crate) struct MetricsDecStealPortSubscriptionMany { - pub(crate) removed_subscriptions: Vec, -} - -pub(crate) struct MetricsIncStealConnectionSubscription; -pub(crate) struct MetricsDecStealConnectionSubscription; - -pub(crate) struct MetricsIncTcpOutgoingConnection; -pub(crate) struct MetricsDecTcpOutgoingConnection; - -pub(crate) struct MetricsIncUdpOutgoingConnection; -pub(crate) struct MetricsDecUdpOutgoingConnection; - pub(crate) struct MetricsGetAll; #[derive(Reply, Serialize)] @@ -240,232 +214,10 @@ pub(crate) struct MetricsGetAllReply { tcp_outgoing_connection_count: u64, udp_outgoing_connection_count: u64, } - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsIncFd, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.open_fd_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsDecFd, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.open_fd_count = self.open_fd_count.saturating_sub(1); - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsIncMirrorPortSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.mirror_port_subscription_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - 
- #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsDecMirrorPortSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.mirror_port_subscription_count = self.mirror_port_subscription_count.saturating_sub(1); - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - MetricsIncStealPortSubscription { filtered }: MetricsIncStealPortSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - if filtered { - self.steal_filtered_port_subscription_count += 1; - } else { - self.steal_unfiltered_port_subscription_count += 1; - } - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - MetricsDecStealPortSubscription { filtered }: MetricsDecStealPortSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - if filtered { - self.steal_filtered_port_subscription_count = self - .steal_filtered_port_subscription_count - .saturating_sub(1); - } else { - self.steal_unfiltered_port_subscription_count = self - .steal_unfiltered_port_subscription_count - .saturating_sub(1); - } - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - MetricsDecStealPortSubscriptionMany { - removed_subscriptions, - }: MetricsDecStealPortSubscriptionMany, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - for filtered in removed_subscriptions { - if filtered { - self.steal_filtered_port_subscription_count = self - .steal_filtered_port_subscription_count - .saturating_sub(1); - } else { - self.steal_unfiltered_port_subscription_count = self - .steal_unfiltered_port_subscription_count - .saturating_sub(1); - } - } - } -} - -impl Message for MetricsActor { - type Reply = (); - - 
#[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsIncStealConnectionSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.steal_connection_subscription_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsDecStealConnectionSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.steal_connection_subscription_count = - self.steal_connection_subscription_count.saturating_sub(1); - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsIncMirrorConnectionSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.mirror_connection_subscription_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsDecMirrorConnectionSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.mirror_connection_subscription_count = - self.mirror_connection_subscription_count.saturating_sub(1); - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsIncTcpOutgoingConnection, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.tcp_outgoing_connection_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsDecTcpOutgoingConnection, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.tcp_outgoing_connection_count = self.tcp_outgoing_connection_count.saturating_sub(1); - } -} - -impl Message for MetricsActor { - type Reply = (); - - 
#[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsIncUdpOutgoingConnection, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.udp_outgoing_connection_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::INFO, skip_all)] - async fn handle( - &mut self, - _: MetricsDecUdpOutgoingConnection, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.udp_outgoing_connection_count = self.udp_outgoing_connection_count.saturating_sub(1); - } -} - impl Message for MetricsActor { type Reply = MetricsGetAllReply; - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsGetAll, diff --git a/mirrord/agent/src/metrics/file_ops.rs b/mirrord/agent/src/metrics/file_ops.rs new file mode 100644 index 00000000000..ed040442604 --- /dev/null +++ b/mirrord/agent/src/metrics/file_ops.rs @@ -0,0 +1,33 @@ +use kameo::message::{Context, Message}; +use tracing::Level; + +use crate::metrics::MetricsActor; + +pub(crate) struct MetricsIncFd; +pub(crate) struct MetricsDecFd; + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncFd, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.open_fd_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecFd, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.open_fd_count = self.open_fd_count.saturating_sub(1); + } +} diff --git a/mirrord/agent/src/metrics/incoming_traffic.rs b/mirrord/agent/src/metrics/incoming_traffic.rs new file mode 100644 index 00000000000..e8de28d249a --- /dev/null +++ b/mirrord/agent/src/metrics/incoming_traffic.rs @@ -0,0 +1,167 @@ +use 
kameo::message::{Context, Message}; +use tracing::Level; + +use crate::metrics::MetricsActor; + +pub(crate) struct MetricsIncMirrorPortSubscription; +pub(crate) struct MetricsDecMirrorPortSubscription; + +pub(crate) struct MetricsIncMirrorConnectionSubscription; +pub(crate) struct MetricsDecMirrorConnectionSubscription; + +pub(crate) struct MetricsIncStealPortSubscription { + pub(crate) filtered: bool, +} +pub(crate) struct MetricsDecStealPortSubscription { + pub(crate) filtered: bool, +} + +pub(crate) struct MetricsDecStealPortSubscriptionMany { + pub(crate) removed_subscriptions: Vec, +} + +pub(crate) struct MetricsIncStealConnectionSubscription; +pub(crate) struct MetricsDecStealConnectionSubscription; + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncMirrorPortSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.mirror_port_subscription_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecMirrorPortSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.mirror_port_subscription_count = self.mirror_port_subscription_count.saturating_sub(1); + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + MetricsIncStealPortSubscription { filtered }: MetricsIncStealPortSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + if filtered { + self.steal_filtered_port_subscription_count += 1; + } else { + self.steal_unfiltered_port_subscription_count += 1; + } + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + MetricsDecStealPortSubscription { filtered }: 
MetricsDecStealPortSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + if filtered { + self.steal_filtered_port_subscription_count = self + .steal_filtered_port_subscription_count + .saturating_sub(1); + } else { + self.steal_unfiltered_port_subscription_count = self + .steal_unfiltered_port_subscription_count + .saturating_sub(1); + } + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + MetricsDecStealPortSubscriptionMany { + removed_subscriptions, + }: MetricsDecStealPortSubscriptionMany, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + for filtered in removed_subscriptions { + if filtered { + self.steal_filtered_port_subscription_count = self + .steal_filtered_port_subscription_count + .saturating_sub(1); + } else { + self.steal_unfiltered_port_subscription_count = self + .steal_unfiltered_port_subscription_count + .saturating_sub(1); + } + } + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncStealConnectionSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.steal_connection_subscription_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecStealConnectionSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.steal_connection_subscription_count = + self.steal_connection_subscription_count.saturating_sub(1); + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncMirrorConnectionSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.mirror_connection_subscription_count += 1; + } +} + +impl Message for 
MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecMirrorConnectionSubscription, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.mirror_connection_subscription_count = + self.mirror_connection_subscription_count.saturating_sub(1); + } +} diff --git a/mirrord/agent/src/metrics/outgoing_traffic.rs b/mirrord/agent/src/metrics/outgoing_traffic.rs new file mode 100644 index 00000000000..d9d21a591ca --- /dev/null +++ b/mirrord/agent/src/metrics/outgoing_traffic.rs @@ -0,0 +1,62 @@ +use kameo::message::{Context, Message}; +use tracing::Level; + +use crate::metrics::MetricsActor; + +pub(crate) struct MetricsIncTcpOutgoingConnection; +pub(crate) struct MetricsDecTcpOutgoingConnection; + +pub(crate) struct MetricsIncUdpOutgoingConnection; +pub(crate) struct MetricsDecUdpOutgoingConnection; + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncTcpOutgoingConnection, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.tcp_outgoing_connection_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsDecTcpOutgoingConnection, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.tcp_outgoing_connection_count = self.tcp_outgoing_connection_count.saturating_sub(1); + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: MetricsIncUdpOutgoingConnection, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.udp_outgoing_connection_count += 1; + } +} + +impl Message for MetricsActor { + type Reply = (); + + #[tracing::instrument(level = Level::INFO, skip_all)] + async fn handle( + &mut self, + _: 
MetricsDecUdpOutgoingConnection, + _ctx: Context<'_, Self, Self::Reply>, + ) -> Self::Reply { + self.udp_outgoing_connection_count = self.udp_outgoing_connection_count.saturating_sub(1); + } +} diff --git a/mirrord/agent/src/outgoing.rs b/mirrord/agent/src/outgoing.rs index 7e171fd1126..e9f5e071182 100644 --- a/mirrord/agent/src/outgoing.rs +++ b/mirrord/agent/src/outgoing.rs @@ -20,7 +20,10 @@ use tracing::Level; use crate::{ error::Result, - metrics::{MetricsActor, MetricsDecTcpOutgoingConnection, MetricsIncTcpOutgoingConnection}, + metrics::{ + outgoing_traffic::{MetricsDecTcpOutgoingConnection, MetricsIncTcpOutgoingConnection}, + MetricsActor, + }, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, }; diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index f191bc63911..d60d98e895e 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -26,7 +26,7 @@ use tracing::{debug, trace, warn}; use super::MetricsActor; use crate::{ error::Result, - metrics::{MetricsDecUdpOutgoingConnection, MetricsIncUdpOutgoingConnection}, + metrics::outgoing_traffic::{MetricsDecUdpOutgoingConnection, MetricsIncUdpOutgoingConnection}, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, }; diff --git a/mirrord/agent/src/sniffer/api.rs b/mirrord/agent/src/sniffer/api.rs index 5cd00639a7a..1ea3bd32ea9 100644 --- a/mirrord/agent/src/sniffer/api.rs +++ b/mirrord/agent/src/sniffer/api.rs @@ -19,8 +19,11 @@ use super::messages::{SniffedConnection, SnifferCommand, SnifferCommandInner}; use crate::{ error::AgentError, metrics::{ - MetricsActor, MetricsDecMirrorConnectionSubscription, MetricsDecMirrorPortSubscription, - MetricsIncMirrorPortSubscription, + incoming_traffic::{ + MetricsDecMirrorConnectionSubscription, MetricsDecMirrorPortSubscription, + MetricsIncMirrorPortSubscription, + }, + MetricsActor, }, util::ClientId, watched_task::TaskStatus, diff --git 
a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index ea695c5bcfc..a75baaeaa04 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -35,9 +35,12 @@ use tracing::{trace, warn, Level}; use crate::{ error::{AgentError, Result}, metrics::{ - MetricsActor, MetricsDecStealConnectionSubscription, MetricsDecStealPortSubscription, - MetricsDecStealPortSubscriptionMany, MetricsIncStealConnectionSubscription, - MetricsIncStealPortSubscription, + incoming_traffic::{ + MetricsDecStealConnectionSubscription, MetricsDecStealPortSubscription, + MetricsDecStealPortSubscriptionMany, MetricsIncStealConnectionSubscription, + MetricsIncStealPortSubscription, + }, + MetricsActor, }, steal::{ connections::{ From 08b0acadbd02e9547422bfd2fb87fec464bb00bd Mon Sep 17 00:00:00 2001 From: meowjesty Date: Thu, 19 Dec 2024 18:41:42 -0300 Subject: [PATCH 11/85] bump protocol --- Cargo.lock | 2 +- mirrord/protocol/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 207675b71f3..c0ec2159964 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4566,7 +4566,7 @@ dependencies = [ [[package]] name = "mirrord-protocol" -version = "1.13.0" +version = "1.13.1" dependencies = [ "actix-codec", "bincode", diff --git a/mirrord/protocol/Cargo.toml b/mirrord/protocol/Cargo.toml index 91f44f8bde5..fda199ceac2 100644 --- a/mirrord/protocol/Cargo.toml +++ b/mirrord/protocol/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mirrord-protocol" -version = "1.13.0" +version = "1.13.1" authors.workspace = true description.workspace = true documentation.workspace = true From 03b01c053a59d7c27c2f5ba28e5aa51e6693881a Mon Sep 17 00:00:00 2001 From: meowjesty Date: Thu, 19 Dec 2024 18:42:35 -0300 Subject: [PATCH 12/85] fix axum-server version --- mirrord/agent/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirrord/agent/Cargo.toml b/mirrord/agent/Cargo.toml index 
e20d7991d82..2acca3d0ac9 100644 --- a/mirrord/agent/Cargo.toml +++ b/mirrord/agent/Cargo.toml @@ -73,7 +73,7 @@ socket2.workspace = true prometheus = { version = "0.13", features = ["process"] } kameo = { git = "https://github.com/tqwewe/kameo", branch = "main" } axum = { version = "0.7", features = ["macros"] } -axum-server = "*" +axum-server = "0.7" [target.'cfg(target_os = "linux")'.dependencies] iptables = { git = "https://github.com/metalbear-co/rust-iptables.git", rev = "e66c7332e361df3c61a194f08eefe3f40763d624" } From 9c232158d1b19b5ac9c008f977b3cd79c5389f07 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Thu, 19 Dec 2024 18:44:51 -0300 Subject: [PATCH 13/85] info -> trace --- mirrord/agent/src/metrics/file_ops.rs | 4 ++-- mirrord/agent/src/metrics/incoming_traffic.rs | 18 +++++++++--------- mirrord/agent/src/metrics/outgoing_traffic.rs | 8 ++++---- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/mirrord/agent/src/metrics/file_ops.rs b/mirrord/agent/src/metrics/file_ops.rs index ed040442604..7effd1cd08f 100644 --- a/mirrord/agent/src/metrics/file_ops.rs +++ b/mirrord/agent/src/metrics/file_ops.rs @@ -9,7 +9,7 @@ pub(crate) struct MetricsDecFd; impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsIncFd, @@ -22,7 +22,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsDecFd, diff --git a/mirrord/agent/src/metrics/incoming_traffic.rs b/mirrord/agent/src/metrics/incoming_traffic.rs index e8de28d249a..53d90696fcc 100644 --- a/mirrord/agent/src/metrics/incoming_traffic.rs +++ b/mirrord/agent/src/metrics/incoming_traffic.rs @@ -26,7 +26,7 @@ pub(crate) struct MetricsDecStealConnectionSubscription; impl Message for 
MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsIncMirrorPortSubscription, @@ -39,7 +39,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsDecMirrorPortSubscription, @@ -52,7 +52,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, MetricsIncStealPortSubscription { filtered }: MetricsIncStealPortSubscription, @@ -69,7 +69,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, MetricsDecStealPortSubscription { filtered }: MetricsDecStealPortSubscription, @@ -90,7 +90,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, MetricsDecStealPortSubscriptionMany { @@ -115,7 +115,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsIncStealConnectionSubscription, @@ -128,7 +128,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsDecStealConnectionSubscription, @@ -142,7 +142,7 @@ impl 
Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsIncMirrorConnectionSubscription, @@ -155,7 +155,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsDecMirrorConnectionSubscription, diff --git a/mirrord/agent/src/metrics/outgoing_traffic.rs b/mirrord/agent/src/metrics/outgoing_traffic.rs index d9d21a591ca..3b3c1ff4f1e 100644 --- a/mirrord/agent/src/metrics/outgoing_traffic.rs +++ b/mirrord/agent/src/metrics/outgoing_traffic.rs @@ -12,7 +12,7 @@ pub(crate) struct MetricsDecUdpOutgoingConnection; impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsIncTcpOutgoingConnection, @@ -25,7 +25,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsDecTcpOutgoingConnection, @@ -38,7 +38,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsIncUdpOutgoingConnection, @@ -51,7 +51,7 @@ impl Message for MetricsActor { impl Message for MetricsActor { type Reply = (); - #[tracing::instrument(level = Level::INFO, skip_all)] + #[tracing::instrument(level = Level::TRACE, skip_all)] async fn handle( &mut self, _: MetricsDecUdpOutgoingConnection, From f82a2fd841b78f17c3e49febc2412ed09adcfe81 Mon Sep 17 00:00:00 2001 From: 
meowjesty Date: Fri, 20 Dec 2024 11:39:09 -0300 Subject: [PATCH 14/85] just have metrics atomics everywhere --- mirrord/agent/src/file.rs | 35 +++---------- mirrord/agent/src/metrics.rs | 71 ++++++++++++++++++++++++++- mirrord/agent/src/outgoing.rs | 41 +++------------- mirrord/agent/src/outgoing/udp.rs | 25 +++------- mirrord/agent/src/sniffer/api.rs | 26 ++-------- mirrord/agent/src/steal/connection.rs | 56 +++++++++------------ 6 files changed, 118 insertions(+), 136 deletions(-) diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index f2ffdc37d68..c8238f70f7d 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -17,10 +17,7 @@ use tracing::{error, trace, Level}; use crate::{ error::Result, - metrics::{ - file_ops::{MetricsDecFd, MetricsIncFd}, - MetricsActor, - }, + metrics::{MetricsActor, OPEN_FD_COUNT}, }; #[derive(Debug)] @@ -297,11 +294,7 @@ impl FileManager { }; if self.open_files.insert(fd, remote_file).is_none() { - let _ = self - .metrics - .tell(MetricsIncFd) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + OPEN_FD_COUNT.inc(); } Ok(OpenFileResponse { fd }) @@ -337,11 +330,7 @@ impl FileManager { }; if self.open_files.insert(fd, remote_file).is_none() { - let _ = self - .metrics - .tell(MetricsIncFd) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + OPEN_FD_COUNT.inc(); } Ok(OpenFileResponse { fd }) @@ -593,11 +582,7 @@ impl FileManager { if self.open_files.remove(&fd).is_none() { error!(fd, "fd not found!"); } else { - let _ = self - .metrics - .tell(MetricsDecFd) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + OPEN_FD_COUNT.dec(); } None @@ -610,11 +595,7 @@ impl FileManager { if self.dir_streams.remove(&fd).is_none() && self.getdents_streams.remove(&fd).is_none() { error!("FileManager::close_dir -> fd {:#?} not found", fd); } else { - let _ = self - .metrics - .tell(MetricsDecFd) - .await - .inspect_err(|fail| 
tracing::warn!(%fail, "agent metrics failure!")); + OPEN_FD_COUNT.dec(); } None @@ -740,11 +721,7 @@ impl FileManager { let dir_stream = path.read_dir()?.enumerate(); if self.dir_streams.insert(fd, dir_stream).is_none() { - let _ = self - .metrics - .tell(MetricsIncFd) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + OPEN_FD_COUNT.inc(); } Ok(OpenDirResponse { fd }) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index babba1aa245..54f9fb23ce5 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -1,3 +1,5 @@ +use std::sync::LazyLock; + use axum::{response::IntoResponse, routing::get, Extension, Router}; use kameo::{ actor::ActorRef, @@ -6,7 +8,10 @@ use kameo::{ message::{Context, Message}, Actor, Reply, }; -use prometheus::core::{AtomicI64, GenericGauge}; +use prometheus::{ + core::{AtomicI64, GenericGauge}, + register_int_gauge, IntGauge, +}; use serde::Serialize; use thiserror::Error; use tokio::net::TcpListener; @@ -18,6 +23,70 @@ pub(crate) mod file_ops; pub(crate) mod incoming_traffic; pub(crate) mod outgoing_traffic; +pub(crate) static OPEN_FD_COUNT: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_open_fd_count", + "amount of open fds in mirrord-agent" + ) + .expect("Valid at initialization!") +}); + +pub(crate) static MIRROR_PORT_SUBSCRIPTION: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_mirror_port_subscription_count", + "amount of mirror port subscriptions in mirrord-agent" + ) + .expect("Valid at initialization") +}); + +pub(crate) static MIRROR_CONNECTION_SUBSCRIPTION: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_mirror_connection_subscription_count", + "amount of connections in mirror mode in mirrord-agent" + ) + .expect("Valid at initialization!") +}); + +pub(crate) static STEAL_FILTERED_PORT_SUBSCRIPTION: LazyLock = LazyLock::new(|| { + register_int_gauge!( +
"mirrord_agent_steal_filtered_port_subscription_count", + "amount of filtered steal port subscriptions in mirrord-agent" + ) + .expect("Valid at initialization!") +}); + +pub(crate) static STEAL_UNFILTERED_PORT_SUBSCRIPTION: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_steal_unfiltered_port_subscription_count", + "amount of unfiltered steal port subscriptions in mirrord-agent" + ) + .expect("Valid at initialization!") +}); + +pub(crate) static STEAL_CONNECTION_SUBSCRIPTION: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_steal_connection_subscription_count", + "amount of connections in steal mode in mirrord-agent" + ) + .expect("Valid at initialization!") +}); + +pub(crate) static TCP_OUTGOING_CONNECTION: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_tcp_outgoing_connection_count", + "amount of tcp outgoing connections in mirrord-agent" + ) + .expect("Valid at initialization!") +}); + +pub(crate) static UDP_OUTGOING_CONNECTION: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_udp_outgoing_connection_count", + "amount of udp outgoing connections in mirrord-agent" + ) + .expect("Valid at initialization!") +}); + #[derive(Error, Debug)] pub(crate) enum MetricsError { #[error(transparent)] diff --git a/mirrord/agent/src/outgoing.rs b/mirrord/agent/src/outgoing.rs index e9f5e071182..887ce9c24a5 100644 --- a/mirrord/agent/src/outgoing.rs +++ b/mirrord/agent/src/outgoing.rs @@ -22,7 +22,7 @@ use crate::{ error::Result, metrics::{ outgoing_traffic::{MetricsDecTcpOutgoingConnection, MetricsIncTcpOutgoingConnection}, - MetricsActor, + MetricsActor, TCP_OUTGOING_CONNECTION, }, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, @@ -228,11 +228,7 @@ impl TcpOutgoingTask { let daemon_message = DaemonTcpOutgoing::Close(connection_id); self.daemon_tx.send(daemon_message).await?; - let _ = self - .metrics - .tell(MetricsDecTcpOutgoingConnection) - .await - 
.inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + TCP_OUTGOING_CONNECTION.dec(); } // EOF occurred in one of peer connections. @@ -264,11 +260,7 @@ impl TcpOutgoingTask { .send(DaemonTcpOutgoing::Close(connection_id)) .await?; - let _ = self - .metrics - .tell(MetricsDecTcpOutgoingConnection) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + TCP_OUTGOING_CONNECTION.dec(); } } } @@ -324,11 +316,7 @@ impl TcpOutgoingTask { .send(DaemonTcpOutgoing::Connect(daemon_connect)) .await?; - let _ = self - .metrics - .tell(MetricsIncTcpOutgoingConnection) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + TCP_OUTGOING_CONNECTION.inc(); Ok(()) } @@ -375,13 +363,7 @@ impl TcpOutgoingTask { .send(DaemonTcpOutgoing::Close(connection_id)) .await?; - let _ = self - .metrics - .tell(MetricsDecTcpOutgoingConnection) - .await - .inspect_err( - |fail| tracing::warn!(%fail, "agent metrics failure!"), - ); + TCP_OUTGOING_CONNECTION.dec(); Ok(()) } @@ -402,11 +384,7 @@ impl TcpOutgoingTask { .send(DaemonTcpOutgoing::Close(connection_id)) .await?; - let _ = self - .metrics - .tell(MetricsDecTcpOutgoingConnection) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + TCP_OUTGOING_CONNECTION.dec(); Ok(()) } @@ -419,12 +397,7 @@ impl TcpOutgoingTask { self.writers.remove(&connection_id); self.readers.remove(&connection_id); - let _ = self - .metrics - .tell(MetricsDecTcpOutgoingConnection) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); - + TCP_OUTGOING_CONNECTION.dec(); Ok(()) } } diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index d60d98e895e..d089a932d71 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -26,7 +26,10 @@ use tracing::{debug, trace, warn}; use super::MetricsActor; use crate::{ error::Result, - metrics::outgoing_traffic::{MetricsDecUdpOutgoingConnection, 
MetricsIncUdpOutgoingConnection}, + metrics::{ + outgoing_traffic::{MetricsDecUdpOutgoingConnection, MetricsIncUdpOutgoingConnection}, + UDP_OUTGOING_CONNECTION, + }, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, }; @@ -170,10 +173,7 @@ impl UdpOutgoingApi { daemon_tx.send(daemon_message).await?; - let _ = metrics - .tell(MetricsIncUdpOutgoingConnection) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + UDP_OUTGOING_CONNECTION.inc(); } // [user] -> [layer] -> [agent] -> [remote] // `user` wrote some message to the remote host. @@ -200,10 +200,7 @@ impl UdpOutgoingApi { let daemon_message = DaemonUdpOutgoing::Close(connection_id); daemon_tx.send(daemon_message).await?; - let _ = metrics - .tell(MetricsDecUdpOutgoingConnection) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + UDP_OUTGOING_CONNECTION.dec(); } } // [layer] -> [agent] @@ -212,10 +209,7 @@ impl UdpOutgoingApi { writers.remove(connection_id); readers.remove(connection_id); - let _ = metrics - .tell(MetricsDecUdpOutgoingConnection) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + UDP_OUTGOING_CONNECTION.dec(); } } } @@ -243,10 +237,7 @@ impl UdpOutgoingApi { let daemon_message = DaemonUdpOutgoing::Close(connection_id); daemon_tx.send(daemon_message).await?; - let _ = metrics - .tell(MetricsDecUdpOutgoingConnection) - .await - .inspect_err(|fail| tracing::warn!(%fail, "agent metrics failure!")); + UDP_OUTGOING_CONNECTION.dec(); } } } diff --git a/mirrord/agent/src/sniffer/api.rs b/mirrord/agent/src/sniffer/api.rs index 1ea3bd32ea9..c7e30d78387 100644 --- a/mirrord/agent/src/sniffer/api.rs +++ b/mirrord/agent/src/sniffer/api.rs @@ -18,13 +18,7 @@ use tokio_stream::{ use super::messages::{SniffedConnection, SnifferCommand, SnifferCommandInner}; use crate::{ error::AgentError, - metrics::{ - incoming_traffic::{ - MetricsDecMirrorConnectionSubscription, MetricsDecMirrorPortSubscription, 
- MetricsIncMirrorPortSubscription, - }, - MetricsActor, - }, + metrics::{MetricsActor, MIRROR_PORT_SUBSCRIPTION}, util::ClientId, watched_task::TaskStatus, }; @@ -186,11 +180,7 @@ impl TcpSnifferApi { .await?; self.subscriptions_in_progress.push(rx); - let _ = self - .metrics - .tell(MetricsIncMirrorPortSubscription) - .await - .inspect_err(|fail| tracing::trace!(?fail)); + MIRROR_PORT_SUBSCRIPTION.inc(); Ok(()) } @@ -199,11 +189,7 @@ impl TcpSnifferApi { self.send_command(SnifferCommandInner::UnsubscribePort(port)) .await?; - let _ = self - .metrics - .tell(MetricsDecMirrorPortSubscription) - .await - .inspect_err(|fail| tracing::trace!(?fail)); + MIRROR_PORT_SUBSCRIPTION.dec(); Ok(()) } @@ -211,11 +197,7 @@ impl TcpSnifferApi { LayerTcp::ConnectionUnsubscribe(connection_id) => { self.connections.remove(&connection_id); - let _ = self - .metrics - .tell(MetricsDecMirrorConnectionSubscription) - .await - .inspect_err(|fail| tracing::trace!(?fail)); + crate::metrics::MIRROR_CONNECTION_SUBSCRIPTION.dec(); Ok(()) } diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index a75baaeaa04..f888cab5fad 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -36,11 +36,11 @@ use crate::{ error::{AgentError, Result}, metrics::{ incoming_traffic::{ - MetricsDecStealConnectionSubscription, MetricsDecStealPortSubscription, - MetricsDecStealPortSubscriptionMany, MetricsIncStealConnectionSubscription, - MetricsIncStealPortSubscription, + MetricsDecStealConnectionSubscription, MetricsDecStealPortSubscriptionMany, + MetricsIncStealConnectionSubscription, }, - MetricsActor, + MetricsActor, STEAL_CONNECTION_SUBSCRIPTION, STEAL_FILTERED_PORT_SUBSCRIPTION, + STEAL_UNFILTERED_PORT_SUBSCRIPTION, }, steal::{ connections::{ @@ -371,11 +371,7 @@ impl TcpConnectionStealer { Ok((stream, peer)) => { self.incoming_connection(stream, peer).await?; - let _ = self - .metrics - .tell(MetricsIncStealConnectionSubscription) - .await -
.inspect_err(|fail| trace!(?fail)); + STEAL_CONNECTION_SUBSCRIPTION.inc(); } Err(error) => { tracing::error!(?error, "Failed to accept a stolen connection"); @@ -600,13 +596,13 @@ impl TcpConnectionStealer { async fn close_client(&mut self, client_id: ClientId) -> Result<(), AgentError> { let removed_subscriptions = self.port_subscriptions.remove_all(client_id).await?; - let _ = self - .metrics - .tell(MetricsDecStealPortSubscriptionMany { - removed_subscriptions, - }) - .await - .inspect_err(|fail| trace!(?fail)); + for filtered in removed_subscriptions { + if filtered { + STEAL_FILTERED_PORT_SUBSCRIPTION.dec(); + } else { + STEAL_UNFILTERED_PORT_SUBSCRIPTION.dec(); + } + } let client = self.clients.remove(&client_id).expect("client not found"); for connection in client.subscribed_connections { @@ -691,30 +687,24 @@ impl TcpConnectionStealer { ) .await; - let _ = self - .metrics - .tell(MetricsDecStealConnectionSubscription) - .await - .inspect_err(|fail| trace!(?fail)); + STEAL_CONNECTION_SUBSCRIPTION.dec(); } Command::PortSubscribe(port_steal) => { - let filtered = self.port_subscribe(client_id, port_steal).await?; - - let _ = self - .metrics - .tell(MetricsIncStealPortSubscription { filtered }) - .await - .inspect_err(|fail| trace!(?fail)); + if self.port_subscribe(client_id, port_steal).await? { + STEAL_FILTERED_PORT_SUBSCRIPTION.inc(); + } else { + STEAL_UNFILTERED_PORT_SUBSCRIPTION.inc(); + } } Command::PortUnsubscribe(port) => { if let Some(filtered) = self.port_subscriptions.remove(client_id, port).await? 
{ - let _ = self - .metrics - .tell(MetricsDecStealPortSubscription { filtered }) - .await - .inspect_err(|fail| trace!(?fail)); + if filtered { + STEAL_FILTERED_PORT_SUBSCRIPTION.dec(); + } else { + STEAL_UNFILTERED_PORT_SUBSCRIPTION.dec(); + } } } From 8032c1cd47e5ba7819f0f9bb23f45acc801fcccd Mon Sep 17 00:00:00 2001 From: meowjesty Date: Fri, 20 Dec 2024 11:56:46 -0300 Subject: [PATCH 15/85] drop actors --- mirrord/agent/src/entrypoint.rs | 51 ++-- mirrord/agent/src/file.rs | 10 +- mirrord/agent/src/metrics.rs | 217 ++---------------- mirrord/agent/src/metrics/file_ops.rs | 33 --- mirrord/agent/src/metrics/incoming_traffic.rs | 167 -------------- mirrord/agent/src/metrics/outgoing_traffic.rs | 62 ----- mirrord/agent/src/outgoing.rs | 13 +- mirrord/agent/src/outgoing/udp.rs | 16 +- mirrord/agent/src/sniffer.rs | 20 +- mirrord/agent/src/sniffer/api.rs | 10 +- mirrord/agent/src/steal/connection.rs | 17 +- 11 files changed, 57 insertions(+), 559 deletions(-) delete mode 100644 mirrord/agent/src/metrics/file_ops.rs delete mode 100644 mirrord/agent/src/metrics/incoming_traffic.rs delete mode 100644 mirrord/agent/src/metrics/outgoing_traffic.rs diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 8bbdaa0bd62..1f3fabec542 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -12,8 +12,7 @@ use std::{ use client_connection::AgentTlsConnector; use dns::{DnsCommand, DnsWorker}; use futures::TryFutureExt; -use kameo::actor::ActorRef; -use metrics::MetricsActor; +use metrics::start_metrics; use mirrord_protocol::{ClientMessage, DaemonMessage, GetEnvVarsRequest, LogMessage}; use sniffer::tcp_capture::RawSocketTcpCapture; use tokio::{ @@ -70,13 +69,11 @@ struct State { ephemeral: bool, /// When present, it is used to secure incoming TCP connections. tls_connector: Option, - - metrics: ActorRef, } impl State { /// Return [`Err`] if container runtime operations failed.
- pub async fn new(args: &Args, metrics: ActorRef) -> Result { + pub async fn new(args: &Args) -> Result { let tls_connector = args .operator_tls_cert_pem .clone() @@ -130,7 +127,6 @@ impl State { env: Arc::new(env), ephemeral, tls_connector, - metrics, }) } @@ -220,20 +216,15 @@ impl ClientConnectionHandler { ) -> Result { let pid = state.container_pid(); - let file_manager = FileManager::new( - pid.or_else(|| state.ephemeral.then_some(1)), - state.metrics.clone(), - ); + let file_manager = FileManager::new(pid.or_else(|| state.ephemeral.then_some(1))); - let tcp_sniffer_api = - Self::create_sniffer_api(id, bg_tasks.sniffer, &mut connection, state.metrics.clone()) - .await; + let tcp_sniffer_api = Self::create_sniffer_api(id, bg_tasks.sniffer, &mut connection).await; let tcp_stealer_api = Self::create_stealer_api(id, bg_tasks.stealer, &mut connection).await?; let dns_api = Self::create_dns_api(bg_tasks.dns); - let tcp_outgoing_api = TcpOutgoingApi::new(pid, state.metrics.clone()); - let udp_outgoing_api = UdpOutgoingApi::new(pid, state.metrics.clone()); + let tcp_outgoing_api = TcpOutgoingApi::new(pid); + let udp_outgoing_api = UdpOutgoingApi::new(pid); let client_handler = Self { id, @@ -255,10 +246,9 @@ impl ClientConnectionHandler { id: ClientId, task: BackgroundTask, connection: &mut ClientConnection, - metrics: ActorRef, ) -> Option { if let BackgroundTask::Running(sniffer_status, sniffer_sender) = task { - match TcpSnifferApi::new(id, sniffer_sender, sniffer_status, metrics).await { + match TcpSnifferApi::new(id, sniffer_sender, sniffer_status).await { Ok(api) => Some(api), Err(e) => { let message = format!( @@ -503,7 +493,11 @@ impl ClientConnectionHandler { async fn start_agent(args: Args) -> Result<()> { trace!("start_agent -> Starting agent with args: {args:?}"); - let metrics = kameo::spawn(MetricsActor::new(true)); + tokio::spawn(async move { + start_metrics() + .await + .inspect_err(|fail| tracing::error!(?fail, "Failed starting metrics server!")) + 
}); let listener = TcpListener::bind(SocketAddrV4::new( Ipv4Addr::UNSPECIFIED, @@ -511,7 +505,7 @@ async fn start_agent(args: Args) -> Result<()> { )) .await?; - let state = State::new(&args, metrics).await?; + let state = State::new(&args).await?; let cancellation_token = CancellationToken::new(); @@ -579,15 +573,13 @@ async fn start_agent(args: Args) -> Result<()> { let cancellation_token = cancellation_token.clone(); let watched_task = WatchedTask::new( TcpConnectionStealer::TASK_NAME, - TcpConnectionStealer::new(stealer_command_rx, state.metrics.clone()).and_then( - |stealer| async move { - let res = stealer.start(cancellation_token).await; - if let Err(err) = res.as_ref() { - error!("Stealer failed: {err}"); - } - res - }, - ), + TcpConnectionStealer::new(stealer_command_rx).and_then(|stealer| async move { + let res = stealer.start(cancellation_token).await; + if let Err(err) = res.as_ref() { + error!("Stealer failed: {err}"); + } + res + }), ); let status = watched_task.status(); let task = run_thread_in_namespace( @@ -776,8 +768,7 @@ async fn run_child_agent() -> Result<()> { async fn start_iptable_guard(args: Args) -> Result<()> { debug!("start_iptable_guard -> Initializing iptable-guard."); - let metrics = kameo::spawn(MetricsActor::new(false)); - let state = State::new(&args, metrics).await?; + let state = State::new(&args).await?; let pid = state.container_pid(); std::env::set_var(IPTABLE_PREROUTING_ENV, IPTABLE_PREROUTING.as_str()); diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index c8238f70f7d..7f4c84fdee0 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -10,15 +10,11 @@ use std::{ }; use faccess::{AccessMode, PathExt}; -use kameo::actor::ActorRef; use libc::DT_DIR; use mirrord_protocol::{file::*, FileRequest, FileResponse, RemoteResult, ResponseError}; use tracing::{error, trace, Level}; -use crate::{ - error::Result, - metrics::{MetricsActor, OPEN_FD_COUNT}, -}; +use crate::{error::Result, 
metrics::OPEN_FD_COUNT}; #[derive(Debug)] pub enum RemoteFile { @@ -74,7 +70,6 @@ pub(crate) struct FileManager { dir_streams: HashMap>, getdents_streams: HashMap>, fds_iter: RangeInclusive, - metrics: ActorRef, } pub fn get_root_path_from_optional_pid(pid: Option) -> PathBuf { @@ -252,12 +247,11 @@ impl FileManager { } #[tracing::instrument(level = Level::TRACE)] - pub fn new(pid: Option, metrics: ActorRef) -> Self { + pub fn new(pid: Option) -> Self { let root_path = get_root_path_from_optional_pid(pid); trace!("Agent root path >> {root_path:?}"); Self { - metrics, root_path, open_files: Default::default(), dir_streams: Default::default(), diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 54f9fb23ce5..66b1610cee5 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -1,28 +1,13 @@ use std::sync::LazyLock; -use axum::{response::IntoResponse, routing::get, Extension, Router}; -use kameo::{ - actor::ActorRef, - error::BoxError, - mailbox::unbounded::UnboundedMailbox, - message::{Context, Message}, - Actor, Reply, -}; -use prometheus::{ - core::{AtomicI64, GenericGauge}, - register_int_gauge, IntGauge, -}; -use serde::Serialize; +use axum::{response::IntoResponse, routing::get, Router}; +use prometheus::{register_int_gauge, IntGauge}; use thiserror::Error; use tokio::net::TcpListener; use tracing::Level; use crate::error::AgentError; -pub(crate) mod file_ops; -pub(crate) mod incoming_traffic; -pub(crate) mod outgoing_traffic; - pub(crate) static OPEN_FD_COUNT: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_open_fd_count", @@ -89,9 +74,6 @@ pub(crate) static UDP_OUTGOING_CONNECTION: LazyLock = LazyLock::new(|| #[derive(Error, Debug)] pub(crate) enum MetricsError { - #[error(transparent)] - GetAll(#[from] kameo::error::SendError), - #[error(transparent)] FromUtf8(#[from] std::string::FromUtf8Error), @@ -107,54 +89,10 @@ impl IntoResponse for MetricsError { } } -#[tracing::instrument(level = 
Level::TRACE, skip(prometheus_metrics), ret, err)] -async fn get_metrics( - metrics: Extension>, - prometheus_metrics: Extension, -) -> Result { +#[tracing::instrument(level = Level::TRACE, ret, err)] +async fn get_metrics() -> Result { use prometheus::{Encoder, TextEncoder}; - let MetricsGetAllReply { - open_fd_count, - mirror_port_subscription_count, - mirror_connection_subscription_count, - steal_filtered_port_subscription_count, - steal_unfiltered_port_subscription_count, - steal_connection_subscription_count, - tcp_outgoing_connection_count, - udp_outgoing_connection_count, - } = metrics.ask(MetricsGetAll).await?; - - prometheus_metrics.open_fd_count.set(open_fd_count as i64); - - prometheus_metrics - .mirror_port_subscription_count - .set(mirror_port_subscription_count as i64); - - prometheus_metrics - .mirror_connection_subscription_count - .set(mirror_connection_subscription_count as i64); - - prometheus_metrics - .steal_filtered_port_subscription_count - .set(steal_filtered_port_subscription_count as i64); - - prometheus_metrics - .steal_unfiltered_port_subscription_count - .set(steal_unfiltered_port_subscription_count as i64); - - prometheus_metrics - .steal_connection_subscription_count - .set(steal_connection_subscription_count as i64); - - prometheus_metrics - .tcp_outgoing_connection_count - .set(tcp_outgoing_connection_count as i64); - - prometheus_metrics - .udp_outgoing_connection_count - .set(udp_outgoing_connection_count as i64); - let metric_families = prometheus::gather(); let mut buffer = Vec::new(); @@ -165,142 +103,21 @@ async fn get_metrics( Ok(String::from_utf8(buffer)?) 
} -#[derive(Clone)] -struct PrometheusMetrics { - open_fd_count: GenericGauge, - mirror_port_subscription_count: GenericGauge, - mirror_connection_subscription_count: GenericGauge, - steal_filtered_port_subscription_count: GenericGauge, - steal_unfiltered_port_subscription_count: GenericGauge, - steal_connection_subscription_count: GenericGauge, - tcp_outgoing_connection_count: GenericGauge, - udp_outgoing_connection_count: GenericGauge, -} - -impl PrometheusMetrics { - fn new() -> Result { - use prometheus::register_int_gauge; - - Ok(Self { - open_fd_count: register_int_gauge!( - "mirrord_agent_open_fd_count", - "amount of open fds in mirrord-agent" - )?, - mirror_port_subscription_count: register_int_gauge!( - "mirrord_agent_mirror_port_subscription_count", - "amount of mirror port subscriptions in mirror-agent" - )?, - mirror_connection_subscription_count: register_int_gauge!( - "mirrord_agent_mirror_connection_subscription_count", - "amount of connections in steal mode in mirrord-agent" - )?, - steal_filtered_port_subscription_count: register_int_gauge!( - "mirrord_agent_steal_filtered_port_subscription_count", - "amount of filtered steal port subscriptions in mirrord-agent" - )?, - steal_unfiltered_port_subscription_count: register_int_gauge!( - "mirrord_agent_steal_unfiltered_port_subscription_count", - "amount of unfiltered steal port subscriptions in mirrord-agent" - )?, - steal_connection_subscription_count: register_int_gauge!( - "mirrord_agent_steal_connection_subscription_count", - "amount of connections in steal mode in mirrord-agent" - )?, - tcp_outgoing_connection_count: register_int_gauge!( - "mirrord_agent_tcp_outgoing_connection_count", - "amount of tcp outgoing connections in mirrord-agent" - )?, - udp_outgoing_connection_count: register_int_gauge!( - "mirrord_agent_udp_outgoing_connection_count", - "amount of udp outgoing connections in mirrord-agent" - )?, - }) - } -} - -#[derive(Default)] -pub(crate) struct MetricsActor { - enabled: bool, - 
open_fd_count: u64, - mirror_port_subscription_count: u64, - mirror_connection_subscription_count: u64, - steal_filtered_port_subscription_count: u64, - steal_unfiltered_port_subscription_count: u64, - steal_connection_subscription_count: u64, - tcp_outgoing_connection_count: u64, - udp_outgoing_connection_count: u64, -} - -impl MetricsActor { - pub(crate) fn new(enabled: bool) -> Self { - Self { - enabled, - ..Default::default() - } - } -} - -impl Actor for MetricsActor { - type Mailbox = UnboundedMailbox; - - #[tracing::instrument(level = Level::TRACE, skip_all, ret ,err)] - async fn on_start(&mut self, metrics: ActorRef) -> Result<(), BoxError> { - if self.enabled { - let prometheus_metrics = PrometheusMetrics::new()?; - - let app = Router::new() - .route("/metrics", get(get_metrics)) - .layer(Extension(metrics)) - .layer(Extension(prometheus_metrics)); +#[tracing::instrument(level = Level::TRACE, skip_all, ret ,err)] +pub(crate) async fn start_metrics() -> Result<(), axum::BoxError> { + let app = Router::new().route("/metrics", get(get_metrics)); - let listener = TcpListener::bind("0.0.0.0:9000") - .await - .map_err(AgentError::from) - .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; + let listener = TcpListener::bind("0.0.0.0:9000") + .await + .map_err(AgentError::from) + .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; - tokio::spawn(async move { - axum::serve(listener, app).await.inspect_err(|fail| { - tracing::error!(%fail, "Could not start agent metrics + tokio::spawn(async move { + axum::serve(listener, app).await.inspect_err(|fail| { + tracing::error!(%fail, "Could not start agent metrics server!") - }) - }); - } - - Ok(()) - } -} - -pub(crate) struct MetricsGetAll; - -#[derive(Reply, Serialize)] -pub(crate) struct MetricsGetAllReply { - open_fd_count: u64, - mirror_port_subscription_count: u64, - mirror_connection_subscription_count: u64, - steal_filtered_port_subscription_count: u64, - 
steal_unfiltered_port_subscription_count: u64, - steal_connection_subscription_count: u64, - tcp_outgoing_connection_count: u64, - udp_outgoing_connection_count: u64, -} -impl Message for MetricsActor { - type Reply = MetricsGetAllReply; + }) + }); - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsGetAll, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - MetricsGetAllReply { - open_fd_count: self.open_fd_count, - mirror_port_subscription_count: self.mirror_port_subscription_count, - mirror_connection_subscription_count: self.mirror_connection_subscription_count, - steal_filtered_port_subscription_count: self.steal_filtered_port_subscription_count, - steal_unfiltered_port_subscription_count: self.steal_unfiltered_port_subscription_count, - steal_connection_subscription_count: self.steal_connection_subscription_count, - tcp_outgoing_connection_count: self.tcp_outgoing_connection_count, - udp_outgoing_connection_count: self.udp_outgoing_connection_count, - } - } + Ok(()) } diff --git a/mirrord/agent/src/metrics/file_ops.rs b/mirrord/agent/src/metrics/file_ops.rs deleted file mode 100644 index 7effd1cd08f..00000000000 --- a/mirrord/agent/src/metrics/file_ops.rs +++ /dev/null @@ -1,33 +0,0 @@ -use kameo::message::{Context, Message}; -use tracing::Level; - -use crate::metrics::MetricsActor; - -pub(crate) struct MetricsIncFd; -pub(crate) struct MetricsDecFd; - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsIncFd, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.open_fd_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsDecFd, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.open_fd_count = self.open_fd_count.saturating_sub(1); - } -} diff 
--git a/mirrord/agent/src/metrics/incoming_traffic.rs b/mirrord/agent/src/metrics/incoming_traffic.rs deleted file mode 100644 index 53d90696fcc..00000000000 --- a/mirrord/agent/src/metrics/incoming_traffic.rs +++ /dev/null @@ -1,167 +0,0 @@ -use kameo::message::{Context, Message}; -use tracing::Level; - -use crate::metrics::MetricsActor; - -pub(crate) struct MetricsIncMirrorPortSubscription; -pub(crate) struct MetricsDecMirrorPortSubscription; - -pub(crate) struct MetricsIncMirrorConnectionSubscription; -pub(crate) struct MetricsDecMirrorConnectionSubscription; - -pub(crate) struct MetricsIncStealPortSubscription { - pub(crate) filtered: bool, -} -pub(crate) struct MetricsDecStealPortSubscription { - pub(crate) filtered: bool, -} - -pub(crate) struct MetricsDecStealPortSubscriptionMany { - pub(crate) removed_subscriptions: Vec, -} - -pub(crate) struct MetricsIncStealConnectionSubscription; -pub(crate) struct MetricsDecStealConnectionSubscription; - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsIncMirrorPortSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.mirror_port_subscription_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsDecMirrorPortSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.mirror_port_subscription_count = self.mirror_port_subscription_count.saturating_sub(1); - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - MetricsIncStealPortSubscription { filtered }: MetricsIncStealPortSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - if filtered { - self.steal_filtered_port_subscription_count += 1; - } else { - 
self.steal_unfiltered_port_subscription_count += 1; - } - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - MetricsDecStealPortSubscription { filtered }: MetricsDecStealPortSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - if filtered { - self.steal_filtered_port_subscription_count = self - .steal_filtered_port_subscription_count - .saturating_sub(1); - } else { - self.steal_unfiltered_port_subscription_count = self - .steal_unfiltered_port_subscription_count - .saturating_sub(1); - } - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - MetricsDecStealPortSubscriptionMany { - removed_subscriptions, - }: MetricsDecStealPortSubscriptionMany, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - for filtered in removed_subscriptions { - if filtered { - self.steal_filtered_port_subscription_count = self - .steal_filtered_port_subscription_count - .saturating_sub(1); - } else { - self.steal_unfiltered_port_subscription_count = self - .steal_unfiltered_port_subscription_count - .saturating_sub(1); - } - } - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsIncStealConnectionSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.steal_connection_subscription_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsDecStealConnectionSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.steal_connection_subscription_count = - self.steal_connection_subscription_count.saturating_sub(1); - } -} - -impl Message for MetricsActor { - type Reply = (); - - 
#[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsIncMirrorConnectionSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.mirror_connection_subscription_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsDecMirrorConnectionSubscription, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.mirror_connection_subscription_count = - self.mirror_connection_subscription_count.saturating_sub(1); - } -} diff --git a/mirrord/agent/src/metrics/outgoing_traffic.rs b/mirrord/agent/src/metrics/outgoing_traffic.rs deleted file mode 100644 index 3b3c1ff4f1e..00000000000 --- a/mirrord/agent/src/metrics/outgoing_traffic.rs +++ /dev/null @@ -1,62 +0,0 @@ -use kameo::message::{Context, Message}; -use tracing::Level; - -use crate::metrics::MetricsActor; - -pub(crate) struct MetricsIncTcpOutgoingConnection; -pub(crate) struct MetricsDecTcpOutgoingConnection; - -pub(crate) struct MetricsIncUdpOutgoingConnection; -pub(crate) struct MetricsDecUdpOutgoingConnection; - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsIncTcpOutgoingConnection, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.tcp_outgoing_connection_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsDecTcpOutgoingConnection, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.tcp_outgoing_connection_count = self.tcp_outgoing_connection_count.saturating_sub(1); - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsIncUdpOutgoingConnection, - 
_ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.udp_outgoing_connection_count += 1; - } -} - -impl Message for MetricsActor { - type Reply = (); - - #[tracing::instrument(level = Level::TRACE, skip_all)] - async fn handle( - &mut self, - _: MetricsDecUdpOutgoingConnection, - _ctx: Context<'_, Self, Self::Reply>, - ) -> Self::Reply { - self.udp_outgoing_connection_count = self.udp_outgoing_connection_count.saturating_sub(1); - } -} diff --git a/mirrord/agent/src/outgoing.rs b/mirrord/agent/src/outgoing.rs index 887ce9c24a5..eb92ec68916 100644 --- a/mirrord/agent/src/outgoing.rs +++ b/mirrord/agent/src/outgoing.rs @@ -1,7 +1,6 @@ use std::{collections::HashMap, fmt, thread, time::Duration}; use bytes::Bytes; -use kameo::actor::ActorRef; use mirrord_protocol::{ outgoing::{tcp::*, *}, ConnectionId, RemoteError, ResponseError, @@ -20,10 +19,7 @@ use tracing::Level; use crate::{ error::Result, - metrics::{ - outgoing_traffic::{MetricsDecTcpOutgoingConnection, MetricsIncTcpOutgoingConnection}, - MetricsActor, TCP_OUTGOING_CONNECTION, - }, + metrics::TCP_OUTGOING_CONNECTION, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, }; @@ -60,13 +56,13 @@ impl TcpOutgoingApi { /// /// * `pid` - process id of the agent's target container #[tracing::instrument(level = Level::TRACE)] - pub(crate) fn new(pid: Option, metrics: ActorRef) -> Self { + pub(crate) fn new(pid: Option) -> Self { let (layer_tx, layer_rx) = mpsc::channel(1000); let (daemon_tx, daemon_rx) = mpsc::channel(1000); let watched_task = WatchedTask::new( Self::TASK_NAME, - TcpOutgoingTask::new(pid, layer_rx, daemon_tx, metrics).run(), + TcpOutgoingTask::new(pid, layer_rx, daemon_tx).run(), ); let task_status = watched_task.status(); let task = run_thread_in_namespace( @@ -115,7 +111,6 @@ struct TcpOutgoingTask { pid: Option, layer_rx: Receiver, daemon_tx: Sender, - metrics: ActorRef, } impl fmt::Debug for TcpOutgoingTask { @@ -144,7 +139,6 @@ impl TcpOutgoingTask { pid: Option, 
layer_rx: Receiver, daemon_tx: Sender, - metrics: ActorRef, ) -> Self { Self { next_connection_id: 0, @@ -153,7 +147,6 @@ impl TcpOutgoingTask { pid, layer_rx, daemon_tx, - metrics, } } diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index d089a932d71..0a30fcbd5fa 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -9,7 +9,6 @@ use futures::{ prelude::*, stream::{SplitSink, SplitStream}, }; -use kameo::actor::ActorRef; use mirrord_protocol::{ outgoing::{udp::*, *}, ConnectionId, ResponseError, @@ -23,13 +22,9 @@ use tokio::{ use tokio_util::{codec::BytesCodec, udp::UdpFramed}; use tracing::{debug, trace, warn}; -use super::MetricsActor; use crate::{ error::Result, - metrics::{ - outgoing_traffic::{MetricsDecUdpOutgoingConnection, MetricsIncUdpOutgoingConnection}, - UDP_OUTGOING_CONNECTION, - }, + metrics::UDP_OUTGOING_CONNECTION, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, }; @@ -77,14 +72,12 @@ async fn connect(remote_address: SocketAddr) -> Result impl UdpOutgoingApi { const TASK_NAME: &'static str = "UdpOutgoing"; - pub(crate) fn new(pid: Option, metrics: ActorRef) -> Self { + pub(crate) fn new(pid: Option) -> Self { let (layer_tx, layer_rx) = mpsc::channel(1000); let (daemon_tx, daemon_rx) = mpsc::channel(1000); - let watched_task = WatchedTask::new( - Self::TASK_NAME, - Self::interceptor_task(layer_rx, daemon_tx, metrics), - ); + let watched_task = + WatchedTask::new(Self::TASK_NAME, Self::interceptor_task(layer_rx, daemon_tx)); let task_status = watched_task.status(); let task = run_thread_in_namespace( @@ -109,7 +102,6 @@ impl UdpOutgoingApi { async fn interceptor_task( mut layer_rx: Receiver, daemon_tx: Sender, - metrics: ActorRef, ) -> Result<()> { let mut connection_ids = 0..=ConnectionId::MAX; diff --git a/mirrord/agent/src/sniffer.rs b/mirrord/agent/src/sniffer.rs index ffb5f899ba0..cbf1a4d578a 100644 --- a/mirrord/agent/src/sniffer.rs +++ 
b/mirrord/agent/src/sniffer.rs @@ -26,6 +26,7 @@ use self::{ use crate::{ error::AgentError, http::HttpVersion, + metrics::MIRROR_CONNECTION_SUBSCRIPTION, util::{ChannelClosedFuture, ClientId, Subscriptions}, }; @@ -276,6 +277,7 @@ where } => { if self.port_subscriptions.subscribe(client_id, port) { self.update_packet_filter()?; + MIRROR_CONNECTION_SUBSCRIPTION.inc(); } let _ = tx.send(port); @@ -434,10 +436,7 @@ mod test { use tokio::sync::mpsc; use super::*; - use crate::{ - metrics::MetricsActor, - watched_task::{TaskStatus, WatchedTask}, - }; + use crate::watched_task::{TaskStatus, WatchedTask}; struct TestSnifferSetup { command_tx: Sender, @@ -452,16 +451,9 @@ mod test { let client_id = self.next_client_id; self.next_client_id += 1; - let metrics = kameo::spawn(MetricsActor::new(false)); - - TcpSnifferApi::new( - client_id, - self.command_tx.clone(), - self.task_status.clone(), - metrics, - ) - .await - .unwrap() + TcpSnifferApi::new(client_id, self.command_tx.clone(), self.task_status.clone()) + .await + .unwrap() } fn times_filter_changed(&self) -> usize { diff --git a/mirrord/agent/src/sniffer/api.rs b/mirrord/agent/src/sniffer/api.rs index c7e30d78387..86951e3aec3 100644 --- a/mirrord/agent/src/sniffer/api.rs +++ b/mirrord/agent/src/sniffer/api.rs @@ -1,7 +1,6 @@ use std::ops::RangeInclusive; use futures::{stream::FuturesUnordered, StreamExt}; -use kameo::actor::ActorRef; use mirrord_protocol::{ tcp::{DaemonTcp, LayerTcp, NewTcpConnection, TcpClose, TcpData}, ConnectionId, LogMessage, Port, @@ -17,10 +16,7 @@ use tokio_stream::{ use super::messages::{SniffedConnection, SnifferCommand, SnifferCommandInner}; use crate::{ - error::AgentError, - metrics::{MetricsActor, MIRROR_PORT_SUBSCRIPTION}, - util::ClientId, - watched_task::TaskStatus, + error::AgentError, metrics::MIRROR_PORT_SUBSCRIPTION, util::ClientId, watched_task::TaskStatus, }; /// Interface used by clients to interact with the @@ -44,7 +40,6 @@ pub(crate) struct TcpSnifferApi { 
connection_ids_iter: RangeInclusive, /// [`LayerTcp::PortSubscribe`] requests in progress. subscriptions_in_progress: FuturesUnordered>, - metrics: ActorRef, } impl TcpSnifferApi { @@ -60,12 +55,10 @@ impl TcpSnifferApi { /// [`TcpConnectionSniffer`](super::TcpConnectionSniffer) /// * `task_status` - handle to the [`TcpConnectionSniffer`](super::TcpConnectionSniffer) exit /// status - /// * `metrics` - used to send agent metrics messages to our metrics actor; pub async fn new( client_id: ClientId, sniffer_sender: Sender, mut task_status: TaskStatus, - metrics: ActorRef, ) -> Result { let (sender, receiver) = mpsc::channel(Self::CONNECTION_CHANNEL_SIZE); @@ -85,7 +78,6 @@ impl TcpSnifferApi { connections: Default::default(), connection_ids_iter: (0..=ConnectionId::MAX), subscriptions_in_progress: Default::default(), - metrics, }) } diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index f888cab5fad..3657c55ebd8 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -11,7 +11,6 @@ use hyper::{ body::Incoming, http::{header::UPGRADE, request::Parts}, }; -use kameo::actor::ActorRef; use mirrord_protocol::{ body_chunks::{BodyExt as _, Frames}, tcp::{ @@ -30,16 +29,12 @@ use tokio::{ sync::mpsc::{Receiver, Sender}, }; use tokio_util::sync::CancellationToken; -use tracing::{trace, warn, Level}; +use tracing::{warn, Level}; use crate::{ error::{AgentError, Result}, metrics::{ - incoming_traffic::{ - MetricsDecStealConnectionSubscription, MetricsDecStealPortSubscriptionMany, - MetricsIncStealConnectionSubscription, - }, - MetricsActor, STEAL_CONNECTION_SUBSCRIPTION, STEAL_FILTERED_PORT_SUBSCRIPTION, + STEAL_CONNECTION_SUBSCRIPTION, STEAL_FILTERED_PORT_SUBSCRIPTION, STEAL_UNFILTERED_PORT_SUBSCRIPTION, }, steal::{ @@ -300,8 +295,6 @@ pub(crate) struct TcpConnectionStealer { /// Set of active connections stolen by [`Self::port_subscriptions`]. 
connections: StolenConnections, - - metrics: ActorRef, } impl TcpConnectionStealer { @@ -310,10 +303,7 @@ impl TcpConnectionStealer { /// Initializes a new [`TcpConnectionStealer`], but doesn't start the actual work. /// You need to call [`TcpConnectionStealer::start`] to do so. #[tracing::instrument(level = Level::TRACE, err)] - pub(crate) async fn new( - command_rx: Receiver, - metrics: ActorRef, - ) -> Result { + pub(crate) async fn new(command_rx: Receiver) -> Result { let config = envy::prefixed("MIRRORD_AGENT_") .from_env::() .unwrap_or_default(); @@ -331,7 +321,6 @@ impl TcpConnectionStealer { clients: HashMap::with_capacity(8), clients_closed: Default::default(), connections: StolenConnections::with_capacity(8), - metrics, }) } From be6b181cf3454e78c2f940f1102e10b092807aaa Mon Sep 17 00:00:00 2001 From: meowjesty Date: Fri, 20 Dec 2024 12:43:41 -0300 Subject: [PATCH 16/85] gate metrics behind config --- Cargo.lock | 30 ----------- mirrord/agent/Cargo.toml | 1 - mirrord/agent/README.md | 16 +++++- mirrord/agent/src/cli.rs | 8 ++- mirrord/agent/src/container_handle.rs | 4 +- mirrord/agent/src/dns.rs | 10 ++-- mirrord/agent/src/entrypoint.rs | 37 +++++++------- mirrord/agent/src/env.rs | 4 +- mirrord/agent/src/error.rs | 5 +- mirrord/agent/src/file.rs | 6 +-- mirrord/agent/src/main.rs | 3 +- mirrord/agent/src/metrics.rs | 6 +-- mirrord/agent/src/outgoing.rs | 12 ++--- mirrord/agent/src/outgoing/udp.rs | 10 ++-- mirrord/agent/src/steal/api.rs | 22 ++++---- mirrord/agent/src/steal/connection.rs | 32 +++++++----- mirrord/agent/src/steal/ip_tables.rs | 50 +++++++++---------- mirrord/agent/src/steal/ip_tables/chain.rs | 10 ++-- .../src/steal/ip_tables/flush_connections.rs | 14 +++--- mirrord/agent/src/steal/ip_tables/mesh.rs | 20 ++++---- .../agent/src/steal/ip_tables/mesh/istio.rs | 14 +++--- mirrord/agent/src/steal/ip_tables/output.rs | 14 +++--- .../agent/src/steal/ip_tables/prerouting.rs | 14 +++--- mirrord/agent/src/steal/ip_tables/redirect.rs | 10 ++-- 
mirrord/agent/src/steal/ip_tables/standard.rs | 14 +++--- mirrord/agent/src/vpn.rs | 16 +++--- mirrord/config/src/agent.rs | 21 +++++++- mirrord/kube/src/api/container/job.rs | 5 -- mirrord/kube/src/api/container/pod.rs | 4 -- mirrord/kube/src/api/container/util.rs | 7 ++- mirrord/protocol/src/lib.rs | 1 + 31 files changed, 220 insertions(+), 200 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0ec2159964..3ae46051709 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3734,34 +3734,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "kameo" -version = "0.13.0" -source = "git+https://github.com/tqwewe/kameo?branch=main#fcd9987669d7530ec5853be8f05932b2d78c901d" -dependencies = [ - "dyn-clone", - "futures", - "itertools 0.13.0", - "kameo_macros", - "once_cell", - "serde", - "tokio", - "tokio-stream", - "tracing", -] - -[[package]] -name = "kameo_macros" -version = "0.13.0" -source = "git+https://github.com/tqwewe/kameo?branch=main#fcd9987669d7530ec5853be8f05932b2d78c901d" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "syn 2.0.90", - "uuid", -] - [[package]] name = "konst" version = "0.3.15" @@ -4262,7 +4234,6 @@ dependencies = [ "hyper-util", "iptables", "k8s-cri", - "kameo", "libc", "mirrord-protocol", "mockall", @@ -7254,7 +7225,6 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "tracing", "windows-sys 0.52.0", ] diff --git a/mirrord/agent/Cargo.toml b/mirrord/agent/Cargo.toml index 2acca3d0ac9..e4757fccafe 100644 --- a/mirrord/agent/Cargo.toml +++ b/mirrord/agent/Cargo.toml @@ -71,7 +71,6 @@ rustls.workspace = true envy = "0.4" socket2.workspace = true prometheus = { version = "0.13", features = ["process"] } -kameo = { git = "https://github.com/tqwewe/kameo", branch = "main" } axum = { version = "0.7", features = ["macros"] } axum-server = "0.7" diff --git a/mirrord/agent/README.md b/mirrord/agent/README.md index 8b5fa759232..5cd0f1542e7 100644 --- a/mirrord/agent/README.md +++ b/mirrord/agent/README.md @@ -9,7 
+9,22 @@ mirrord-agent is distributed as a container image (currently only x86) that is p ## Enabling prometheus metrics -TODO(alex) [mid]: Talk how to enable it from env whatever. +To start the metrics server, you'll need to add this config to your `mirrord.json`: + +```json +{ + "agent": { + "metrics": "0.0.0.0:9000", + "annotations": { + "prometheus.io/scrape": "true", + "prometheus.io/port": "9000" + } + } +} +``` + +Remember to change the `port` in both `metrics` and `annotations`; they have to match, +otherwise prometheus will try to scrape on `port: 80` or other commonly used ports. ### Installing prometheus diff --git a/mirrord/agent/src/cli.rs b/mirrord/agent/src/cli.rs index 6c5b11e65a2..a6b3feba535 100644 --- a/mirrord/agent/src/cli.rs +++ b/mirrord/agent/src/cli.rs @@ -1,7 +1,9 @@ #![deny(missing_docs)] use clap::{Parser, Subcommand}; -use mirrord_protocol::{MeshVendor, AGENT_NETWORK_INTERFACE_ENV, AGENT_OPERATOR_CERT_ENV}; +use mirrord_protocol::{ + MeshVendor, AGENT_METRICS_ENV, AGENT_NETWORK_INTERFACE_ENV, AGENT_OPERATOR_CERT_ENV, +}; const DEFAULT_RUNTIME: &str = "containerd"; @@ -26,6 +28,10 @@ pub struct Args { #[arg(short = 'i', long, env = AGENT_NETWORK_INTERFACE_ENV)] pub network_interface: Option, + /// Controls whether metrics are enabled, and the address to set up the metrics server. + #[arg(long, env = AGENT_METRICS_ENV)] + pub metrics: Option, + /// Return an error after accepting the first client connection, in order to test agent error /// cleanup. 
/// diff --git a/mirrord/agent/src/container_handle.rs b/mirrord/agent/src/container_handle.rs index 6e8ba78173d..dd6755e766d 100644 --- a/mirrord/agent/src/container_handle.rs +++ b/mirrord/agent/src/container_handle.rs @@ -1,7 +1,7 @@ use std::{collections::HashMap, sync::Arc}; use crate::{ - error::Result, + error::AgentResult, runtime::{Container, ContainerInfo, ContainerRuntime}, }; @@ -22,7 +22,7 @@ pub(crate) struct ContainerHandle(Arc); impl ContainerHandle { /// Retrieve info about the container and initialize this struct. #[tracing::instrument(level = "trace")] - pub(crate) async fn new(container: Container) -> Result { + pub(crate) async fn new(container: Container) -> AgentResult { let ContainerInfo { pid, env: raw_env } = container.get_info().await?; let inner = Inner { pid, raw_env }; diff --git a/mirrord/agent/src/dns.rs b/mirrord/agent/src/dns.rs index 0ad44c76934..4a374eab07e 100644 --- a/mirrord/agent/src/dns.rs +++ b/mirrord/agent/src/dns.rs @@ -17,7 +17,7 @@ use tokio_util::sync::CancellationToken; use tracing::Level; use crate::{ - error::{AgentError, Result}, + error::{AgentError, AgentResult}, watched_task::TaskStatus, }; @@ -86,7 +86,7 @@ impl DnsWorker { // Prepares the `Resolver` after reading some `/etc` DNS files. // // We care about logging these errors, at an `error!` level. - let resolver: Result<_, ResponseError> = try { + let resolver: AgentResult<_, ResponseError> = try { let resolv_conf_path = etc_path.join("resolv.conf"); let hosts_path = etc_path.join("hosts"); @@ -139,7 +139,7 @@ impl DnsWorker { pub(crate) async fn run( mut self, cancellation_token: CancellationToken, - ) -> Result<(), AgentError> { + ) -> AgentResult<(), AgentError> { loop { tokio::select! 
{ _ = cancellation_token.cancelled() => break Ok(()), @@ -175,7 +175,7 @@ impl DnsApi { pub(crate) async fn make_request( &mut self, request: GetAddrInfoRequest, - ) -> Result<(), AgentError> { + ) -> AgentResult<(), AgentError> { let (response_tx, response_rx) = oneshot::channel(); let command = DnsCommand { @@ -194,7 +194,7 @@ impl DnsApi { /// Returns the result of the oldest outstanding DNS request issued with this struct (see /// [`Self::make_request`]). #[tracing::instrument(level = Level::TRACE, skip(self), ret, err)] - pub(crate) async fn recv(&mut self) -> Result { + pub(crate) async fn recv(&mut self) -> AgentResult { let Some(response) = self.responses.next().await else { return future::pending().await; }; diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 1f3fabec542..c72409e5f4e 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -33,7 +33,7 @@ use crate::{ client_connection::ClientConnection, container_handle::ContainerHandle, dns::DnsApi, - error::{AgentError, Result}, + error::{AgentError, AgentResult}, file::FileManager, outgoing::{TcpOutgoingApi, UdpOutgoingApi}, runtime::get_container, @@ -73,7 +73,7 @@ struct State { impl State { /// Return [`Err`] if container runtime operations failed. 
- pub async fn new(args: &Args) -> Result { + pub async fn new(args: &Args) -> AgentResult { let tls_connector = args .operator_tls_cert_pem .clone() @@ -213,7 +213,7 @@ impl ClientConnectionHandler { mut connection: ClientConnection, bg_tasks: BackgroundTasks, state: State, - ) -> Result { + ) -> AgentResult { let pid = state.container_pid(); let file_manager = FileManager::new(pid.or_else(|| state.ephemeral.then_some(1))); @@ -274,7 +274,7 @@ impl ClientConnectionHandler { id: ClientId, task: BackgroundTask, connection: &mut ClientConnection, - ) -> Result> { + ) -> AgentResult> { if let BackgroundTask::Running(stealer_status, stealer_sender) = task { match TcpStealerApi::new( id, @@ -314,7 +314,7 @@ impl ClientConnectionHandler { /// /// Breaks upon receiver/sender drop. #[tracing::instrument(level = "trace", skip(self))] - async fn start(mut self, cancellation_token: CancellationToken) -> Result<()> { + async fn start(mut self, cancellation_token: CancellationToken) -> AgentResult<()> { let error = loop { select! { message = self.connection.receive() => { @@ -390,7 +390,7 @@ impl ClientConnectionHandler { /// Sends a [`DaemonMessage`] response to the connected client (`mirrord-layer`). #[tracing::instrument(level = "trace", skip(self))] - async fn respond(&mut self, response: DaemonMessage) -> Result<()> { + async fn respond(&mut self, response: DaemonMessage) -> AgentResult<()> { self.connection.send(response).await.map_err(Into::into) } @@ -398,7 +398,7 @@ impl ClientConnectionHandler { /// /// Returns `false` if the client disconnected. #[tracing::instrument(level = Level::TRACE, skip(self), err)] - async fn handle_client_message(&mut self, message: ClientMessage) -> Result { + async fn handle_client_message(&mut self, message: ClientMessage) -> AgentResult { match message { ClientMessage::FileRequest(req) => { if let Some(response) = self.file_manager.handle_message(req).await? 
{ @@ -490,14 +490,17 @@ impl ClientConnectionHandler { /// Initializes the agent's [`State`], channels, threads, and runs [`ClientConnectionHandler`]s. #[tracing::instrument(level = Level::TRACE, ret, err)] -async fn start_agent(args: Args) -> Result<()> { +async fn start_agent(args: Args) -> AgentResult<()> { trace!("start_agent -> Starting agent with args: {args:?}"); - tokio::spawn(async move { - start_metrics() - .await - .inspect_err(|fail| tracing::error!(?fail, "Failed starting metrics server!")) - }); + if let Some(metrics_address) = args.metrics.as_ref() { + let address = metrics_address.parse()?; + tokio::spawn(async move { + start_metrics(address) + .await + .inspect_err(|fail| tracing::error!(?fail, "Failed starting metrics server!")) + }); + } let listener = TcpListener::bind(SocketAddrV4::new( Ipv4Addr::UNSPECIFIED, @@ -730,7 +733,7 @@ async fn start_agent(args: Args) -> Result<()> { Ok(()) } -async fn clear_iptable_chain() -> Result<()> { +async fn clear_iptable_chain() -> AgentResult<()> { let ipt = new_iptables(); SafeIpTables::load(IPTablesWrapper::from(ipt), false) @@ -741,7 +744,7 @@ async fn clear_iptable_chain() -> Result<()> { Ok(()) } -async fn run_child_agent() -> Result<()> { +async fn run_child_agent() -> AgentResult<()> { let command_args = std::env::args().collect::>(); let (command, args) = command_args .split_first() @@ -765,7 +768,7 @@ async fn run_child_agent() -> Result<()> { /// /// Captures SIGTERM signals sent by Kubernetes when the pod is gracefully deleted. /// When a signal is captured, the child process is killed and the iptables are cleaned. -async fn start_iptable_guard(args: Args) -> Result<()> { +async fn start_iptable_guard(args: Args) -> AgentResult<()> { debug!("start_iptable_guard -> Initializing iptable-guard."); let state = State::new(&args).await?; @@ -813,7 +816,7 @@ async fn start_iptable_guard(args: Args) -> Result<()> { /// 1. 
If you try to `bind` a socket to some address before [`start_agent`], it'll actually /// be bound **twice**, which incurs an error (address already in use). You could get around /// this by `bind`ing on `0.0.0.0:0`, but this is most likely **not** what you want. -pub async fn main() -> Result<()> { +pub async fn main() -> AgentResult<()> { rustls::crypto::CryptoProvider::install_default(rustls::crypto::aws_lc_rs::default_provider()) .expect("Failed to install crypto provider"); diff --git a/mirrord/agent/src/env.rs b/mirrord/agent/src/env.rs index 26fa4681431..5a349709f2d 100644 --- a/mirrord/agent/src/env.rs +++ b/mirrord/agent/src/env.rs @@ -7,7 +7,7 @@ use mirrord_protocol::RemoteResult; use tokio::io::AsyncReadExt; use wildmatch::WildMatch; -use crate::error::Result; +use crate::error::AgentResult; struct EnvFilter { include: Vec, @@ -97,7 +97,7 @@ pub(crate) fn parse_raw_env<'a, S: AsRef + 'a + ?Sized, T: IntoIterator>() } -pub(crate) async fn get_proc_environ(path: PathBuf) -> Result> { +pub(crate) async fn get_proc_environ(path: PathBuf) -> AgentResult> { let mut environ_file = tokio::fs::File::open(path).await?; let mut raw_env_vars = String::with_capacity(8192); diff --git a/mirrord/agent/src/error.rs b/mirrord/agent/src/error.rs index ad04e49c8c5..c0c05fcfdf4 100644 --- a/mirrord/agent/src/error.rs +++ b/mirrord/agent/src/error.rs @@ -84,6 +84,9 @@ pub(crate) enum AgentError { /// Temporary error for vpn feature #[error("Generic error in vpn: {0}")] VpnError(String), + + #[error(transparent)] + AddrParse(#[from] std::net::AddrParseError), } impl From> for AgentError { @@ -92,4 +95,4 @@ impl From> for AgentError { } } -pub(crate) type Result = std::result::Result; +pub(crate) type AgentResult = std::result::Result; diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 7f4c84fdee0..4ad1aa89266 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -14,7 +14,7 @@ use libc::DT_DIR; use mirrord_protocol::{file::*, 
FileRequest, FileResponse, RemoteResult, ResponseError}; use tracing::{error, trace, Level}; -use crate::{error::Result, metrics::OPEN_FD_COUNT}; +use crate::{error::AgentResult, metrics::OPEN_FD_COUNT}; #[derive(Debug)] pub enum RemoteFile { @@ -139,7 +139,7 @@ impl FileManager { pub(crate) async fn handle_message( &mut self, request: FileRequest, - ) -> Result> { + ) -> AgentResult> { Ok(match request { FileRequest::Open(OpenFileRequest { path, open_options }) => { // TODO: maybe not agent error on this? @@ -857,7 +857,7 @@ impl FileManager { // buffer (and there was no error converting to a // `DirEntryInternal`. while let Some(entry) = entry_results - .next_if(|entry_res: &Result| { + .next_if(|entry_res: &AgentResult| { entry_res.as_ref().is_ok_and(|entry| { entry.get_d_reclen64() as u64 + result_size <= buffer_size }) diff --git a/mirrord/agent/src/main.rs b/mirrord/agent/src/main.rs index 777bdb8feb1..b8c504041ec 100644 --- a/mirrord/agent/src/main.rs +++ b/mirrord/agent/src/main.rs @@ -40,11 +40,12 @@ mod vpn; #[cfg(target_os = "linux")] mod watched_task; +#[cfg(target_os = "linux")] mod metrics; #[cfg(target_os = "linux")] #[tokio::main(flavor = "current_thread")] -async fn main() -> crate::error::Result<()> { +async fn main() -> crate::error::AgentResult<()> { crate::entrypoint::main().await } diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 66b1610cee5..4017f440fe7 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -1,4 +1,4 @@ -use std::sync::LazyLock; +use std::{net::SocketAddr, sync::LazyLock}; use axum::{response::IntoResponse, routing::get, Router}; use prometheus::{register_int_gauge, IntGauge}; @@ -104,10 +104,10 @@ async fn get_metrics() -> Result { } #[tracing::instrument(level = Level::TRACE, skip_all, ret ,err)] -pub(crate) async fn start_metrics() -> Result<(), axum::BoxError> { +pub(crate) async fn start_metrics(address: SocketAddr) -> Result<(), axum::BoxError> { let app = 
Router::new().route("/metrics", get(get_metrics)); - let listener = TcpListener::bind("0.0.0.0:9000") + let listener = TcpListener::bind(address) .await .map_err(AgentError::from) .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; diff --git a/mirrord/agent/src/outgoing.rs b/mirrord/agent/src/outgoing.rs index eb92ec68916..1e41c9ce942 100644 --- a/mirrord/agent/src/outgoing.rs +++ b/mirrord/agent/src/outgoing.rs @@ -18,7 +18,7 @@ use tokio_util::io::ReaderStream; use tracing::Level; use crate::{ - error::Result, + error::AgentResult, metrics::TCP_OUTGOING_CONNECTION, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, @@ -82,7 +82,7 @@ impl TcpOutgoingApi { /// Sends the [`LayerTcpOutgoing`] message to the background task. #[tracing::instrument(level = Level::TRACE, skip(self), err)] - pub(crate) async fn send_to_task(&mut self, message: LayerTcpOutgoing) -> Result<()> { + pub(crate) async fn send_to_task(&mut self, message: LayerTcpOutgoing) -> AgentResult<()> { if self.layer_tx.send(message).await.is_ok() { Ok(()) } else { @@ -92,7 +92,7 @@ impl TcpOutgoingApi { /// Receives a [`DaemonTcpOutgoing`] message from the background task. #[tracing::instrument(level = Level::TRACE, skip(self), err)] - pub(crate) async fn recv_from_task(&mut self) -> Result { + pub(crate) async fn recv_from_task(&mut self) -> AgentResult { match self.daemon_rx.recv().await { Some(msg) => Ok(msg), None => Err(self.task_status.unwrap_err().await), @@ -153,7 +153,7 @@ impl TcpOutgoingTask { /// Runs this task as long as the channels connecting it with [`TcpOutgoingApi`] are open. /// This routine never fails and returns [`Result`] only due to [`WatchedTask`] constraints. #[tracing::instrument(level = Level::TRACE, skip(self))] - async fn run(mut self) -> Result<()> { + async fn run(mut self) -> AgentResult<()> { loop { let channel_closed = select! 
{ biased; @@ -191,7 +191,7 @@ impl TcpOutgoingTask { &mut self, connection_id: ConnectionId, read: io::Result>, - ) -> Result<(), SendError> { + ) -> AgentResult<(), SendError> { match read { // New bytes came in from a peer connection. // We pass them to the layer. @@ -266,7 +266,7 @@ impl TcpOutgoingTask { async fn handle_layer_msg( &mut self, message: LayerTcpOutgoing, - ) -> Result<(), SendError> { + ) -> AgentResult<(), SendError> { match message { // We make connection to the requested address, split the stream into halves with // `io::split`, and put them into respective maps. diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index 0a30fcbd5fa..4ab96dd1264 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -23,7 +23,7 @@ use tokio_util::{codec::BytesCodec, udp::UdpFramed}; use tracing::{debug, trace, warn}; use crate::{ - error::Result, + error::AgentResult, metrics::UDP_OUTGOING_CONNECTION, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, @@ -57,7 +57,7 @@ pub(crate) struct UdpOutgoingApi { /// 3. User is trying to use `sendto` and `recvfrom`, we use the same hack as in DNS to fake a /// connection. #[tracing::instrument(level = "trace", ret)] -async fn connect(remote_address: SocketAddr) -> Result { +async fn connect(remote_address: SocketAddr) -> AgentResult { let mirror_address = match remote_address { std::net::SocketAddr::V4(_) => SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0), std::net::SocketAddr::V6(_) => SocketAddr::new(IpAddr::V6(Ipv6Addr::UNSPECIFIED), 0), @@ -102,7 +102,7 @@ impl UdpOutgoingApi { async fn interceptor_task( mut layer_rx: Receiver, daemon_tx: Sender, - ) -> Result<()> { + ) -> AgentResult<()> { let mut connection_ids = 0..=ConnectionId::MAX; // TODO: Right now we're manually keeping these 2 maps in sync (aviram suggested using @@ -245,7 +245,7 @@ impl UdpOutgoingApi { } /// Sends a `UdpOutgoingRequest` to the `interceptor_task`. 
- pub(crate) async fn layer_message(&mut self, message: LayerUdpOutgoing) -> Result<()> { + pub(crate) async fn layer_message(&mut self, message: LayerUdpOutgoing) -> AgentResult<()> { trace!( "UdpOutgoingApi::layer_message -> layer_message {:#?}", message @@ -259,7 +259,7 @@ impl UdpOutgoingApi { } /// Receives a `UdpOutgoingResponse` from the `interceptor_task`. - pub(crate) async fn daemon_message(&mut self) -> Result { + pub(crate) async fn daemon_message(&mut self) -> AgentResult { match self.daemon_rx.recv().await { Some(msg) => Ok(msg), None => Err(self.task_status.unwrap_err().await), diff --git a/mirrord/agent/src/steal/api.rs b/mirrord/agent/src/steal/api.rs index a6ec1d8d1f7..7edaa0900b5 100644 --- a/mirrord/agent/src/steal/api.rs +++ b/mirrord/agent/src/steal/api.rs @@ -14,7 +14,7 @@ use tokio_stream::wrappers::ReceiverStream; use super::*; use crate::{ - error::{AgentError, Result}, + error::{AgentError, AgentResult}, util::ClientId, watched_task::TaskStatus, }; @@ -53,7 +53,7 @@ impl TcpStealerApi { task_status: TaskStatus, channel_size: usize, protocol_version: semver::Version, - ) -> Result { + ) -> AgentResult { let (daemon_tx, daemon_rx) = mpsc::channel(channel_size); command_tx @@ -73,7 +73,7 @@ impl TcpStealerApi { } /// Send `command` to stealer, with the client id of the client that is using this API instance. - async fn send_command(&mut self, command: Command) -> Result<()> { + async fn send_command(&mut self, command: Command) -> AgentResult<()> { let command = StealerCommand { client_id: self.client_id, command, @@ -91,7 +91,7 @@ impl TcpStealerApi { /// /// Called in the `ClientConnectionHandler`. 
#[tracing::instrument(level = "trace", skip(self))] - pub(crate) async fn recv(&mut self) -> Result { + pub(crate) async fn recv(&mut self) -> AgentResult { match self.daemon_rx.recv().await { Some(msg) => { if let DaemonTcp::Close(close) = &msg { @@ -108,7 +108,7 @@ impl TcpStealerApi { /// agent, to an internal stealer command [`Command::PortSubscribe`]. /// /// The actual handling of this message is done in [`TcpConnectionStealer`]. - pub(crate) async fn port_subscribe(&mut self, port_steal: StealType) -> Result<(), AgentError> { + pub(crate) async fn port_subscribe(&mut self, port_steal: StealType) -> AgentResult<(), AgentError> { self.send_command(Command::PortSubscribe(port_steal)).await } @@ -116,7 +116,7 @@ impl TcpStealerApi { /// agent, to an internal stealer command [`Command::PortUnsubscribe`]. /// /// The actual handling of this message is done in [`TcpConnectionStealer`]. - pub(crate) async fn port_unsubscribe(&mut self, port: Port) -> Result<(), AgentError> { + pub(crate) async fn port_unsubscribe(&mut self, port: Port) -> AgentResult<(), AgentError> { self.send_command(Command::PortUnsubscribe(port)).await } @@ -127,7 +127,7 @@ impl TcpStealerApi { pub(crate) async fn connection_unsubscribe( &mut self, connection_id: ConnectionId, - ) -> Result<(), AgentError> { + ) -> AgentResult<(), AgentError> { self.send_command(Command::ConnectionUnsubscribe(connection_id)) .await } @@ -136,7 +136,7 @@ impl TcpStealerApi { /// agent, to an internal stealer command [`Command::ResponseData`]. /// /// The actual handling of this message is done in [`TcpConnectionStealer`]. 
- pub(crate) async fn client_data(&mut self, tcp_data: TcpData) -> Result<(), AgentError> { + pub(crate) async fn client_data(&mut self, tcp_data: TcpData) -> AgentResult<(), AgentError> { self.send_command(Command::ResponseData(tcp_data)).await } @@ -147,19 +147,19 @@ impl TcpStealerApi { pub(crate) async fn http_response( &mut self, response: HttpResponseFallback, - ) -> Result<(), AgentError> { + ) -> AgentResult<(), AgentError> { self.send_command(Command::HttpResponse(response)).await } pub(crate) async fn switch_protocol_version( &mut self, version: semver::Version, - ) -> Result<(), AgentError> { + ) -> AgentResult<(), AgentError> { self.send_command(Command::SwitchProtocolVersion(version)) .await } - pub(crate) async fn handle_client_message(&mut self, message: LayerTcpSteal) -> Result<()> { + pub(crate) async fn handle_client_message(&mut self, message: LayerTcpSteal) -> AgentResult<()> { match message { LayerTcpSteal::PortSubscribe(port_steal) => self.port_subscribe(port_steal).await, LayerTcpSteal::ConnectionUnsubscribe(connection_id) => { diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index 3657c55ebd8..ab30fc61499 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -32,7 +32,7 @@ use tokio_util::sync::CancellationToken; use tracing::{warn, Level}; use crate::{ - error::{AgentError, Result}, + error::{AgentError, AgentResult}, metrics::{ STEAL_CONNECTION_SUBSCRIPTION, STEAL_FILTERED_PORT_SUBSCRIPTION, STEAL_UNFILTERED_PORT_SUBSCRIPTION, @@ -59,7 +59,7 @@ struct MatchedHttpRequest { } impl MatchedHttpRequest { - async fn into_serializable(self) -> Result, hyper::Error> { + async fn into_serializable(self) -> AgentResult, hyper::Error> { let ( Parts { method, @@ -89,7 +89,7 @@ impl MatchedHttpRequest { }) } - async fn into_serializable_fallback(self) -> Result>, hyper::Error> { + async fn into_serializable_fallback(self) -> AgentResult>, hyper::Error> { let ( Parts { 
method, @@ -185,7 +185,7 @@ impl Client { let frames = frames .into_iter() .map(InternalHttpBodyFrame::try_from) - .filter_map(Result::ok) + .filter_map(AgentResult::ok) .collect(); let message = DaemonTcp::HttpRequestChunked(ChunkedRequest::Start(HttpRequest { @@ -214,7 +214,7 @@ impl Client { let frames = frames .into_iter() .map(InternalHttpBodyFrame::try_from) - .filter_map(Result::ok) + .filter_map(AgentResult::ok) .collect(); let message = DaemonTcp::HttpRequestChunked(ChunkedRequest::Body( ChunkedHttpBody { @@ -303,7 +303,7 @@ impl TcpConnectionStealer { /// Initializes a new [`TcpConnectionStealer`], but doesn't start the actual work. /// You need to call [`TcpConnectionStealer::start`] to do so. #[tracing::instrument(level = Level::TRACE, err)] - pub(crate) async fn new(command_rx: Receiver) -> Result { + pub(crate) async fn new(command_rx: Receiver) -> AgentResult { let config = envy::prefixed("MIRRORD_AGENT_") .from_env::() .unwrap_or_default(); @@ -339,7 +339,7 @@ impl TcpConnectionStealer { pub(crate) async fn start( mut self, cancellation_token: CancellationToken, - ) -> Result<(), AgentError> { + ) -> AgentResult<(), AgentError> { loop { tokio::select! { command = self.command_rx.recv() => { @@ -379,7 +379,11 @@ impl TcpConnectionStealer { /// Handles a new remote connection that was stolen by [`Self::port_subscriptions`]. #[tracing::instrument(level = "trace", skip(self))] - async fn incoming_connection(&mut self, stream: TcpStream, peer: SocketAddr) -> Result<()> { + async fn incoming_connection( + &mut self, + stream: TcpStream, + peer: SocketAddr, + ) -> AgentResult<()> { let mut real_address = orig_dst::orig_dst_addr(&stream)?; // If we use the original IP we would go through prerouting and hit a loop. // localhost should always work. 
@@ -413,7 +417,7 @@ impl TcpConnectionStealer { async fn handle_connection_update( &mut self, update: ConnectionMessageOut, - ) -> Result<(), AgentError> { + ) -> AgentResult<(), AgentError> { match update { ConnectionMessageOut::Closed { connection_id, @@ -551,7 +555,11 @@ impl TcpConnectionStealer { /// /// - Returns: `true` if this is an HTTP filtered subscription. #[tracing::instrument(level = Level::TRACE, skip(self), err)] - async fn port_subscribe(&mut self, client_id: ClientId, port_steal: StealType) -> Result { + async fn port_subscribe( + &mut self, + client_id: ClientId, + port_steal: StealType, + ) -> AgentResult { let spec = match port_steal { StealType::All(port) => Ok((port, None)), StealType::FilteredHttp(port, filter) => Regex::new(&format!("(?i){filter}")) @@ -582,7 +590,7 @@ impl TcpConnectionStealer { /// their subscriptions from [`Self::port_subscriptions`] and all their open /// connections. #[tracing::instrument(level = "trace", skip(self))] - async fn close_client(&mut self, client_id: ClientId) -> Result<(), AgentError> { + async fn close_client(&mut self, client_id: ClientId) -> AgentResult<(), AgentError> { let removed_subscriptions = self.port_subscriptions.remove_all(client_id).await?; for filtered in removed_subscriptions { @@ -647,7 +655,7 @@ impl TcpConnectionStealer { /// Handles [`Command`]s that were received by [`TcpConnectionStealer::command_rx`]. 
#[tracing::instrument(level = Level::TRACE, skip(self), err)] - async fn handle_command(&mut self, command: StealerCommand) -> Result<(), AgentError> { + async fn handle_command(&mut self, command: StealerCommand) -> AgentResult<(), AgentError> { let StealerCommand { client_id, command } = command; match command { diff --git a/mirrord/agent/src/steal/ip_tables.rs b/mirrord/agent/src/steal/ip_tables.rs index c25ff1eb36c..6b131262cbf 100644 --- a/mirrord/agent/src/steal/ip_tables.rs +++ b/mirrord/agent/src/steal/ip_tables.rs @@ -9,7 +9,7 @@ use rand::distributions::{Alphanumeric, DistString}; use tracing::warn; use crate::{ - error::{AgentError, Result}, + error::{AgentError, AgentResult}, steal::ip_tables::{ flush_connections::FlushConnections, mesh::{istio::AmbientRedirect, MeshRedirect, MeshVendorExt}, @@ -24,26 +24,26 @@ mod iptables { pub struct IPTables; impl IPTables { - pub fn list(&self, _: &str, _: &str) -> Result, String> { + pub fn list(&self, _: &str, _: &str) -> AgentResult, String> { todo!() } - pub fn insert(&self, _: &str, _: &str, _: &str, _: i32) -> Result<(), String> { + pub fn insert(&self, _: &str, _: &str, _: &str, _: i32) -> AgentResult<(), String> { todo!() } - pub fn append(&self, _: &str, _: &str, _: &str) -> Result<(), String> { + pub fn append(&self, _: &str, _: &str, _: &str) -> AgentResult<(), String> { todo!() } - pub fn delete(&self, _: &str, _: &str, _: &str) -> Result<(), String> { + pub fn delete(&self, _: &str, _: &str, _: &str) -> AgentResult<(), String> { todo!() } - pub fn new_chain(&self, _: &str, _: &str) -> Result<(), String> { + pub fn new_chain(&self, _: &str, _: &str) -> AgentResult<(), String> { todo!() } - pub fn delete_chain(&self, _: &str, _: &str) -> Result<(), String> { + pub fn delete_chain(&self, _: &str, _: &str) -> AgentResult<(), String> { todo!() } - pub fn flush_chain(&self, _: &str, _: &str) -> Result<(), String> { + pub fn flush_chain(&self, _: &str, _: &str) -> AgentResult<(), String> { todo!() } } @@ 
-114,13 +114,13 @@ pub(crate) trait IPTables { where Self: Sized; - fn create_chain(&self, name: &str) -> Result<()>; - fn remove_chain(&self, name: &str) -> Result<()>; + fn create_chain(&self, name: &str) -> AgentResult<()>; + fn remove_chain(&self, name: &str) -> AgentResult<()>; - fn add_rule(&self, chain: &str, rule: &str) -> Result<()>; - fn insert_rule(&self, chain: &str, rule: &str, index: i32) -> Result<()>; - fn list_rules(&self, chain: &str) -> Result>; - fn remove_rule(&self, chain: &str, rule: &str) -> Result<()>; + fn add_rule(&self, chain: &str, rule: &str) -> AgentResult<()>; + fn insert_rule(&self, chain: &str, rule: &str, index: i32) -> AgentResult<()>; + fn list_rules(&self, chain: &str) -> AgentResult>; + fn remove_rule(&self, chain: &str, rule: &str) -> AgentResult<()>; } #[derive(Clone)] @@ -171,7 +171,7 @@ impl IPTables for IPTablesWrapper { } #[tracing::instrument(level = "trace")] - fn create_chain(&self, name: &str) -> Result<()> { + fn create_chain(&self, name: &str) -> AgentResult<()> { self.tables .new_chain(self.table_name, name) .map_err(|e| AgentError::IPTablesError(e.to_string()))?; @@ -183,7 +183,7 @@ impl IPTables for IPTablesWrapper { } #[tracing::instrument(level = "trace")] - fn remove_chain(&self, name: &str) -> Result<()> { + fn remove_chain(&self, name: &str) -> AgentResult<()> { self.tables .flush_chain(self.table_name, name) .map_err(|e| AgentError::IPTablesError(e.to_string()))?; @@ -195,28 +195,28 @@ impl IPTables for IPTablesWrapper { } #[tracing::instrument(level = "trace", ret)] - fn add_rule(&self, chain: &str, rule: &str) -> Result<()> { + fn add_rule(&self, chain: &str, rule: &str) -> AgentResult<()> { self.tables .append(self.table_name, chain, rule) .map_err(|e| AgentError::IPTablesError(e.to_string())) } #[tracing::instrument(level = "trace", ret)] - fn insert_rule(&self, chain: &str, rule: &str, index: i32) -> Result<()> { + fn insert_rule(&self, chain: &str, rule: &str, index: i32) -> AgentResult<()> { 
self.tables .insert(self.table_name, chain, rule, index) .map_err(|e| AgentError::IPTablesError(e.to_string())) } #[tracing::instrument(level = "trace")] - fn list_rules(&self, chain: &str) -> Result> { + fn list_rules(&self, chain: &str) -> AgentResult> { self.tables .list(self.table_name, chain) .map_err(|e| AgentError::IPTablesError(e.to_string())) } #[tracing::instrument(level = "trace")] - fn remove_rule(&self, chain: &str, rule: &str) -> Result<()> { + fn remove_rule(&self, chain: &str, rule: &str) -> AgentResult<()> { self.tables .delete(self.table_name, chain, rule) .map_err(|e| AgentError::IPTablesError(e.to_string())) @@ -250,7 +250,7 @@ where ipt: IPT, flush_connections: bool, pod_ips: Option<&str>, - ) -> Result { + ) -> AgentResult { let ipt = Arc::new(ipt); let mut redirect = if let Some(vendor) = MeshVendor::detect(ipt.as_ref())? { @@ -281,7 +281,7 @@ where Ok(Self { redirect }) } - pub(crate) async fn load(ipt: IPT, flush_connections: bool) -> Result { + pub(crate) async fn load(ipt: IPT, flush_connections: bool) -> AgentResult { let ipt = Arc::new(ipt); let mut redirect = if let Some(vendor) = MeshVendor::detect(ipt.as_ref())? 
{ @@ -315,7 +315,7 @@ where &self, redirected_port: Port, target_port: Port, - ) -> Result<()> { + ) -> AgentResult<()> { self.redirect .add_redirect(redirected_port, target_port) .await @@ -330,13 +330,13 @@ where &self, redirected_port: Port, target_port: Port, - ) -> Result<()> { + ) -> AgentResult<()> { self.redirect .remove_redirect(redirected_port, target_port) .await } - pub(crate) async fn cleanup(&self) -> Result<()> { + pub(crate) async fn cleanup(&self) -> AgentResult<()> { self.redirect.unmount_entrypoint().await } } diff --git a/mirrord/agent/src/steal/ip_tables/chain.rs b/mirrord/agent/src/steal/ip_tables/chain.rs index c5bc6d65404..c1c34715c85 100644 --- a/mirrord/agent/src/steal/ip_tables/chain.rs +++ b/mirrord/agent/src/steal/ip_tables/chain.rs @@ -4,7 +4,7 @@ use std::sync::{ }; use crate::{ - error::{AgentError, Result}, + error::{AgentError, AgentResult}, steal::ip_tables::IPTables, }; @@ -19,7 +19,7 @@ impl IPTableChain where IPT: IPTables, { - pub fn create(inner: Arc, chain_name: String) -> Result { + pub fn create(inner: Arc, chain_name: String) -> AgentResult { inner.create_chain(&chain_name)?; // Start with 1 because the chain will allways have atleast `-A ` as a rule @@ -32,7 +32,7 @@ where }) } - pub fn load(inner: Arc, chain_name: String) -> Result { + pub fn load(inner: Arc, chain_name: String) -> AgentResult { let existing_rules = inner.list_rules(&chain_name)?.len(); if existing_rules == 0 { @@ -59,7 +59,7 @@ where &self.inner } - pub fn add_rule(&self, rule: &str) -> Result { + pub fn add_rule(&self, rule: &str) -> AgentResult { self.inner .insert_rule( &self.chain_name, @@ -72,7 +72,7 @@ where }) } - pub fn remove_rule(&self, rule: &str) -> Result<()> { + pub fn remove_rule(&self, rule: &str) -> AgentResult<()> { self.inner.remove_rule(&self.chain_name, rule)?; self.chain_size.fetch_sub(1, Ordering::Relaxed); diff --git a/mirrord/agent/src/steal/ip_tables/flush_connections.rs b/mirrord/agent/src/steal/ip_tables/flush_connections.rs 
index 6675a40651f..c0f19c20b8d 100644 --- a/mirrord/agent/src/steal/ip_tables/flush_connections.rs +++ b/mirrord/agent/src/steal/ip_tables/flush_connections.rs @@ -13,7 +13,7 @@ use tokio::process::Command; use tracing::warn; use crate::{ - error::Result, + error::AgentResult, steal::ip_tables::{chain::IPTableChain, redirect::Redirect, IPTables, IPTABLE_INPUT}, }; @@ -33,7 +33,7 @@ where const ENTRYPOINT: &'static str = "INPUT"; #[tracing::instrument(level = "trace", skip(ipt, inner))] - pub fn create(ipt: Arc, inner: Box) -> Result { + pub fn create(ipt: Arc, inner: Box) -> AgentResult { let managed = IPTableChain::create(ipt.with_table("filter").into(), IPTABLE_INPUT.to_string())?; @@ -48,7 +48,7 @@ where } #[tracing::instrument(level = "trace", skip(ipt, inner))] - pub fn load(ipt: Arc, inner: Box) -> Result { + pub fn load(ipt: Arc, inner: Box) -> AgentResult { let managed = IPTableChain::load(ipt.with_table("filter").into(), IPTABLE_INPUT.to_string())?; @@ -63,7 +63,7 @@ where T: Redirect + Send + Sync, { #[tracing::instrument(level = "trace", skip(self), ret)] - async fn mount_entrypoint(&self) -> Result<()> { + async fn mount_entrypoint(&self) -> AgentResult<()> { self.inner.mount_entrypoint().await?; self.managed.inner().add_rule( @@ -75,7 +75,7 @@ where } #[tracing::instrument(level = "trace", skip(self), ret)] - async fn unmount_entrypoint(&self) -> Result<()> { + async fn unmount_entrypoint(&self) -> AgentResult<()> { self.inner.unmount_entrypoint().await?; self.managed.inner().remove_rule( @@ -87,7 +87,7 @@ where } #[tracing::instrument(level = "trace", skip(self), ret)] - async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { self.inner .add_redirect(redirected_port, target_port) .await?; @@ -115,7 +115,7 @@ where } #[tracing::instrument(level = "trace", skip(self), ret)] - async fn remove_redirect(&self, redirected_port: Port, 
target_port: Port) -> Result<()> { + async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { self.inner .remove_redirect(redirected_port, target_port) .await?; diff --git a/mirrord/agent/src/steal/ip_tables/mesh.rs b/mirrord/agent/src/steal/ip_tables/mesh.rs index 88fdff5d0b1..1a3e5acbe62 100644 --- a/mirrord/agent/src/steal/ip_tables/mesh.rs +++ b/mirrord/agent/src/steal/ip_tables/mesh.rs @@ -5,7 +5,7 @@ use fancy_regex::Regex; use mirrord_protocol::{MeshVendor, Port}; use crate::{ - error::Result, + error::AgentResult, steal::ip_tables::{ output::OutputRedirect, prerouting::PreroutingRedirect, redirect::Redirect, IPTables, IPTABLE_MESH, @@ -29,7 +29,7 @@ impl MeshRedirect where IPT: IPTables, { - pub fn create(ipt: Arc, vendor: MeshVendor, pod_ips: Option<&str>) -> Result { + pub fn create(ipt: Arc, vendor: MeshVendor, pod_ips: Option<&str>) -> AgentResult { let prerouting = PreroutingRedirect::create(ipt.clone())?; for port in Self::get_skip_ports(&ipt, &vendor)? 
{ @@ -45,7 +45,7 @@ where }) } - pub fn load(ipt: Arc, vendor: MeshVendor) -> Result { + pub fn load(ipt: Arc, vendor: MeshVendor) -> AgentResult { let prerouting = PreroutingRedirect::load(ipt.clone())?; let output = OutputRedirect::load(ipt, IPTABLE_MESH.to_string())?; @@ -56,7 +56,7 @@ where }) } - fn get_skip_ports(ipt: &IPT, vendor: &MeshVendor) -> Result> { + fn get_skip_ports(ipt: &IPT, vendor: &MeshVendor) -> AgentResult> { let chain_name = vendor.input_chain(); let lookup_regex = if let Some(regex) = vendor.skip_ports_regex() { regex @@ -86,21 +86,21 @@ impl Redirect for MeshRedirect where IPT: IPTables + Send + Sync, { - async fn mount_entrypoint(&self) -> Result<()> { + async fn mount_entrypoint(&self) -> AgentResult<()> { self.prerouting.mount_entrypoint().await?; self.output.mount_entrypoint().await?; Ok(()) } - async fn unmount_entrypoint(&self) -> Result<()> { + async fn unmount_entrypoint(&self) -> AgentResult<()> { self.prerouting.unmount_entrypoint().await?; self.output.unmount_entrypoint().await?; Ok(()) } - async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { if self.vendor != MeshVendor::IstioCni { self.prerouting .add_redirect(redirected_port, target_port) @@ -113,7 +113,7 @@ where Ok(()) } - async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { if self.vendor != MeshVendor::IstioCni { self.prerouting .remove_redirect(redirected_port, target_port) @@ -129,13 +129,13 @@ where /// Extends the [`MeshVendor`] type with methods that are only relevant for the agent. 
pub(super) trait MeshVendorExt: Sized { - fn detect(ipt: &IPT) -> Result>; + fn detect(ipt: &IPT) -> AgentResult>; fn input_chain(&self) -> &str; fn skip_ports_regex(&self) -> Option<&Regex>; } impl MeshVendorExt for MeshVendor { - fn detect(ipt: &IPT) -> Result> { + fn detect(ipt: &IPT) -> AgentResult> { if let Ok(val) = std::env::var("MIRRORD_AGENT_ISTIO_CNI") && val.to_lowercase() == "true" { diff --git a/mirrord/agent/src/steal/ip_tables/mesh/istio.rs b/mirrord/agent/src/steal/ip_tables/mesh/istio.rs index cd3d4b06fa9..01e513a6bf9 100644 --- a/mirrord/agent/src/steal/ip_tables/mesh/istio.rs +++ b/mirrord/agent/src/steal/ip_tables/mesh/istio.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use mirrord_protocol::Port; use crate::{ - error::Result, + error::AgentResult, steal::ip_tables::{ output::OutputRedirect, prerouting::PreroutingRedirect, redirect::Redirect, IPTables, IPTABLE_IPV4_ROUTE_LOCALNET_ORIGINAL, IPTABLE_MESH, @@ -20,14 +20,14 @@ impl AmbientRedirect where IPT: IPTables, { - pub fn create(ipt: Arc, pod_ips: Option<&str>) -> Result { + pub fn create(ipt: Arc, pod_ips: Option<&str>) -> AgentResult { let prerouting = PreroutingRedirect::create(ipt.clone())?; let output = OutputRedirect::create(ipt, IPTABLE_MESH.to_string(), pod_ips)?; Ok(AmbientRedirect { prerouting, output }) } - pub fn load(ipt: Arc) -> Result { + pub fn load(ipt: Arc) -> AgentResult { let prerouting = PreroutingRedirect::load(ipt.clone())?; let output = OutputRedirect::load(ipt, IPTABLE_MESH.to_string())?; @@ -40,7 +40,7 @@ impl Redirect for AmbientRedirect where IPT: IPTables + Send + Sync, { - async fn mount_entrypoint(&self) -> Result<()> { + async fn mount_entrypoint(&self) -> AgentResult<()> { tokio::fs::write("/proc/sys/net/ipv4/conf/all/route_localnet", "1".as_bytes()).await?; self.prerouting.mount_entrypoint().await?; @@ -49,7 +49,7 @@ where Ok(()) } - async fn unmount_entrypoint(&self) -> Result<()> { + async fn unmount_entrypoint(&self) -> AgentResult<()> { 
self.prerouting.unmount_entrypoint().await?; self.output.unmount_entrypoint().await?; @@ -62,7 +62,7 @@ where Ok(()) } - async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { self.prerouting .add_redirect(redirected_port, target_port) .await?; @@ -73,7 +73,7 @@ where Ok(()) } - async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { self.prerouting .remove_redirect(redirected_port, target_port) .await?; diff --git a/mirrord/agent/src/steal/ip_tables/output.rs b/mirrord/agent/src/steal/ip_tables/output.rs index 944bc26f95b..b28cc93b24b 100644 --- a/mirrord/agent/src/steal/ip_tables/output.rs +++ b/mirrord/agent/src/steal/ip_tables/output.rs @@ -6,7 +6,7 @@ use nix::unistd::getgid; use tracing::warn; use crate::{ - error::Result, + error::AgentResult, steal::ip_tables::{chain::IPTableChain, IPTables, Redirect}, }; @@ -20,7 +20,7 @@ where { const ENTRYPOINT: &'static str = "OUTPUT"; - pub fn create(ipt: Arc, chain_name: String, pod_ips: Option<&str>) -> Result { + pub fn create(ipt: Arc, chain_name: String, pod_ips: Option<&str>) -> AgentResult { let managed = IPTableChain::create(ipt, chain_name)?; let exclude_source_ips = pod_ips @@ -39,7 +39,7 @@ where Ok(OutputRedirect { managed }) } - pub fn load(ipt: Arc, chain_name: String) -> Result { + pub fn load(ipt: Arc, chain_name: String) -> AgentResult { let managed = IPTableChain::load(ipt, chain_name)?; Ok(OutputRedirect { managed }) @@ -53,7 +53,7 @@ impl Redirect for OutputRedirect where IPT: IPTables + Send + Sync, { - async fn mount_entrypoint(&self) -> Result<()> { + async fn mount_entrypoint(&self) -> AgentResult<()> { if USE_INSERT { self.managed.inner().insert_rule( Self::ENTRYPOINT, @@ -70,7 +70,7 @@ where Ok(()) } - async fn unmount_entrypoint(&self) -> 
Result<()> { + async fn unmount_entrypoint(&self) -> AgentResult<()> { self.managed.inner().remove_rule( Self::ENTRYPOINT, &format!("-j {}", self.managed.chain_name()), @@ -79,7 +79,7 @@ where Ok(()) } - async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { let redirect_rule = format!( "-o lo -m tcp -p tcp --dport {redirected_port} -j REDIRECT --to-ports {target_port}" ); @@ -89,7 +89,7 @@ where Ok(()) } - async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { let redirect_rule = format!( "-o lo -m tcp -p tcp --dport {redirected_port} -j REDIRECT --to-ports {target_port}" ); diff --git a/mirrord/agent/src/steal/ip_tables/prerouting.rs b/mirrord/agent/src/steal/ip_tables/prerouting.rs index 486b0ca1b51..29d5de06103 100644 --- a/mirrord/agent/src/steal/ip_tables/prerouting.rs +++ b/mirrord/agent/src/steal/ip_tables/prerouting.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use mirrord_protocol::Port; use crate::{ - error::Result, + error::AgentResult, steal::ip_tables::{chain::IPTableChain, IPTables, Redirect, IPTABLE_PREROUTING}, }; @@ -18,13 +18,13 @@ where { const ENTRYPOINT: &'static str = "PREROUTING"; - pub fn create(ipt: Arc) -> Result { + pub fn create(ipt: Arc) -> AgentResult { let managed = IPTableChain::create(ipt, IPTABLE_PREROUTING.to_string())?; Ok(PreroutingRedirect { managed }) } - pub fn load(ipt: Arc) -> Result { + pub fn load(ipt: Arc) -> AgentResult { let managed = IPTableChain::load(ipt, IPTABLE_PREROUTING.to_string())?; Ok(PreroutingRedirect { managed }) @@ -36,7 +36,7 @@ impl Redirect for PreroutingRedirect where IPT: IPTables + Send + Sync, { - async fn mount_entrypoint(&self) -> Result<()> { + async fn mount_entrypoint(&self) -> AgentResult<()> { self.managed.inner().add_rule( 
Self::ENTRYPOINT, &format!("-j {}", self.managed.chain_name()), @@ -45,7 +45,7 @@ where Ok(()) } - async fn unmount_entrypoint(&self) -> Result<()> { + async fn unmount_entrypoint(&self) -> AgentResult<()> { self.managed.inner().remove_rule( Self::ENTRYPOINT, &format!("-j {}", self.managed.chain_name()), @@ -54,7 +54,7 @@ where Ok(()) } - async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { let redirect_rule = format!("-m tcp -p tcp --dport {redirected_port} -j REDIRECT --to-ports {target_port}"); @@ -63,7 +63,7 @@ where Ok(()) } - async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { let redirect_rule = format!("-m tcp -p tcp --dport {redirected_port} -j REDIRECT --to-ports {target_port}"); diff --git a/mirrord/agent/src/steal/ip_tables/redirect.rs b/mirrord/agent/src/steal/ip_tables/redirect.rs index d18aeb1d7ea..fe52d90fc1e 100644 --- a/mirrord/agent/src/steal/ip_tables/redirect.rs +++ b/mirrord/agent/src/steal/ip_tables/redirect.rs @@ -2,17 +2,17 @@ use async_trait::async_trait; use enum_dispatch::enum_dispatch; use mirrord_protocol::Port; -use crate::error::Result; +use crate::error::AgentResult; #[async_trait] #[enum_dispatch] pub(crate) trait Redirect { - async fn mount_entrypoint(&self) -> Result<()>; + async fn mount_entrypoint(&self) -> AgentResult<()>; - async fn unmount_entrypoint(&self) -> Result<()>; + async fn unmount_entrypoint(&self) -> AgentResult<()>; /// Create port redirection - async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()>; + async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()>; /// Remove port redirection - async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()>; + async fn 
remove_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()>; } diff --git a/mirrord/agent/src/steal/ip_tables/standard.rs b/mirrord/agent/src/steal/ip_tables/standard.rs index 3302b05c02e..47b9bf0c0af 100644 --- a/mirrord/agent/src/steal/ip_tables/standard.rs +++ b/mirrord/agent/src/steal/ip_tables/standard.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use mirrord_protocol::Port; use crate::{ - error::Result, + error::AgentResult, steal::ip_tables::{ output::OutputRedirect, prerouting::PreroutingRedirect, IPTables, Redirect, IPTABLE_STANDARD, @@ -20,14 +20,14 @@ impl StandardRedirect where IPT: IPTables, { - pub fn create(ipt: Arc, pod_ips: Option<&str>) -> Result { + pub fn create(ipt: Arc, pod_ips: Option<&str>) -> AgentResult { let prerouting = PreroutingRedirect::create(ipt.clone())?; let output = OutputRedirect::create(ipt, IPTABLE_STANDARD.to_string(), pod_ips)?; Ok(StandardRedirect { prerouting, output }) } - pub fn load(ipt: Arc) -> Result { + pub fn load(ipt: Arc) -> AgentResult { let prerouting = PreroutingRedirect::load(ipt.clone())?; let output = OutputRedirect::load(ipt, IPTABLE_STANDARD.to_string())?; @@ -42,21 +42,21 @@ impl Redirect for StandardRedirect where IPT: IPTables + Send + Sync, { - async fn mount_entrypoint(&self) -> Result<()> { + async fn mount_entrypoint(&self) -> AgentResult<()> { self.prerouting.mount_entrypoint().await?; self.output.mount_entrypoint().await?; Ok(()) } - async fn unmount_entrypoint(&self) -> Result<()> { + async fn unmount_entrypoint(&self) -> AgentResult<()> { self.prerouting.unmount_entrypoint().await?; self.output.unmount_entrypoint().await?; Ok(()) } - async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> Result<()> { + async fn add_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { self.prerouting .add_redirect(redirected_port, target_port) .await?; @@ -67,7 +67,7 @@ where Ok(()) } - async fn remove_redirect(&self, redirected_port: Port, 
target_port: Port) -> Result<()> { + async fn remove_redirect(&self, redirected_port: Port, target_port: Port) -> AgentResult<()> { self.prerouting .remove_redirect(redirected_port, target_port) .await?; diff --git a/mirrord/agent/src/vpn.rs b/mirrord/agent/src/vpn.rs index dd8c3a5133f..d7d30d5ca6f 100644 --- a/mirrord/agent/src/vpn.rs +++ b/mirrord/agent/src/vpn.rs @@ -17,7 +17,7 @@ use tokio::{ }; use crate::{ - error::{AgentError, Result}, + error::{AgentError, AgentResult}, util::run_thread_in_namespace, watched_task::{TaskStatus, WatchedTask}, }; @@ -75,7 +75,7 @@ impl VpnApi { /// Sends the [`ClientVpn`] message to the background task. #[tracing::instrument(level = "trace", skip(self))] - pub(crate) async fn layer_message(&mut self, message: ClientVpn) -> Result<()> { + pub(crate) async fn layer_message(&mut self, message: ClientVpn) -> AgentResult<()> { if self.layer_tx.send(message).await.is_ok() { Ok(()) } else { @@ -84,7 +84,7 @@ impl VpnApi { } /// Receives a [`ServerVpn`] message from the background task. - pub(crate) async fn daemon_message(&mut self) -> Result { + pub(crate) async fn daemon_message(&mut self) -> AgentResult { match self.daemon_rx.recv().await { Some(msg) => Ok(msg), None => Err(self.task_status.unwrap_err().await), @@ -121,7 +121,7 @@ impl AsyncRawSocket { } } -async fn create_raw_socket() -> Result { +async fn create_raw_socket() -> AgentResult { let index = nix::net::if_::if_nametoindex("eth0") .map_err(|err| AgentError::VpnError(err.to_string()))?; @@ -139,7 +139,7 @@ async fn create_raw_socket() -> Result { } #[tracing::instrument(level = "debug", ret)] -async fn resolve_interface() -> Result<(IpAddr, IpAddr, IpAddr)> { +async fn resolve_interface() -> AgentResult<(IpAddr, IpAddr, IpAddr)> { // Connect to a remote address so we can later get the default network interface. 
let temporary_socket = UdpSocket::bind("0.0.0.0:0").await?; temporary_socket.connect("8.8.8.8:53").await?; @@ -209,7 +209,7 @@ impl fmt::Debug for VpnTask { } } -fn interface_index_to_sock_addr(index: i32) -> Result { +fn interface_index_to_sock_addr(index: i32) -> AgentResult { let mut addr_storage: libc::sockaddr_storage = unsafe { std::mem::zeroed() }; let len = std::mem::size_of::() as libc::socklen_t; let macs = procfs::net::arp().map_err(|err| AgentError::VpnError(err.to_string()))?; @@ -245,7 +245,7 @@ impl VpnTask { } #[allow(clippy::indexing_slicing)] - async fn run(mut self) -> Result<()> { + async fn run(mut self) -> AgentResult<()> { // so host won't respond with RST to our packets. // TODO: need to do it for UDP as well to avoid ICMP unreachable. let output = std::process::Command::new("iptables") @@ -318,7 +318,7 @@ impl VpnTask { &mut self, message: ClientVpn, network_configuration: &NetworkConfiguration, - ) -> Result<()> { + ) -> AgentResult<()> { match message { // We make connection to the requested address, split the stream into halves with // `io::split`, and put them into respective maps. diff --git a/mirrord/config/src/agent.rs b/mirrord/config/src/agent.rs index b82c45a7177..79b3bdd27a9 100644 --- a/mirrord/config/src/agent.rs +++ b/mirrord/config/src/agent.rs @@ -322,7 +322,11 @@ pub struct AgentConfig { /// /// ```json /// { - /// "annotations": { "cats.io/inject": "enabled" } + /// "annotations": { + /// "cats.io/inject": "enabled", + /// "prometheus.io/scrape": "true", + /// "prometheus.io/port": "9000" + /// } /// } /// ``` pub annotations: Option>, @@ -350,6 +354,21 @@ pub struct AgentConfig { /// ``` pub service_account: Option, + /// ### agent.metrics {#agent-metrics} + /// + /// Enables prometheus metrics for the agent pod. + /// + /// You might need to add annotations to the agent pod depending on how prometheus is + /// configured to scrape for metrics.
+ /// + /// ```json + /// { + /// "metrics": "0.0.0.0:9000" + /// } + /// ``` + #[config(env = "MIRRORD_AGENT_METRICS")] + pub metrics: Option, + /// /// Create an agent that returns an error after accepting the first client. For testing /// purposes. Only supported with job agents (not with ephemeral agents). diff --git a/mirrord/kube/src/api/container/job.rs b/mirrord/kube/src/api/container/job.rs index b8d702f13f9..907aefffeeb 100644 --- a/mirrord/kube/src/api/container/job.rs +++ b/mirrord/kube/src/api/container/job.rs @@ -152,8 +152,6 @@ where "disabled".to_string(), ), ("app".to_string(), "mirrord".to_string()), - ("prometheus.io/scrape".to_string(), "true".to_string()), - ("prometheus.io/port".to_string(), "9000".to_string()), ])); let mut annotations = config @@ -165,9 +163,6 @@ where annotations.extend(BTreeMap::from([ ("sidecar.istio.io/inject".to_string(), "false".to_string()), ("linkerd.io/inject".to_string(), "disabled".to_string()), - ("prometheus.io/scrape".to_string(), "true".to_string()), - // ("prometheus.io/path".to_string(), "/metrics".to_string()), - ("prometheus.io/port".to_string(), "9000".to_string()), ])); pod.labels_mut().extend(labels.clone()); diff --git a/mirrord/kube/src/api/container/pod.rs b/mirrord/kube/src/api/container/pod.rs index 9ecb9317cea..f8461e8a002 100644 --- a/mirrord/kube/src/api/container/pod.rs +++ b/mirrord/kube/src/api/container/pod.rs @@ -106,8 +106,6 @@ impl ContainerVariant for PodVariant<'_> { [ ("sidecar.istio.io/inject".to_string(), "false".to_string()), ("linkerd.io/inject".to_string(), "disabled".to_string()), - ("prometheus.io/scrape".to_string(), "true".to_string()), - ("prometheus.io/port".to_string(), "9000".to_string()), ] .into(), ), @@ -118,8 +116,6 @@ impl ContainerVariant for PodVariant<'_> { "disabled".to_string(), ), ("app".to_string(), "mirrord".to_string()), - ("prometheus.io/scrape".to_string(), "true".to_string()), - ("prometheus.io/port".to_string(), "9000".to_string()), ] .into(), ), diff 
--git a/mirrord/kube/src/api/container/util.rs b/mirrord/kube/src/api/container/util.rs index 77f917378ce..bde5fb647a0 100644 --- a/mirrord/kube/src/api/container/util.rs +++ b/mirrord/kube/src/api/container/util.rs @@ -4,7 +4,7 @@ use futures::{AsyncBufReadExt, TryStreamExt}; use k8s_openapi::api::core::v1::{EnvVar, Pod, Toleration}; use kube::{api::LogParams, Api}; use mirrord_config::agent::{AgentConfig, LinuxCapability}; -use mirrord_protocol::{AGENT_NETWORK_INTERFACE_ENV, AGENT_OPERATOR_CERT_ENV}; +use mirrord_protocol::{AGENT_METRICS_ENV, AGENT_NETWORK_INTERFACE_ENV, AGENT_OPERATOR_CERT_ENV}; use regex::Regex; use tracing::warn; @@ -59,6 +59,7 @@ pub(super) fn agent_env(agent: &AgentConfig, params: &&ContainerParams) -> Vec Vec Date: Fri, 20 Dec 2024 15:42:40 -0300 Subject: [PATCH 17/85] use socketaddr --- mirrord/agent/src/cli.rs | 4 +++- mirrord/agent/src/entrypoint.rs | 5 ++--- mirrord/agent/src/error.rs | 3 --- mirrord/config/src/agent.rs | 5 ++--- mirrord/kube/src/api/container/util.rs | 4 ++-- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/mirrord/agent/src/cli.rs b/mirrord/agent/src/cli.rs index a6b3feba535..144ff3d9eb5 100644 --- a/mirrord/agent/src/cli.rs +++ b/mirrord/agent/src/cli.rs @@ -1,5 +1,7 @@ #![deny(missing_docs)] +use std::net::SocketAddr; + use clap::{Parser, Subcommand}; use mirrord_protocol::{ MeshVendor, AGENT_METRICS_ENV, AGENT_NETWORK_INTERFACE_ENV, AGENT_OPERATOR_CERT_ENV, @@ -30,7 +32,7 @@ pub struct Args { /// Controls whether metrics are enabled, and the address to set up the metrics server. #[arg(long, env = AGENT_METRICS_ENV)] - pub metrics: Option, + pub metrics: Option, /// Return an error after accepting the first client connection, in order to test agent error /// cleanup. 
diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index c72409e5f4e..c10211eb241 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -493,10 +493,9 @@ impl ClientConnectionHandler { async fn start_agent(args: Args) -> AgentResult<()> { trace!("start_agent -> Starting agent with args: {args:?}"); - if let Some(metrics_address) = args.metrics.as_ref() { - let address = metrics_address.parse()?; + if let Some(metrics_address) = args.metrics.clone() { tokio::spawn(async move { - start_metrics(address) + start_metrics(metrics_address) .await .inspect_err(|fail| tracing::error!(?fail, "Failed starting metrics server!")) }); diff --git a/mirrord/agent/src/error.rs b/mirrord/agent/src/error.rs index c0c05fcfdf4..f5cd0de674d 100644 --- a/mirrord/agent/src/error.rs +++ b/mirrord/agent/src/error.rs @@ -84,9 +84,6 @@ pub(crate) enum AgentError { /// Temporary error for vpn feature #[error("Generic error in vpn: {0}")] VpnError(String), - - #[error(transparent)] - AddrParse(#[from] std::net::AddrParseError), } impl From> for AgentError { diff --git a/mirrord/config/src/agent.rs b/mirrord/config/src/agent.rs index 79b3bdd27a9..4feebb4c6cd 100644 --- a/mirrord/config/src/agent.rs +++ b/mirrord/config/src/agent.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, fmt, path::Path}; +use std::{collections::HashMap, fmt, net::SocketAddr, path::Path}; use k8s_openapi::api::core::v1::{ResourceRequirements, Toleration}; use mirrord_analytics::CollectAnalytics; @@ -366,8 +366,7 @@ pub struct AgentConfig { /// "metrics": "0.0.0.0:9000" /// } /// ``` - #[config(env = "MIRRORD_AGENT_METRICS")] - pub metrics: Option, + pub metrics: Option, /// /// Create an agent that returns an error after accepting the first client. 
For testing diff --git a/mirrord/kube/src/api/container/util.rs b/mirrord/kube/src/api/container/util.rs index bde5fb647a0..de18cd1c161 100644 --- a/mirrord/kube/src/api/container/util.rs +++ b/mirrord/kube/src/api/container/util.rs @@ -68,8 +68,8 @@ pub(super) fn agent_env(agent: &AgentConfig, params: &&ContainerParams) -> Vec Date: Fri, 20 Dec 2024 15:44:09 -0300 Subject: [PATCH 18/85] spawn inside spawn --- mirrord/agent/src/metrics.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 4017f440fe7..509cf731e87 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -112,12 +112,10 @@ pub(crate) async fn start_metrics(address: SocketAddr) -> Result<(), axum::BoxEr .map_err(AgentError::from) .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; - tokio::spawn(async move { - axum::serve(listener, app).await.inspect_err(|fail| { - tracing::error!(%fail, "Could not start agent metrics + let _ = axum::serve(listener, app).await.inspect_err(|fail| { + tracing::error!(%fail, "Could not start agent metrics server!") - }) - }); + })?; Ok(()) } From bb234efa5d69967f49013d9fde08e4687eced148 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Fri, 20 Dec 2024 15:45:00 -0300 Subject: [PATCH 19/85] error impl send --- mirrord/agent/src/metrics.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 509cf731e87..99311ca1eac 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -81,8 +81,6 @@ pub(crate) enum MetricsError { Prometheus(#[from] prometheus::Error), } -unsafe impl Send for MetricsError {} - impl IntoResponse for MetricsError { fn into_response(self) -> axum::response::Response { (http::StatusCode::INTERNAL_SERVER_ERROR, self.to_string()).into_response() From f54dc5c30cf21844f6e2a7e59456f0e367f18c16 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Fri, 20 Dec 2024 
15:49:17 -0300 Subject: [PATCH 20/85] move where we inc subs for sniffer --- mirrord/agent/src/sniffer.rs | 9 +++++++-- mirrord/agent/src/sniffer/api.rs | 13 +------------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/mirrord/agent/src/sniffer.rs b/mirrord/agent/src/sniffer.rs index cbf1a4d578a..84826977877 100644 --- a/mirrord/agent/src/sniffer.rs +++ b/mirrord/agent/src/sniffer.rs @@ -26,7 +26,7 @@ use self::{ use crate::{ error::AgentError, http::HttpVersion, - metrics::MIRROR_CONNECTION_SUBSCRIPTION, + metrics::{MIRROR_CONNECTION_SUBSCRIPTION, MIRROR_PORT_SUBSCRIPTION}, util::{ChannelClosedFuture, ClientId, Subscriptions}, }; @@ -269,6 +269,8 @@ where command: SnifferCommandInner::NewClient(sender), } => { self.handle_new_client(client_id, sender); + + MIRROR_CONNECTION_SUBSCRIPTION.inc(); } SnifferCommand { @@ -277,7 +279,8 @@ where } => { if self.port_subscriptions.subscribe(client_id, port) { self.update_packet_filter()?; - MIRROR_CONNECTION_SUBSCRIPTION.inc(); + + MIRROR_PORT_SUBSCRIPTION.inc(); } let _ = tx.send(port); @@ -289,6 +292,8 @@ where } => { if self.port_subscriptions.unsubscribe(client_id, port) { self.update_packet_filter()?; + + MIRROR_PORT_SUBSCRIPTION.dec(); } } } diff --git a/mirrord/agent/src/sniffer/api.rs b/mirrord/agent/src/sniffer/api.rs index 86951e3aec3..2dea5534dce 100644 --- a/mirrord/agent/src/sniffer/api.rs +++ b/mirrord/agent/src/sniffer/api.rs @@ -15,9 +15,7 @@ use tokio_stream::{ }; use super::messages::{SniffedConnection, SnifferCommand, SnifferCommandInner}; -use crate::{ - error::AgentError, metrics::MIRROR_PORT_SUBSCRIPTION, util::ClientId, watched_task::TaskStatus, -}; +use crate::{error::AgentError, util::ClientId, watched_task::TaskStatus}; /// Interface used by clients to interact with the /// [`TcpConnectionSniffer`](super::TcpConnectionSniffer). 
Multiple instances of this struct operate @@ -171,26 +169,17 @@ impl TcpSnifferApi { self.send_command(SnifferCommandInner::Subscribe(port, tx)) .await?; self.subscriptions_in_progress.push(rx); - - MIRROR_PORT_SUBSCRIPTION.inc(); - Ok(()) } LayerTcp::PortUnsubscribe(port) => { self.send_command(SnifferCommandInner::UnsubscribePort(port)) .await?; - - MIRROR_PORT_SUBSCRIPTION.dec(); - Ok(()) } LayerTcp::ConnectionUnsubscribe(connection_id) => { self.connections.remove(&connection_id); - - MIRROR_PORT_SUBSCRIPTION.dec(); - Ok(()) } } From f885bdafe01514f23b8d089d560bfa1230ec7821 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Fri, 20 Dec 2024 15:49:58 -0300 Subject: [PATCH 21/85] no clone --- mirrord/agent/src/steal/http/filter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirrord/agent/src/steal/http/filter.rs b/mirrord/agent/src/steal/http/filter.rs index caba22302b1..8afcbc85f25 100644 --- a/mirrord/agent/src/steal/http/filter.rs +++ b/mirrord/agent/src/steal/http/filter.rs @@ -3,7 +3,7 @@ use hyper::Request; use tracing::Level; /// Currently supported filtering criterias. -#[derive(Debug, Clone)] +#[derive(Debug)] pub enum HttpFilter { /// Header based filter. /// This [`Regex`] should be used against each header after transforming it to `k: v` format. From 5bb9ca5130077477938727fe735bd3aa277a4faf Mon Sep 17 00:00:00 2001 From: meowjesty Date: Fri, 20 Dec 2024 15:57:19 -0300 Subject: [PATCH 22/85] dec things closer to remove --- mirrord/agent/src/steal/connection.rs | 18 ++-------- mirrord/agent/src/steal/subscriptions.rs | 42 +++++++++++++----------- 2 files changed, 25 insertions(+), 35 deletions(-) diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index ab30fc61499..ba64471153a 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -591,15 +591,7 @@ impl TcpConnectionStealer { /// connections. 
#[tracing::instrument(level = "trace", skip(self))] async fn close_client(&mut self, client_id: ClientId) -> AgentResult<(), AgentError> { - let removed_subscriptions = self.port_subscriptions.remove_all(client_id).await?; - - for filtered in removed_subscriptions { - if filtered { - STEAL_FILTERED_PORT_SUBSCRIPTION.dec(); - } else { - STEAL_UNFILTERED_PORT_SUBSCRIPTION.dec(); - } - } + self.port_subscriptions.remove_all(client_id).await?; let client = self.clients.remove(&client_id).expect("client not found"); for connection in client.subscribed_connections { @@ -696,13 +688,7 @@ impl TcpConnectionStealer { } Command::PortUnsubscribe(port) => { - if let Some(filtered) = self.port_subscriptions.remove(client_id, port).await? { - if filtered { - STEAL_FILTERED_PORT_SUBSCRIPTION.inc(); - } else { - STEAL_UNFILTERED_PORT_SUBSCRIPTION.inc(); - } - } + self.port_subscriptions.remove(client_id, port).await?; } Command::ResponseData(TcpData { diff --git a/mirrord/agent/src/steal/subscriptions.rs b/mirrord/agent/src/steal/subscriptions.rs index 0fcf313d7e6..0fe7ccec94c 100644 --- a/mirrord/agent/src/steal/subscriptions.rs +++ b/mirrord/agent/src/steal/subscriptions.rs @@ -12,7 +12,11 @@ use super::{ http::HttpFilter, ip_tables::{new_iptables, IPTablesWrapper, SafeIpTables}, }; -use crate::{error::AgentError, util::ClientId}; +use crate::{ + error::AgentError, + metrics::{STEAL_FILTERED_PORT_SUBSCRIPTION, STEAL_UNFILTERED_PORT_SUBSCRIPTION}, + util::ClientId, +}; /// For stealing incoming TCP connections. #[async_trait::async_trait] @@ -225,29 +229,32 @@ impl PortSubscriptions { /// If this method returns an [`Err`], it means that this set is out of sync with the inner /// [`PortRedirector`] and it is no longer usable. It is a caller's responsibility to clean /// up any external state. 
- pub async fn remove( - &mut self, - client_id: ClientId, - port: Port, - ) -> Result, R::Error> { + pub async fn remove(&mut self, client_id: ClientId, port: Port) -> Result<(), R::Error> { let Entry::Occupied(mut e) = self.subscriptions.entry(port) else { - return Ok(None); + return Ok(()); }; - let (remove_redirect, filtered) = match e.get_mut() { + let remove_redirect = match e.get_mut() { PortSubscription::Unfiltered(subscribed_client) if *subscribed_client == client_id => { e.remove(); - (true, Some(false)) + STEAL_UNFILTERED_PORT_SUBSCRIPTION.dec(); + + true + } + PortSubscription::Unfiltered(..) => { + STEAL_UNFILTERED_PORT_SUBSCRIPTION.dec(); + false } - PortSubscription::Unfiltered(..) => (false, Some(false)), PortSubscription::Filtered(filters) => { filters.remove(&client_id); if filters.is_empty() { e.remove(); - (true, Some(true)) + STEAL_FILTERED_PORT_SUBSCRIPTION.dec(); + true } else { - (false, Some(true)) + STEAL_FILTERED_PORT_SUBSCRIPTION.dec(); + false } } }; @@ -260,7 +267,7 @@ impl PortSubscriptions { } } - Ok(filtered) + Ok(()) } /// Remove all client subscriptions from this set. @@ -274,21 +281,18 @@ impl PortSubscriptions { /// If this method returns an [`Err`], it means that this set is out of sync with the inner /// [`PortRedirector`] and it is no longer usable. It is a caller's responsibility to clean /// up any external state. - pub async fn remove_all(&mut self, client_id: ClientId) -> Result, R::Error> { + pub async fn remove_all(&mut self, client_id: ClientId) -> Result<(), R::Error> { let ports = self .subscriptions .iter() .filter_map(|(k, v)| v.has_client(client_id).then_some(*k)) .collect::>(); - let mut all_removed = Vec::new(); for port in ports { - if let Some(removed) = self.remove(client_id, port).await? { - all_removed.push(removed); - } + self.remove(client_id, port).await?; } - Ok(all_removed) + Ok(()) } /// Return a subscription for the given `port`. 
From 9d8aebdb0c4db08a57f70a1c7059fea46ccff698 Mon Sep 17 00:00:00 2001 From: meowjesty <43983236+meowjesty@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:58:15 -0300 Subject: [PATCH 23/85] better help MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Smolarek <34063647+Razz4780@users.noreply.github.com> --- mirrord/agent/src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 99311ca1eac..b9a94f6065f 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -11,7 +11,7 @@ use crate::error::AgentError; pub(crate) static OPEN_FD_COUNT: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_open_fd_count", - "amount of open fds in mirrord-agent" + "amount of open file descriptors in mirrord-agent file manager" ) .expect("Valid at initialization!") }); From 12e9c96fa99d7c898c94b54049e3ccd05e778d4f Mon Sep 17 00:00:00 2001 From: meowjesty <43983236+meowjesty@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:58:26 -0300 Subject: [PATCH 24/85] fix help MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Smolarek <34063647+Razz4780@users.noreply.github.com> --- mirrord/agent/src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index b9a94f6065f..b674f04bd91 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -27,7 +27,7 @@ pub(crate) static MIRROR_PORT_SUBSCRIPTION: LazyLock = LazyLock::new(| pub(crate) static MIRROR_CONNECTION_SUBSCRIPTION: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_mirror_connection_subscription_count", - "amount of connections in steal mode in mirrord-agent" + "amount of connections in mirror mode in mirrord-agent" ) .expect("Valid at initialization!") }); From 
ab07a8f26b8dc952565fb8d97214140431784c0a Mon Sep 17 00:00:00 2001 From: meowjesty Date: Fri, 20 Dec 2024 16:00:06 -0300 Subject: [PATCH 25/85] no async file manager --- mirrord/agent/src/entrypoint.rs | 2 +- mirrord/agent/src/file.rs | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index c10211eb241..3579a2cf541 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -401,7 +401,7 @@ impl ClientConnectionHandler { async fn handle_client_message(&mut self, message: ClientMessage) -> AgentResult { match message { ClientMessage::FileRequest(req) => { - if let Some(response) = self.file_manager.handle_message(req).await? { + if let Some(response) = self.file_manager.handle_message(req)? { self.respond(DaemonMessage::File(response)) .await .inspect_err(|fail| { diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 4ad1aa89266..7186803aab9 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -136,7 +136,7 @@ pub fn resolve_path + std::fmt::Debug, R: AsRef + std::fmt: impl FileManager { /// Executes the request and returns the response. 
#[tracing::instrument(level = Level::TRACE, skip(self), err)] - pub(crate) async fn handle_message( + pub(crate) fn handle_message( &mut self, request: FileRequest, ) -> AgentResult> { @@ -147,7 +147,7 @@ impl FileManager { .strip_prefix("/") .inspect_err(|fail| error!("file_worker -> {:#?}", fail))?; - let open_result = self.open(path.into(), open_options).await; + let open_result = self.open(path.into(), open_options); Some(FileResponse::Open(open_result)) } FileRequest::OpenRelative(OpenRelativeFileRequest { @@ -155,7 +155,7 @@ impl FileManager { path, open_options, }) => { - let open_result = self.open_relative(relative_fd, path, open_options).await; + let open_result = self.open_relative(relative_fd, path, open_options); Some(FileResponse::Open(open_result)) } FileRequest::Read(ReadFileRequest { @@ -193,7 +193,7 @@ impl FileManager { let write_result = self.write_limited(remote_fd, start_from, write_bytes); Some(FileResponse::WriteLimited(write_result)) } - FileRequest::Close(CloseFileRequest { fd }) => self.close(fd).await, + FileRequest::Close(CloseFileRequest { fd }) => self.close(fd), FileRequest::Access(AccessFileRequest { pathname, mode }) => { let pathname = pathname .strip_prefix("/") @@ -217,7 +217,7 @@ impl FileManager { // dir operations FileRequest::FdOpenDir(FdOpenDirRequest { remote_fd }) => { - let open_dir_result = self.fdopen_dir(remote_fd).await; + let open_dir_result = self.fdopen_dir(remote_fd); Some(FileResponse::OpenDir(open_dir_result)) } FileRequest::ReadDir(ReadDirRequest { remote_fd }) => { @@ -228,7 +228,7 @@ impl FileManager { let read_dir_result = self.read_dir_batch(remote_fd, amount); Some(FileResponse::ReadDirBatch(read_dir_result)) } - FileRequest::CloseDir(CloseDirRequest { remote_fd }) => self.close_dir(remote_fd).await, + FileRequest::CloseDir(CloseDirRequest { remote_fd }) => self.close_dir(remote_fd), FileRequest::GetDEnts64(GetDEnts64Request { remote_fd, buffer_size, @@ -266,7 +266,7 @@ impl FileManager { at 
mirrord/agent/src/file.rs:261 */ #[tracing::instrument(level = Level::TRACE, skip(self), err(level = Level::DEBUG))] - async fn open( + fn open( &mut self, path: PathBuf, open_options: OpenOptionsInternal, @@ -295,7 +295,7 @@ impl FileManager { } #[tracing::instrument(level = Level::TRACE, skip(self), err(level = Level::DEBUG))] - async fn open_relative( + fn open_relative( &mut self, relative_fd: u64, path: PathBuf, @@ -572,7 +572,7 @@ impl FileManager { /// Always returns `None`, since we don't return any [`FileResponse`] back to mirrord /// on `close` of an fd. #[tracing::instrument(level = Level::TRACE, skip(self))] - pub(crate) async fn close(&mut self, fd: u64) -> Option { + pub(crate) fn close(&mut self, fd: u64) -> Option { if self.open_files.remove(&fd).is_none() { error!(fd, "fd not found!"); } else { @@ -585,7 +585,7 @@ impl FileManager { /// Always returns `None`, since we don't return any [`FileResponse`] back to mirrord /// on `close_dir` of an fd. #[tracing::instrument(level = Level::TRACE, skip(self))] - pub(crate) async fn close_dir(&mut self, fd: u64) -> Option { + pub(crate) fn close_dir(&mut self, fd: u64) -> Option { if self.dir_streams.remove(&fd).is_none() && self.getdents_streams.remove(&fd).is_none() { error!("FileManager::close_dir -> fd {:#?} not found", fd); } else { @@ -697,7 +697,7 @@ impl FileManager { } #[tracing::instrument(level = Level::TRACE, skip(self), err(level = Level::DEBUG))] - pub(crate) async fn fdopen_dir(&mut self, fd: u64) -> RemoteResult { + pub(crate) fn fdopen_dir(&mut self, fd: u64) -> RemoteResult { let path = match self .open_files .get(&fd) From 8f83012418c846a2b093a8be1845aad4ba225a25 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Fri, 20 Dec 2024 16:17:08 -0300 Subject: [PATCH 26/85] split filtered unfiltered steal inc --- mirrord/agent/src/metrics.rs | 24 +++++++++++++++++------- mirrord/agent/src/steal/connection.rs | 6 +++--- mirrord/agent/src/steal/connections.rs | 24 ++++++++++++++++++++++-- 3 files 
changed, 42 insertions(+), 12 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index b674f04bd91..af552df0c23 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -48,13 +48,23 @@ pub(crate) static STEAL_UNFILTERED_PORT_SUBSCRIPTION: LazyLock = LazyL .expect("Valid at initialization!") }); -pub(crate) static STEAL_CONNECTION_SUBSCRIPTION: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_steal_connection_subscription_count", - "amount of connections in steal mode in mirrord-agent" - ) - .expect("Valid at initialization!") -}); +pub(crate) static STEAL_FILTERED_CONNECTION_SUBSCRIPTION: LazyLock = + LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_steal_connection_subscription_count", + "amount of filtered connections in steal mode in mirrord-agent" + ) + .expect("Valid at initialization!") + }); + +pub(crate) static STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION: LazyLock = + LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_steal_connection_subscription_count", + "amount of unfiltered connections in steal mode in mirrord-agent" + ) + .expect("Valid at initialization!") + }); pub(crate) static TCP_OUTGOING_CONNECTION: LazyLock = LazyLock::new(|| { register_int_gauge!( diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index ba64471153a..9ea6df055ba 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -34,7 +34,7 @@ use tracing::{warn, Level}; use crate::{ error::{AgentError, AgentResult}, metrics::{ - STEAL_CONNECTION_SUBSCRIPTION, STEAL_FILTERED_PORT_SUBSCRIPTION, + STEAL_FILTERED_PORT_SUBSCRIPTION, STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION, STEAL_UNFILTERED_PORT_SUBSCRIPTION, }, steal::{ @@ -360,7 +360,7 @@ impl TcpConnectionStealer { Ok((stream, peer)) => { self.incoming_connection(stream, peer).await?; - STEAL_CONNECTION_SUBSCRIPTION.inc(); + 
STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); } Err(error) => { tracing::error!(?error, "Failed to accept a stolen connection"); @@ -676,7 +676,7 @@ impl TcpConnectionStealer { ) .await; - STEAL_CONNECTION_SUBSCRIPTION.dec(); + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); } Command::PortSubscribe(port_steal) => { diff --git a/mirrord/agent/src/steal/connections.rs b/mirrord/agent/src/steal/connections.rs index bc47eb80e4b..a9a56fa4799 100644 --- a/mirrord/agent/src/steal/connections.rs +++ b/mirrord/agent/src/steal/connections.rs @@ -11,10 +11,16 @@ use tokio::{ sync::mpsc::{self, error::SendError, Receiver, Sender}, task::JoinSet, }; +use tracing::Level; use self::{filtered::DynamicBody, unfiltered::UnfilteredStealTask}; use super::{http::DefaultReversibleStream, subscriptions::PortSubscription}; -use crate::{http::HttpVersion, steal::connections::filtered::FilteredStealTask, util::ClientId}; +use crate::{ + http::HttpVersion, + metrics::{STEAL_FILTERED_CONNECTION_SUBSCRIPTION, STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION}, + steal::connections::filtered::FilteredStealTask, + util::ClientId, +}; mod filtered; mod unfiltered; @@ -287,7 +293,7 @@ impl StolenConnections { /// Adds the given [`StolenConnection`] to this set. Spawns a new [`tokio::task`] that will /// manage it. 
- #[tracing::instrument(level = "trace", name = "manage_stolen_connection", skip(self))] + #[tracing::instrument(level = Level::TRACE, name = "manage_stolen_connection", skip(self))] pub fn manage(&mut self, connection: StolenConnection) { let connection_id = self.next_connection_id; self.next_connection_id += 1; @@ -295,6 +301,8 @@ impl StolenConnections { let (task_tx, task_rx) = mpsc::channel(Self::TASK_IN_CHANNEL_CAPACITY); let main_tx = self.main_tx.clone(); + let filtered = matches!(connection.port_subscription, PortSubscription::Filtered(..)); + tracing::trace!(connection_id, "Spawning connection task"); self.tasks.spawn(async move { let task = ConnectionTask { @@ -304,9 +312,21 @@ impl StolenConnections { rx: task_rx, }; + if filtered { + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.inc(); + } else { + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); + } + match task.run().await { Ok(()) => { tracing::trace!(connection_id, "Connection task finished"); + + if filtered { + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.dec(); + } else { + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + } } Err(error) => { tracing::trace!(connection_id, ?error, "Connection task failed"); From cd0b443e301ff2ee019a818fc88fd9d4a0ff4843 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Fri, 20 Dec 2024 16:18:01 -0300 Subject: [PATCH 27/85] remove unfiltered inc from wrong place --- mirrord/agent/src/steal/connection.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index 9ea6df055ba..650fcb41aa4 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -359,8 +359,6 @@ impl TcpConnectionStealer { accept = self.port_subscriptions.next_connection() => match accept { Ok((stream, peer)) => { self.incoming_connection(stream, peer).await?; - - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); } Err(error) => { tracing::error!(?error, "Failed to accept a stolen connection"); @@ 
-675,8 +673,6 @@ impl TcpConnectionStealer { ConnectionMessageIn::Unsubscribed { client_id }, ) .await; - - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); } Command::PortSubscribe(port_steal) => { From 020129ac47586ebabfee8f0c97c331a01e991139 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 23 Dec 2024 11:37:06 -0300 Subject: [PATCH 28/85] cancellation token --- mirrord/agent/src/entrypoint.rs | 17 +++++++++-------- mirrord/agent/src/metrics.rs | 15 +++++++++++---- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 3579a2cf541..80bd0a0174a 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -493,14 +493,6 @@ impl ClientConnectionHandler { async fn start_agent(args: Args) -> AgentResult<()> { trace!("start_agent -> Starting agent with args: {args:?}"); - if let Some(metrics_address) = args.metrics.clone() { - tokio::spawn(async move { - start_metrics(metrics_address) - .await - .inspect_err(|fail| tracing::error!(?fail, "Failed starting metrics server!")) - }); - } - let listener = TcpListener::bind(SocketAddrV4::new( Ipv4Addr::UNSPECIFIED, args.communicate_port, @@ -514,6 +506,15 @@ async fn start_agent(args: Args) -> AgentResult<()> { // To make sure that background tasks are cancelled when we exit early from this function. 
let cancel_guard = cancellation_token.clone().drop_guard(); + if let Some(metrics_address) = args.metrics.clone() { + let cancellation_token = cancellation_token.clone(); + tokio::spawn(async move { + start_metrics(metrics_address, cancellation_token) + .await + .inspect_err(|fail| tracing::error!(?fail, "Failed starting metrics server!")) + }); + } + let (sniffer_command_tx, sniffer_command_rx) = mpsc::channel::(1000); let (stealer_command_tx, stealer_command_rx) = mpsc::channel::(1000); let (dns_command_tx, dns_command_rx) = mpsc::channel::(1000); diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index af552df0c23..6878e8fb2dd 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -4,6 +4,7 @@ use axum::{response::IntoResponse, routing::get, Router}; use prometheus::{register_int_gauge, IntGauge}; use thiserror::Error; use tokio::net::TcpListener; +use tokio_util::sync::CancellationToken; use tracing::Level; use crate::error::AgentError; @@ -112,7 +113,10 @@ async fn get_metrics() -> Result { } #[tracing::instrument(level = Level::TRACE, skip_all, ret ,err)] -pub(crate) async fn start_metrics(address: SocketAddr) -> Result<(), axum::BoxError> { +pub(crate) async fn start_metrics( + address: SocketAddr, + cancellation_token: CancellationToken, +) -> Result<(), axum::BoxError> { let app = Router::new().route("/metrics", get(get_metrics)); let listener = TcpListener::bind(address) @@ -120,10 +124,13 @@ pub(crate) async fn start_metrics(address: SocketAddr) -> Result<(), axum::BoxEr .map_err(AgentError::from) .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; - let _ = axum::serve(listener, app).await.inspect_err(|fail| { - tracing::error!(%fail, "Could not start agent metrics + let _ = axum::serve(listener, app) + .with_graceful_shutdown(async move { cancellation_token.cancelled().await }) + .await + .inspect_err(|fail| { + tracing::error!(%fail, "Could not start agent metrics server!") - })?; + })?; 
Ok(()) } From 270ce31e4d18690c6bafe085ba19f3452fb62934 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 23 Dec 2024 12:01:12 -0300 Subject: [PATCH 29/85] unit test for metrics --- Cargo.lock | 1 + mirrord/agent/Cargo.toml | 1 + mirrord/agent/src/metrics.rs | 41 ++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 3ae46051709..4966ad80c7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4246,6 +4246,7 @@ dependencies = [ "rawsocket", "rcgen", "regex", + "reqwest 0.12.9", "rstest", "rustls 0.23.19", "semver 1.0.23", diff --git a/mirrord/agent/Cargo.toml b/mirrord/agent/Cargo.toml index e4757fccafe..cd2f5779123 100644 --- a/mirrord/agent/Cargo.toml +++ b/mirrord/agent/Cargo.toml @@ -84,3 +84,4 @@ rstest.workspace = true mockall = "0.13" test_bin = "0.4" rcgen.workspace = true +reqwest.workspace = true diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 6878e8fb2dd..c75aff3eed0 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -134,3 +134,44 @@ pub(crate) async fn start_metrics( Ok(()) } + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use tokio_util::sync::CancellationToken; + + use super::OPEN_FD_COUNT; + use crate::metrics::start_metrics; + + #[tokio::test] + async fn test_metrics() { + let metrics_address = "127.0.0.1:9000".parse().unwrap(); + let cancellation_token = CancellationToken::new(); + + let metrics_cancellation = cancellation_token.child_token(); + tokio::spawn(async move { + start_metrics(metrics_address, metrics_cancellation) + .await + .unwrap() + }); + + OPEN_FD_COUNT.inc(); + + // Give the server some time to start. 
+ tokio::time::sleep(Duration::from_secs(1)).await; + + let get_all_metrics = reqwest::get("http://127.0.0.1:9000/metrics") + .await + .unwrap() + .error_for_status() + .unwrap() + .text() + .await + .unwrap(); + + assert!(get_all_metrics.contains("mirrord_agent_open_fd_count 1")); + + cancellation_token.drop_guard(); + } +} From 21d8ce1f45f1412e7644b87fe234c8af5936cac4 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 23 Dec 2024 12:02:01 -0300 Subject: [PATCH 30/85] remove unused --- mirrord/agent/src/steal/connection.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index 650fcb41aa4..1ea50cf4913 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -33,10 +33,7 @@ use tracing::{warn, Level}; use crate::{ error::{AgentError, AgentResult}, - metrics::{ - STEAL_FILTERED_PORT_SUBSCRIPTION, STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION, - STEAL_UNFILTERED_PORT_SUBSCRIPTION, - }, + metrics::{STEAL_FILTERED_PORT_SUBSCRIPTION, STEAL_UNFILTERED_PORT_SUBSCRIPTION}, steal::{ connections::{ ConnectionMessageIn, ConnectionMessageOut, StolenConnection, StolenConnections, From 54702c49f33bd36d99ae247f89ad0cba64d1ed7f Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 23 Dec 2024 12:03:06 -0300 Subject: [PATCH 31/85] rustfmt --- mirrord/agent/src/steal/api.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mirrord/agent/src/steal/api.rs b/mirrord/agent/src/steal/api.rs index 7edaa0900b5..64905576b45 100644 --- a/mirrord/agent/src/steal/api.rs +++ b/mirrord/agent/src/steal/api.rs @@ -108,7 +108,10 @@ impl TcpStealerApi { /// agent, to an internal stealer command [`Command::PortSubscribe`]. /// /// The actual handling of this message is done in [`TcpConnectionStealer`]. 
- pub(crate) async fn port_subscribe(&mut self, port_steal: StealType) -> AgentResult<(), AgentError> { + pub(crate) async fn port_subscribe( + &mut self, + port_steal: StealType, + ) -> AgentResult<(), AgentError> { self.send_command(Command::PortSubscribe(port_steal)).await } @@ -159,7 +162,10 @@ impl TcpStealerApi { .await } - pub(crate) async fn handle_client_message(&mut self, message: LayerTcpSteal) -> AgentResult<()> { + pub(crate) async fn handle_client_message( + &mut self, + message: LayerTcpSteal, + ) -> AgentResult<()> { match message { LayerTcpSteal::PortSubscribe(port_steal) => self.port_subscribe(port_steal).await, LayerTcpSteal::ConnectionUnsubscribe(connection_id) => { From 485b41987377b901c9701a0ada66506f87b4be8f Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 23 Dec 2024 12:03:37 -0300 Subject: [PATCH 32/85] schema --- mirrord-schema.json | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mirrord-schema.json b/mirrord-schema.json index ec3d68d4766..cfc92f4f14e 100644 --- a/mirrord-schema.json +++ b/mirrord-schema.json @@ -255,7 +255,7 @@ "properties": { "annotations": { "title": "agent.annotations {#agent-annotations}", - "description": "Allows setting up custom annotations for the agent Job and Pod.\n\n```json { \"annotations\": { \"cats.io/inject\": \"enabled\" } } ```", + "description": "Allows setting up custom annotations for the agent Job and Pod.\n\n```json { \"annotations\": { \"cats.io/inject\": \"enabled\" \"prometheus.io/scrape\": \"true\", \"prometheus.io/port\": \"9000\" } } ```", "type": [ "object", "null" @@ -378,6 +378,14 @@ "null" ] }, + "metrics": { + "title": "agent.metrics {#agent-metrics}", + "description": "Enables prometheus metrics for the agent pod.\n\nYou might need to add annotations to the agent pod depending on how prometheus is configured to scrape for metrics.\n\n```json { \"metrics\": \"0.0.0.0:9000\" } ```", + "type": [ + "string", + "null" + ] + }, "namespace": { "title": 
"agent.namespace {#agent-namespace}", "description": "Namespace where the agent shall live. Note: Doesn't work with ephemeral containers. Defaults to the current kubernetes namespace.", From 559611991acbd8c3846a3abfe3ba27426196ae7c Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 23 Dec 2024 12:17:44 -0300 Subject: [PATCH 33/85] schema 2 --- mirrord-schema.json | 2 +- mirrord/config/src/lib.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mirrord-schema.json b/mirrord-schema.json index cfc92f4f14e..2340d40ea8c 100644 --- a/mirrord-schema.json +++ b/mirrord-schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "LayerFileConfig", - "description": "mirrord allows for a high degree of customization when it comes to which features you want to enable, and how they should function.\n\nAll of the configuration fields have a default value, so a minimal configuration would be no configuration at all.\n\nThe configuration supports templating using the [Tera](https://keats.github.io/tera/docs/) template engine. Currently we don't provide additional values to the context, if you have anything you want us to provide please let us know.\n\nTo use a configuration file in the CLI, use the `-f ` flag. 
Or if using VSCode Extension or JetBrains plugin, simply create a `.mirrord/mirrord.json` file or use the UI.\n\nTo help you get started, here are examples of a basic configuration file, and a complete configuration file containing all fields.\n\n### Basic `config.json` {#root-basic}\n\n```json { \"target\": \"pod/bear-pod\", \"feature\": { \"env\": true, \"fs\": \"read\", \"network\": true } } ```\n\n### Basic `config.json` with templating {#root-basic-templating}\n\n```json { \"target\": \"{{ get_env(name=\"TARGET\", default=\"pod/fallback\") }}\", \"feature\": { \"env\": true, \"fs\": \"read\", \"network\": true } } ```\n\n### Complete `config.json` {#root-complete}\n\nDon't use this example as a starting point, it's just here to show you all the available options. ```json { \"accept_invalid_certificates\": false, \"skip_processes\": \"ide-debugger\", \"target\": { \"path\": \"pod/bear-pod\", \"namespace\": \"default\" }, \"connect_tcp\": null, \"agent\": { \"log_level\": \"info\", \"json_log\": false, \"labels\": { \"user\": \"meow\" }, \"annotations\": { \"cats.io/inject\": \"enabled\" }, \"namespace\": \"default\", \"image\": \"ghcr.io/metalbear-co/mirrord:latest\", \"image_pull_policy\": \"IfNotPresent\", \"image_pull_secrets\": [ { \"secret-key\": \"secret\" } ], \"ttl\": 30, \"ephemeral\": false, \"communication_timeout\": 30, \"startup_timeout\": 360, \"network_interface\": \"eth0\", \"flush_connections\": true }, \"feature\": { \"env\": { \"include\": \"DATABASE_USER;PUBLIC_ENV\", \"exclude\": \"DATABASE_PASSWORD;SECRET_ENV\", \"override\": { \"DATABASE_CONNECTION\": \"db://localhost:7777/my-db\", \"LOCAL_BEAR\": \"panda\" }, \"mapping\": { \".+_TIMEOUT\": \"1000\" } }, \"fs\": { \"mode\": \"write\", \"read_write\": \".+\\\\.json\" , \"read_only\": [ \".+\\\\.yaml\", \".+important-file\\\\.txt\" ], \"local\": [ \".+\\\\.js\", \".+\\\\.mjs\" ] }, \"network\": { \"incoming\": { \"mode\": \"steal\", \"http_filter\": { \"header_filter\": \"host: api\\\\..+\" 
}, \"port_mapping\": [[ 7777, 8888 ]], \"ignore_localhost\": false, \"ignore_ports\": [9999, 10000] }, \"outgoing\": { \"tcp\": true, \"udp\": true, \"filter\": { \"local\": [\"tcp://1.1.1.0/24:1337\", \"1.1.5.0/24\", \"google.com\", \":53\"] }, \"ignore_localhost\": false, \"unix_streams\": \"bear.+\" }, \"dns\": { \"enabled\": true, \"filter\": { \"local\": [\"1.1.1.0/24:1337\", \"1.1.5.0/24\", \"google.com\"] } } }, \"copy_target\": { \"scale_down\": false } }, \"operator\": true, \"kubeconfig\": \"~/.kube/config\", \"sip_binaries\": \"bash\", \"telemetry\": true, \"kube_context\": \"my-cluster\" } ```\n\n# Options {#root-options}", + "description": "mirrord allows for a high degree of customization when it comes to which features you want to enable, and how they should function.\n\nAll of the configuration fields have a default value, so a minimal configuration would be no configuration at all.\n\nThe configuration supports templating using the [Tera](https://keats.github.io/tera/docs/) template engine. Currently we don't provide additional values to the context, if you have anything you want us to provide please let us know.\n\nTo use a configuration file in the CLI, use the `-f ` flag. Or if using VSCode Extension or JetBrains plugin, simply create a `.mirrord/mirrord.json` file or use the UI.\n\nTo help you get started, here are examples of a basic configuration file, and a complete configuration file containing all fields.\n\n### Basic `config.json` {#root-basic}\n\n```json { \"target\": \"pod/bear-pod\", \"feature\": { \"env\": true, \"fs\": \"read\", \"network\": true } } ```\n\n### Basic `config.json` with templating {#root-basic-templating}\n\n```json { \"target\": \"{{ get_env(name=\"TARGET\", default=\"pod/fallback\") }}\", \"feature\": { \"env\": true, \"fs\": \"read\", \"network\": true } } ```\n\n### Complete `config.json` {#root-complete}\n\nDon't use this example as a starting point, it's just here to show you all the available options. 
```json { \"accept_invalid_certificates\": false, \"skip_processes\": \"ide-debugger\", \"target\": { \"path\": \"pod/bear-pod\", \"namespace\": \"default\" }, \"connect_tcp\": null, \"agent\": { \"log_level\": \"info\", \"json_log\": false, \"labels\": { \"user\": \"meow\" }, \"annotations\": { \"cats.io/inject\": \"enabled\" }, \"namespace\": \"default\", \"image\": \"ghcr.io/metalbear-co/mirrord:latest\", \"image_pull_policy\": \"IfNotPresent\", \"image_pull_secrets\": [ { \"secret-key\": \"secret\" } ], \"ttl\": 30, \"ephemeral\": false, \"communication_timeout\": 30, \"startup_timeout\": 360, \"network_interface\": \"eth0\", \"flush_connections\": true, \"metrics\": \"0.0.0.0:9000\" }, \"feature\": { \"env\": { \"include\": \"DATABASE_USER;PUBLIC_ENV\", \"exclude\": \"DATABASE_PASSWORD;SECRET_ENV\", \"override\": { \"DATABASE_CONNECTION\": \"db://localhost:7777/my-db\", \"LOCAL_BEAR\": \"panda\" }, \"mapping\": { \".+_TIMEOUT\": \"1000\" } }, \"fs\": { \"mode\": \"write\", \"read_write\": \".+\\\\.json\" , \"read_only\": [ \".+\\\\.yaml\", \".+important-file\\\\.txt\" ], \"local\": [ \".+\\\\.js\", \".+\\\\.mjs\" ] }, \"network\": { \"incoming\": { \"mode\": \"steal\", \"http_filter\": { \"header_filter\": \"host: api\\\\..+\" }, \"port_mapping\": [[ 7777, 8888 ]], \"ignore_localhost\": false, \"ignore_ports\": [9999, 10000] }, \"outgoing\": { \"tcp\": true, \"udp\": true, \"filter\": { \"local\": [\"tcp://1.1.1.0/24:1337\", \"1.1.5.0/24\", \"google.com\", \":53\"] }, \"ignore_localhost\": false, \"unix_streams\": \"bear.+\" }, \"dns\": { \"enabled\": true, \"filter\": { \"local\": [\"1.1.1.0/24:1337\", \"1.1.5.0/24\", \"google.com\"] } } }, \"copy_target\": { \"scale_down\": false } }, \"operator\": true, \"kubeconfig\": \"~/.kube/config\", \"sip_binaries\": \"bash\", \"telemetry\": true, \"kube_context\": \"my-cluster\" } ```\n\n# Options {#root-options}", "type": "object", "properties": { "accept_invalid_certificates": { diff --git 
a/mirrord/config/src/lib.rs b/mirrord/config/src/lib.rs index 2cee1d8de4e..a191975f752 100644 --- a/mirrord/config/src/lib.rs +++ b/mirrord/config/src/lib.rs @@ -113,7 +113,8 @@ pub static MIRRORD_CONFIG_FILE_ENV: &str = "MIRRORD_CONFIG_FILE"; /// "communication_timeout": 30, /// "startup_timeout": 360, /// "network_interface": "eth0", -/// "flush_connections": true +/// "flush_connections": true, +/// "metrics": "0.0.0.0:9000" /// }, /// "feature": { /// "env": { From 9fae4e9387ffaace5a942211908cee3287e59a7c Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 23 Dec 2024 12:18:23 -0300 Subject: [PATCH 34/85] md --- mirrord/config/configuration.md | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/mirrord/config/configuration.md b/mirrord/config/configuration.md index df0788ada78..ec32b677e86 100644 --- a/mirrord/config/configuration.md +++ b/mirrord/config/configuration.md @@ -68,7 +68,8 @@ configuration file containing all fields. "communication_timeout": 30, "startup_timeout": 360, "network_interface": "eth0", - "flush_connections": true + "flush_connections": true, + "metrics": "0.0.0.0:9000" }, "feature": { "env": { @@ -166,7 +167,11 @@ Allows setting up custom annotations for the agent Job and Pod. ```json { - "annotations": { "cats.io/inject": "enabled" } + "annotations": { + "cats.io/inject": "enabled", + "prometheus.io/scrape": "true", + "prometheus.io/port": "9000" + } } ``` @@ -299,6 +304,19 @@ with `RUST_LOG`. } ``` +### agent.metrics {#agent-metrics} + +Enables prometheus metrics for the agent pod. + +You might need to add annotations to the agent pod depending on how prometheus is +configured to scrape for metrics. + +```json +{ + "metrics": "0.0.0.0:9000" +} +``` + ### agent.namespace {#agent-namespace} Namespace where the agent shall live. 
From 22231cf1b8886467f220adc378580469fd49fad8 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 23 Dec 2024 12:21:39 -0300 Subject: [PATCH 35/85] appease clippy --- mirrord/agent/src/entrypoint.rs | 8 ++++---- mirrord/agent/src/metrics.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 80bd0a0174a..62d09177677 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -506,7 +506,7 @@ async fn start_agent(args: Args) -> AgentResult<()> { // To make sure that background tasks are cancelled when we exit early from this function. let cancel_guard = cancellation_token.clone().drop_guard(); - if let Some(metrics_address) = args.metrics.clone() { + if let Some(metrics_address) = args.metrics { let cancellation_token = cancellation_token.clone(); tokio::spawn(async move { start_metrics(metrics_address, cancellation_token) @@ -813,9 +813,9 @@ async fn start_iptable_guard(args: Args) -> AgentResult<()> { /// Since the _second_ agent gets spawned as a child of the _first_, they share resources, /// like the `namespace`, which means: /// -/// 1. If you try to `bind` a socket to some address before [`start_agent`], it'll actually -/// be bound **twice**, which incurs an error (address already in use). You could get around -/// this by `bind`ing on `0.0.0.0:0`, but this is most likely **not** what you want. +/// 1. If you try to `bind` a socket to some address before [`start_agent`], it'll actually be bound +/// **twice**, which incurs an error (address already in use). You could get around this by +/// `bind`ing on `0.0.0.0:0`, but this is most likely **not** what you want. 
pub async fn main() -> AgentResult<()> { rustls::crypto::CryptoProvider::install_default(rustls::crypto::aws_lc_rs::default_provider()) .expect("Failed to install crypto provider"); diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index c75aff3eed0..216efed135f 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -124,7 +124,7 @@ pub(crate) async fn start_metrics( .map_err(AgentError::from) .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; - let _ = axum::serve(listener, app) + axum::serve(listener, app) .with_graceful_shutdown(async move { cancellation_token.cancelled().await }) .await .inspect_err(|fail| { From 3442a6fcfdc77518132b23c6eba1686bb416d069 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 23 Dec 2024 12:48:45 -0300 Subject: [PATCH 36/85] crypto provider for test --- mirrord/agent/src/client_connection.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mirrord/agent/src/client_connection.rs b/mirrord/agent/src/client_connection.rs index 8181e4baabd..7b484cc25da 100644 --- a/mirrord/agent/src/client_connection.rs +++ b/mirrord/agent/src/client_connection.rs @@ -208,7 +208,7 @@ enum ConnectionFramed { #[cfg(test)] mod test { - use std::sync::Arc; + use std::sync::{Arc, Once}; use futures::StreamExt; use mirrord_protocol::ClientCodec; @@ -220,10 +220,19 @@ mod test { use super::*; + static CRYPTO_PROVIDER: Once = Once::new(); + /// Verifies that [`AgentTlsConnector`] correctly accepts a /// connection from a server using the provided certificate. 
#[tokio::test] async fn agent_tls_connector_valid_cert() { + CRYPTO_PROVIDER.call_once(|| { + rustls::crypto::CryptoProvider::install_default( + rustls::crypto::aws_lc_rs::default_provider(), + ) + .expect("Failed to install crypto provider") + }); + let cert = rcgen::generate_simple_self_signed(vec!["operator".to_string()]).unwrap(); let cert_bytes = cert.cert.der(); let key_bytes = cert.key_pair.serialize_der(); @@ -269,6 +278,13 @@ mod test { /// connection from a server using some other certificate. #[tokio::test] async fn agent_tls_connector_invalid_cert() { + CRYPTO_PROVIDER.call_once(|| { + rustls::crypto::CryptoProvider::install_default( + rustls::crypto::aws_lc_rs::default_provider(), + ) + .expect("Failed to install crypto provider") + }); + let server_cert = rcgen::generate_simple_self_signed(vec!["operator".to_string()]).unwrap(); let cert_bytes = server_cert.cert.der(); let key_bytes = server_cert.key_pair.serialize_der(); From 028f78c77b345d3fa67db455038f70a481a0bd52 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 24 Dec 2024 12:48:46 -0300 Subject: [PATCH 37/85] encode_to_string // cancellation on error --- mirrord/agent/src/metrics.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 216efed135f..f1b0ab8b742 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -100,16 +100,11 @@ impl IntoResponse for MetricsError { #[tracing::instrument(level = Level::TRACE, ret, err)] async fn get_metrics() -> Result { - use prometheus::{Encoder, TextEncoder}; + use prometheus::TextEncoder; let metric_families = prometheus::gather(); - let mut buffer = Vec::new(); - TextEncoder - .encode(&metric_families, &mut buffer) - .inspect_err(|error| tracing::error!(%error, "unable to encode prometheus metrics"))?; - - Ok(String::from_utf8(buffer)?) + Ok(TextEncoder.encode_to_string(&metric_families)?) 
} #[tracing::instrument(level = Level::TRACE, skip_all, ret ,err)] @@ -124,12 +119,14 @@ pub(crate) async fn start_metrics( .map_err(AgentError::from) .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; + let cancel_on_error = cancellation_token.clone(); axum::serve(listener, app) .with_graceful_shutdown(async move { cancellation_token.cancelled().await }) .await .inspect_err(|fail| { tracing::error!(%fail, "Could not start agent metrics - server!") + server!"); + cancel_on_error.cancel(); })?; Ok(()) From 162232375913d599c12e0c429b8cf8df926ce136 Mon Sep 17 00:00:00 2001 From: meowjesty <43983236+meowjesty@users.noreply.github.com> Date: Tue, 24 Dec 2024 12:50:16 -0300 Subject: [PATCH 38/85] better error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Smolarek <34063647+Razz4780@users.noreply.github.com> --- mirrord/agent/src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index f1b0ab8b742..1ab926f454f 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -117,7 +117,7 @@ pub(crate) async fn start_metrics( let listener = TcpListener::bind(address) .await .map_err(AgentError::from) - .inspect_err(|fail| tracing::error!(?fail, "Actor listener!"))?; + .inspect_err(|fail| tracing::error!(?fail, "Failed to bind TCP socket for metrics server"))?; let cancel_on_error = cancellation_token.clone(); axum::serve(listener, app) From c873dcca75d6fb62432cf7b01947439a7c7424b6 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 24 Dec 2024 12:55:35 -0300 Subject: [PATCH 39/85] no more metricserror --- mirrord/agent/src/metrics.rs | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 1ab926f454f..4f3221fafcd 100644 --- a/mirrord/agent/src/metrics.rs +++ 
b/mirrord/agent/src/metrics.rs @@ -1,8 +1,8 @@ use std::{net::SocketAddr, sync::LazyLock}; -use axum::{response::IntoResponse, routing::get, Router}; +use axum::{routing::get, Router}; +use http::StatusCode; use prometheus::{register_int_gauge, IntGauge}; -use thiserror::Error; use tokio::net::TcpListener; use tokio_util::sync::CancellationToken; use tracing::Level; @@ -83,28 +83,18 @@ pub(crate) static UDP_OUTGOING_CONNECTION: LazyLock = LazyLock::new(|| .expect("Valid at initialization!") }); -#[derive(Error, Debug)] -pub(crate) enum MetricsError { - #[error(transparent)] - FromUtf8(#[from] std::string::FromUtf8Error), - - #[error(transparent)] - Prometheus(#[from] prometheus::Error), -} - -impl IntoResponse for MetricsError { - fn into_response(self) -> axum::response::Response { - (http::StatusCode::INTERNAL_SERVER_ERROR, self.to_string()).into_response() - } -} - -#[tracing::instrument(level = Level::TRACE, ret, err)] -async fn get_metrics() -> Result { +#[tracing::instrument(level = Level::TRACE, ret)] +async fn get_metrics() -> (StatusCode, String) { use prometheus::TextEncoder; let metric_families = prometheus::gather(); - - Ok(TextEncoder.encode_to_string(&metric_families)?) 
+ match TextEncoder.encode_to_string(&metric_families) { + Ok(response) => (StatusCode::OK, response), + Err(fail) => { + tracing::error!(?fail, "Failed GET /metrics"); + (StatusCode::INTERNAL_SERVER_ERROR, fail.to_string()) + } + } } #[tracing::instrument(level = Level::TRACE, skip_all, ret ,err)] @@ -117,7 +107,9 @@ pub(crate) async fn start_metrics( let listener = TcpListener::bind(address) .await .map_err(AgentError::from) - .inspect_err(|fail| tracing::error!(?fail, "Failed to bind TCP socket for metrics server"))?; + .inspect_err(|fail| { + tracing::error!(?fail, "Failed to bind TCP socket for metrics server") + })?; let cancel_on_error = cancellation_token.clone(); axum::serve(listener, app) From 1fb1fef5a04b0939af932cda552bd0628396a38e Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 24 Dec 2024 12:56:29 -0300 Subject: [PATCH 40/85] outdated docs --- mirrord/agent/src/steal/subscriptions.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mirrord/agent/src/steal/subscriptions.rs b/mirrord/agent/src/steal/subscriptions.rs index 0fe7ccec94c..2bbae45ed8d 100644 --- a/mirrord/agent/src/steal/subscriptions.rs +++ b/mirrord/agent/src/steal/subscriptions.rs @@ -219,11 +219,6 @@ impl PortSubscriptions { /// * `client_id` - identifier of the client that issued the subscription /// * `port` - number of the subscription port /// - /// # Returns - /// - /// `Some(true)` if the subscprition has an HTTP filter, `Some(false)` if it's unfiltered, and - /// `None` if we could not find the [`PortSubscription`]. 
- /// /// # Warning /// /// If this method returns an [`Err`], it means that this set is out of sync with the inner From ff303db498e218e60e40e7719859d3dd02e4dc98 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 24 Dec 2024 13:13:38 -0300 Subject: [PATCH 41/85] connection metrics to run tasks --- mirrord/agent/src/sniffer.rs | 3 +++ mirrord/agent/src/steal/connections.rs | 16 ++-------------- mirrord/agent/src/steal/connections/filtered.rs | 10 +++++++++- .../agent/src/steal/connections/unfiltered.rs | 17 ++++++++++++++++- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/mirrord/agent/src/sniffer.rs b/mirrord/agent/src/sniffer.rs index 84826977877..9ea28db8684 100644 --- a/mirrord/agent/src/sniffer.rs +++ b/mirrord/agent/src/sniffer.rs @@ -240,6 +240,9 @@ where self.update_packet_filter()?; } + MIRROR_PORT_SUBSCRIPTION.dec(); + MIRROR_CONNECTION_SUBSCRIPTION.dec(); + Ok(()) } diff --git a/mirrord/agent/src/steal/connections.rs b/mirrord/agent/src/steal/connections.rs index a9a56fa4799..c6f3493a59f 100644 --- a/mirrord/agent/src/steal/connections.rs +++ b/mirrord/agent/src/steal/connections.rs @@ -301,8 +301,6 @@ impl StolenConnections { let (task_tx, task_rx) = mpsc::channel(Self::TASK_IN_CHANNEL_CAPACITY); let main_tx = self.main_tx.clone(); - let filtered = matches!(connection.port_subscription, PortSubscription::Filtered(..)); - tracing::trace!(connection_id, "Spawning connection task"); self.tasks.spawn(async move { let task = ConnectionTask { @@ -312,21 +310,9 @@ impl StolenConnections { rx: task_rx, }; - if filtered { - STEAL_FILTERED_CONNECTION_SUBSCRIPTION.inc(); - } else { - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); - } - match task.run().await { Ok(()) => { tracing::trace!(connection_id, "Connection task finished"); - - if filtered { - STEAL_FILTERED_CONNECTION_SUBSCRIPTION.dec(); - } else { - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); - } } Err(error) => { tracing::trace!(connection_id, ?error, "Connection task failed"); @@ 
-484,6 +470,8 @@ impl ConnectionTask { stream: self.connection.stream, }; + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); + task.run(self.tx, &mut self.rx).await } diff --git a/mirrord/agent/src/steal/connections/filtered.rs b/mirrord/agent/src/steal/connections/filtered.rs index b30e48a5757..c70dc1214ee 100644 --- a/mirrord/agent/src/steal/connections/filtered.rs +++ b/mirrord/agent/src/steal/connections/filtered.rs @@ -31,6 +31,7 @@ use tracing::Level; use super::{ConnectionMessageIn, ConnectionMessageOut, ConnectionTaskError}; use crate::{ http::HttpVersion, + metrics::STEAL_FILTERED_CONNECTION_SUBSCRIPTION, steal::{connections::unfiltered::UnfilteredStealTask, http::HttpFilter}, util::ClientId, }; @@ -619,6 +620,8 @@ where // PROTOCOLS` response. let mut queued_raw_data: HashMap>> = Default::default(); + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.inc(); + loop { tokio::select! { message = rx.recv() => match message.ok_or(ConnectionTaskError::RecvError)? { @@ -638,6 +641,8 @@ where queued_raw_data.remove(&client_id); self.subscribed.insert(client_id, false); self.blocked_requests.retain(|key, _| key.0 != client_id); + + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.dec(); }, }, @@ -646,7 +651,10 @@ where // No more requests from the `FilteringService`. // HTTP connection is closed and possibly upgraded. 
- None => break, + None => { + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.dec(); + break + } } } } diff --git a/mirrord/agent/src/steal/connections/unfiltered.rs b/mirrord/agent/src/steal/connections/unfiltered.rs index 5b6676094c3..9c7cb9b0b0c 100644 --- a/mirrord/agent/src/steal/connections/unfiltered.rs +++ b/mirrord/agent/src/steal/connections/unfiltered.rs @@ -7,7 +7,10 @@ use tokio::{ sync::mpsc::{Receiver, Sender}, }; -use super::{ConnectionMessageIn, ConnectionMessageOut, ConnectionTaskError}; +use super::{ + ConnectionMessageIn, ConnectionMessageOut, ConnectionTaskError, + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION, +}; use crate::util::ClientId; /// Manages an unfiltered stolen connection. @@ -35,11 +38,15 @@ impl UnfilteredStealTask { let mut buf = BytesMut::with_capacity(64 * 1024); let mut reading_closed = false; + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); + loop { tokio::select! { read = self.stream.read_buf(&mut buf), if !reading_closed => match read { Ok(..) => { if buf.is_empty() { + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + tracing::trace!( client_id = self.client_id, connection_id = self.connection_id, @@ -63,6 +70,8 @@ impl UnfilteredStealTask { Err(e) if e.kind() == ErrorKind::WouldBlock => {} Err(e) => { + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + tx.send(ConnectionMessageOut::Closed { client_id: self.client_id, connection_id: self.connection_id @@ -85,6 +94,8 @@ impl UnfilteredStealTask { ConnectionMessageIn::Raw { data, .. } => { let res = if data.is_empty() { + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + tracing::trace!( client_id = self.client_id, connection_id = self.connection_id, @@ -97,6 +108,8 @@ impl UnfilteredStealTask { }; if let Err(e) = res { + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + tx.send(ConnectionMessageOut::Closed { client_id: self.client_id, connection_id: self.connection_id @@ -115,6 +128,8 @@ impl UnfilteredStealTask { }, ConnectionMessageIn::Unsubscribed { .. 
} => { + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + return Ok(()); } } From 078bc76696fab8aa690bf8b0f4cdd8b1fe5efb3e Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 24 Dec 2024 13:36:15 -0300 Subject: [PATCH 42/85] unused --- mirrord/agent/src/steal/connections.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mirrord/agent/src/steal/connections.rs b/mirrord/agent/src/steal/connections.rs index c6f3493a59f..37ac53f759e 100644 --- a/mirrord/agent/src/steal/connections.rs +++ b/mirrord/agent/src/steal/connections.rs @@ -16,10 +16,8 @@ use tracing::Level; use self::{filtered::DynamicBody, unfiltered::UnfilteredStealTask}; use super::{http::DefaultReversibleStream, subscriptions::PortSubscription}; use crate::{ - http::HttpVersion, - metrics::{STEAL_FILTERED_CONNECTION_SUBSCRIPTION, STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION}, - steal::connections::filtered::FilteredStealTask, - util::ClientId, + http::HttpVersion, metrics::STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION, + steal::connections::filtered::FilteredStealTask, util::ClientId, }; mod filtered; From c6caa148e8c9b5c5ce6b0273c3fc5e2f2000a0e1 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 24 Dec 2024 15:12:53 -0300 Subject: [PATCH 43/85] remove todo --- mirrord/agent/src/file.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 7186803aab9..1f25bffaaaf 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -260,11 +260,6 @@ impl FileManager { } } - // TODO(alex) [mid]: Fails with the wrong error? - /* - mirrord_agent::file: error: IO failed for remote operation with `Failed performing `getaddrinfo` with Some(2) and kind NotFound!! 
- at mirrord/agent/src/file.rs:261 - */ #[tracing::instrument(level = Level::TRACE, skip(self), err(level = Level::DEBUG))] fn open( &mut self, From d2420b61774d6a042616f4883e2d7af3e24058da Mon Sep 17 00:00:00 2001 From: meowjesty <43983236+meowjesty@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:50:20 -0300 Subject: [PATCH 44/85] line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Smolarek <34063647+Razz4780@users.noreply.github.com> --- mirrord/agent/src/metrics.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 4f3221fafcd..18f4c0910b4 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -116,8 +116,7 @@ pub(crate) async fn start_metrics( .with_graceful_shutdown(async move { cancellation_token.cancelled().await }) .await .inspect_err(|fail| { - tracing::error!(%fail, "Could not start agent metrics - server!"); + tracing::error!(%fail, "Could not start agent metrics server!"); cancel_on_error.cancel(); })?; From 33633d2ac91871f09c8849db554dd55f131840a2 Mon Sep 17 00:00:00 2001 From: meowjesty <43983236+meowjesty@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:50:58 -0300 Subject: [PATCH 45/85] inc not dec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Smolarek <34063647+Razz4780@users.noreply.github.com> --- mirrord/agent/src/file.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 1f25bffaaaf..0ac6951bd8f 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -710,7 +710,7 @@ impl FileManager { let dir_stream = path.read_dir()?.enumerate(); if self.dir_streams.insert(fd, dir_stream).is_none() { - OPEN_FD_COUNT.dec(); + OPEN_FD_COUNT.inc(); } Ok(OpenDirResponse { fd }) From e50e047a29553de4e397d6bee7062903d623db5d Mon 
Sep 17 00:00:00 2001 From: meowjesty <43983236+meowjesty@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:51:31 -0300 Subject: [PATCH 46/85] inc stream fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Smolarek <34063647+Razz4780@users.noreply.github.com> --- mirrord/agent/src/file.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 0ac6951bd8f..cf203f6b889 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -773,7 +773,7 @@ impl FileManager { let current_and_parent = Self::get_current_and_parent_entries(dir); let stream = GetDEnts64Stream::new(dir.read_dir()?, current_and_parent).peekable(); - // TODO(alex) [mid]: Do we also want to count streams of stuffs? + OPEN_FD_COUNT.inc(); Ok(e.insert(stream)) } }, From b20f2dc845188e72850705eaf83be2af182d6fc3 Mon Sep 17 00:00:00 2001 From: meowjesty <43983236+meowjesty@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:53:12 -0300 Subject: [PATCH 47/85] only dec once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michał Smolarek <34063647+Razz4780@users.noreply.github.com> --- mirrord/agent/src/steal/subscriptions.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mirrord/agent/src/steal/subscriptions.rs b/mirrord/agent/src/steal/subscriptions.rs index 2bbae45ed8d..a752503c60c 100644 --- a/mirrord/agent/src/steal/subscriptions.rs +++ b/mirrord/agent/src/steal/subscriptions.rs @@ -241,14 +241,14 @@ impl PortSubscriptions { false } PortSubscription::Filtered(filters) => { - filters.remove(&client_id); + if filters.remove(&client_id).is_some() { + STEAL_FILTERED_PORT_SUBSCRIPTION.dec(); + } if filters.is_empty() { e.remove(); - STEAL_FILTERED_PORT_SUBSCRIPTION.dec(); true } else { - STEAL_FILTERED_PORT_SUBSCRIPTION.dec(); false } } From 1317b363e334aa251bd6e13d64513f3e7c99dc83 
Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 12:17:51 -0300 Subject: [PATCH 48/85] drop filemanager updates metrics --- mirrord/agent/src/file.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 1f25bffaaaf..296ccfc4aa2 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -72,6 +72,14 @@ pub(crate) struct FileManager { fds_iter: RangeInclusive, } +impl Drop for FileManager { + fn drop(&mut self) { + let descriptors = + self.open_files.len() + self.dir_streams.len() + self.getdents_streams.len(); + OPEN_FD_COUNT.sub(descriptors as i64); + } +} + pub fn get_root_path_from_optional_pid(pid: Option) -> PathBuf { match pid { Some(pid) => PathBuf::from("/proc").join(pid.to_string()).join("root"), From ff6f9aca4ed89e6d1eb9f430862f97f8768afc43 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 12:20:11 -0300 Subject: [PATCH 49/85] sniffer drop and update_packet_filter metrics --- mirrord/agent/src/sniffer.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mirrord/agent/src/sniffer.rs b/mirrord/agent/src/sniffer.rs index 9ea28db8684..5a35b6af7a5 100644 --- a/mirrord/agent/src/sniffer.rs +++ b/mirrord/agent/src/sniffer.rs @@ -142,6 +142,12 @@ pub(crate) struct TcpConnectionSniffer { clients_closed: FuturesUnordered>, } +impl Drop for TcpConnectionSniffer { + fn drop(&mut self) { + MIRROR_PORT_SUBSCRIPTION.set(0); + } +} + impl fmt::Debug for TcpConnectionSniffer { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("TcpConnectionSniffer") @@ -251,6 +257,7 @@ where #[tracing::instrument(level = Level::TRACE, err)] fn update_packet_filter(&mut self) -> Result<(), AgentError> { let ports = self.port_subscriptions.get_subscribed_topics(); + MIRROR_PORT_SUBSCRIPTION.set(ports.len() as i64); let filter = if ports.is_empty() { tracing::trace!("No ports subscribed, setting dummy bpf"); From 774903981d7dd1a57bf458f13e9855a5fa626ff0 
Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 12:23:06 -0300 Subject: [PATCH 50/85] remove dec from extra sub case --- mirrord/agent/src/steal/subscriptions.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mirrord/agent/src/steal/subscriptions.rs b/mirrord/agent/src/steal/subscriptions.rs index 2bbae45ed8d..d1bc1017ee3 100644 --- a/mirrord/agent/src/steal/subscriptions.rs +++ b/mirrord/agent/src/steal/subscriptions.rs @@ -236,10 +236,7 @@ impl PortSubscriptions { true } - PortSubscription::Unfiltered(..) => { - STEAL_UNFILTERED_PORT_SUBSCRIPTION.dec(); - false - } + PortSubscription::Unfiltered(..) => false, PortSubscription::Filtered(filters) => { filters.remove(&client_id); From 50d75c59e0564a86d97cc24330bc30e3f676153b Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 12:26:15 -0300 Subject: [PATCH 51/85] move sub to PortSub add --- mirrord/agent/src/steal/connection.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index 1ea50cf4913..0418a805916 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -673,11 +673,7 @@ impl TcpConnectionStealer { } Command::PortSubscribe(port_steal) => { - if self.port_subscribe(client_id, port_steal).await? 
{ - STEAL_FILTERED_PORT_SUBSCRIPTION.inc(); - } else { - STEAL_UNFILTERED_PORT_SUBSCRIPTION.inc(); - } + self.port_subscribe(client_id, port_steal).await?; } Command::PortUnsubscribe(port) => { From aa65818079a2d08d805d7b9f1987accc83fd6f1f Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 12:28:02 -0300 Subject: [PATCH 52/85] drop for PortSubs, zero subs counter --- mirrord/agent/src/steal/subscriptions.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mirrord/agent/src/steal/subscriptions.rs b/mirrord/agent/src/steal/subscriptions.rs index e970dd09624..880af28a93c 100644 --- a/mirrord/agent/src/steal/subscriptions.rs +++ b/mirrord/agent/src/steal/subscriptions.rs @@ -147,6 +147,13 @@ pub struct PortSubscriptions { subscriptions: HashMap, } +impl Drop for PortSubscriptions { + fn drop(&mut self) { + STEAL_FILTERED_PORT_SUBSCRIPTION.set(0); + STEAL_UNFILTERED_PORT_SUBSCRIPTION.set(0); + } +} + impl PortSubscriptions { /// Create an empty instance of this struct. /// From d54df311e1c31ccc1547d03dec4b74aaf33d9e49 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 12:33:26 -0300 Subject: [PATCH 53/85] new and drop for unfiltered task --- mirrord/agent/src/steal/connection.rs | 1 - mirrord/agent/src/steal/connections.rs | 12 +++--------- .../agent/src/steal/connections/unfiltered.rs | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index 0418a805916..dad1822b82f 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -33,7 +33,6 @@ use tracing::{warn, Level}; use crate::{ error::{AgentError, AgentResult}, - metrics::{STEAL_FILTERED_PORT_SUBSCRIPTION, STEAL_UNFILTERED_PORT_SUBSCRIPTION}, steal::{ connections::{ ConnectionMessageIn, ConnectionMessageOut, StolenConnection, StolenConnections, diff --git a/mirrord/agent/src/steal/connections.rs b/mirrord/agent/src/steal/connections.rs 
index 37ac53f759e..8e969b0a868 100644 --- a/mirrord/agent/src/steal/connections.rs +++ b/mirrord/agent/src/steal/connections.rs @@ -462,15 +462,9 @@ impl ConnectionTask { }) .await?; - let task = UnfilteredStealTask { - connection_id: self.connection_id, - client_id, - stream: self.connection.stream, - }; - - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); - - task.run(self.tx, &mut self.rx).await + UnfilteredStealTask::new(self.connection_id, client_id, self.connection.stream) + .run(self.tx, &mut self.rx) + .await } PortSubscription::Filtered(filters) => { diff --git a/mirrord/agent/src/steal/connections/unfiltered.rs b/mirrord/agent/src/steal/connections/unfiltered.rs index 9c7cb9b0b0c..754fd30ea57 100644 --- a/mirrord/agent/src/steal/connections/unfiltered.rs +++ b/mirrord/agent/src/steal/connections/unfiltered.rs @@ -22,7 +22,23 @@ pub struct UnfilteredStealTask { pub stream: T, } +impl Drop for UnfilteredStealTask { + fn drop(&mut self) { + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + } +} + impl UnfilteredStealTask { + pub(crate) fn new(connection_id: ConnectionId, client_id: ClientId, stream: T) -> Self { + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); + + Self { + connection_id, + client_id, + stream, + } + } + /// Runs this task until the managed connection is closed. 
/// /// # Note From 612d8b60625af8f22c6f5ca545cefcdddaeebc91 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 12:35:09 -0300 Subject: [PATCH 54/85] remove inc from run --- mirrord/agent/src/steal/connections/unfiltered.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/mirrord/agent/src/steal/connections/unfiltered.rs b/mirrord/agent/src/steal/connections/unfiltered.rs index 754fd30ea57..d5f6f81c18e 100644 --- a/mirrord/agent/src/steal/connections/unfiltered.rs +++ b/mirrord/agent/src/steal/connections/unfiltered.rs @@ -54,8 +54,6 @@ impl UnfilteredStealTask { let mut buf = BytesMut::with_capacity(64 * 1024); let mut reading_closed = false; - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); - loop { tokio::select! { read = self.stream.read_buf(&mut buf), if !reading_closed => match read { From f87c56f63139a0bbdc6bb9572a6f61c5d429b87c Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 12:49:07 -0300 Subject: [PATCH 55/85] drop for filtered --- .../agent/src/steal/connections/filtered.rs | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/mirrord/agent/src/steal/connections/filtered.rs b/mirrord/agent/src/steal/connections/filtered.rs index c70dc1214ee..cd06b41c8b3 100644 --- a/mirrord/agent/src/steal/connections/filtered.rs +++ b/mirrord/agent/src/steal/connections/filtered.rs @@ -1,5 +1,6 @@ use std::{ - collections::HashMap, future::Future, marker::PhantomData, net::SocketAddr, pin::Pin, sync::Arc, + collections::HashMap, future::Future, marker::PhantomData, net::SocketAddr, ops::Not, pin::Pin, + sync::Arc, }; use bytes::Bytes; @@ -28,7 +29,10 @@ use tokio::{ use tokio_util::sync::{CancellationToken, DropGuard}; use tracing::Level; -use super::{ConnectionMessageIn, ConnectionMessageOut, ConnectionTaskError}; +use super::{ + ConnectionMessageIn, ConnectionMessageOut, ConnectionTaskError, + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION, +}; use crate::{ http::HttpVersion, 
metrics::STEAL_FILTERED_CONNECTION_SUBSCRIPTION, @@ -369,6 +373,16 @@ pub struct FilteredStealTask { /// For safely downcasting the IO stream after an HTTP upgrade. See [`Upgraded::downcast`]. _io_type: PhantomData T>, + + metrics_updated: bool, +} + +impl Drop for FilteredStealTask { + fn drop(&mut self) { + if self.metrics_updated.not() { + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.dec(); + } + } } impl FilteredStealTask @@ -444,6 +458,8 @@ where } }; + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.inc(); + Self { connection_id, original_destination, @@ -454,6 +470,7 @@ where blocked_requests: Default::default(), next_request_id: Default::default(), _io_type: Default::default(), + metrics_updated: false, } } @@ -796,15 +813,18 @@ where ) -> Result<(), ConnectionTaskError> { let res = self.run_until_http_ends(tx.clone(), rx).await; + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + self.metrics_updated = true; + let res = match res { Ok(data) => self.run_after_http_ends(data, tx.clone(), rx).await, Err(e) => Err(e), }; - for (client_id, subscribed) in self.subscribed { - if subscribed { + for (client_id, subscribed) in self.subscribed.iter() { + if *subscribed { tx.send(ConnectionMessageOut::Closed { - client_id, + client_id: *client_id, connection_id: self.connection_id, }) .await?; From 98c463b552209066679b11231ba8885019675c95 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 12:52:25 -0300 Subject: [PATCH 56/85] docs --- mirrord/agent/src/steal/connections/filtered.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/mirrord/agent/src/steal/connections/filtered.rs b/mirrord/agent/src/steal/connections/filtered.rs index cd06b41c8b3..c2467a343ac 100644 --- a/mirrord/agent/src/steal/connections/filtered.rs +++ b/mirrord/agent/src/steal/connections/filtered.rs @@ -374,6 +374,7 @@ pub struct FilteredStealTask { /// For safely downcasting the IO stream after an HTTP upgrade. See [`Upgraded::downcast`]. 
_io_type: PhantomData T>, + /// Helps us figure out if we should update some metrics in the `Drop` implementation. metrics_updated: bool, } From 7fb33fe8ed01c2c5a0591fae5afdef3f79d4b38e Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 15:18:27 -0300 Subject: [PATCH 57/85] remove metrics from some places --- mirrord/agent/src/sniffer.rs | 7 ------- mirrord/agent/src/steal/connections/filtered.rs | 2 -- 2 files changed, 9 deletions(-) diff --git a/mirrord/agent/src/sniffer.rs b/mirrord/agent/src/sniffer.rs index 5a35b6af7a5..b7e482a462d 100644 --- a/mirrord/agent/src/sniffer.rs +++ b/mirrord/agent/src/sniffer.rs @@ -246,9 +246,6 @@ where self.update_packet_filter()?; } - MIRROR_PORT_SUBSCRIPTION.dec(); - MIRROR_CONNECTION_SUBSCRIPTION.dec(); - Ok(()) } @@ -289,8 +286,6 @@ where } => { if self.port_subscriptions.subscribe(client_id, port) { self.update_packet_filter()?; - - MIRROR_PORT_SUBSCRIPTION.inc(); } let _ = tx.send(port); @@ -302,8 +297,6 @@ where } => { if self.port_subscriptions.unsubscribe(client_id, port) { self.update_packet_filter()?; - - MIRROR_PORT_SUBSCRIPTION.dec(); } } } diff --git a/mirrord/agent/src/steal/connections/filtered.rs b/mirrord/agent/src/steal/connections/filtered.rs index c2467a343ac..b1507295e3b 100644 --- a/mirrord/agent/src/steal/connections/filtered.rs +++ b/mirrord/agent/src/steal/connections/filtered.rs @@ -638,8 +638,6 @@ where // PROTOCOLS` response. let mut queued_raw_data: HashMap>> = Default::default(); - STEAL_FILTERED_CONNECTION_SUBSCRIPTION.inc(); - loop { tokio::select! { message = rx.recv() => match message.ok_or(ConnectionTaskError::RecvError)? 
{ From ccace97409269071a4557c2e3bbfc2aa9b8b1920 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 15:29:24 -0300 Subject: [PATCH 58/85] cancel --- mirrord/agent/src/entrypoint.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index 62d09177677..ce4c4308439 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -509,9 +509,12 @@ async fn start_agent(args: Args) -> AgentResult<()> { if let Some(metrics_address) = args.metrics { let cancellation_token = cancellation_token.clone(); tokio::spawn(async move { - start_metrics(metrics_address, cancellation_token) + start_metrics(metrics_address, cancellation_token.clone()) .await - .inspect_err(|fail| tracing::error!(?fail, "Failed starting metrics server!")) + .inspect_err(|fail| { + tracing::error!(?fail, "Failed starting metrics server!"); + cancellation_token.cancel(); + }) }); } From 882389aa3fc435b1866375b40e4afb9162f182ab Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 15:38:11 -0300 Subject: [PATCH 59/85] near insert and remove --- mirrord/agent/src/outgoing.rs | 22 +++++++++++----------- mirrord/agent/src/outgoing/udp.rs | 10 +++------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/mirrord/agent/src/outgoing.rs b/mirrord/agent/src/outgoing.rs index 1e41c9ce942..4492e03e13b 100644 --- a/mirrord/agent/src/outgoing.rs +++ b/mirrord/agent/src/outgoing.rs @@ -113,6 +113,13 @@ struct TcpOutgoingTask { daemon_tx: Sender, } +impl Drop for TcpOutgoingTask { + fn drop(&mut self) { + let connections = self.readers.keys().chain(self.writers.keys()).count(); + TCP_OUTGOING_CONNECTION.sub(connections as i64); + } +} + impl fmt::Debug for TcpOutgoingTask { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("TcpOutgoingTask") @@ -217,11 +224,10 @@ impl TcpOutgoingTask { self.readers.remove(&connection_id); 
self.writers.remove(&connection_id); + TCP_OUTGOING_CONNECTION.dec(); let daemon_message = DaemonTcpOutgoing::Close(connection_id); self.daemon_tx.send(daemon_message).await?; - - TCP_OUTGOING_CONNECTION.dec(); } // EOF occurred in one of peer connections. @@ -252,8 +258,6 @@ impl TcpOutgoingTask { self.daemon_tx .send(DaemonTcpOutgoing::Close(connection_id)) .await?; - - TCP_OUTGOING_CONNECTION.dec(); } } } @@ -292,6 +296,7 @@ impl TcpOutgoingTask { connection_id, ReaderStream::with_capacity(read_half, Self::READ_BUFFER_SIZE), ); + TCP_OUTGOING_CONNECTION.inc(); Ok(DaemonConnect { connection_id, @@ -309,8 +314,6 @@ impl TcpOutgoingTask { .send(DaemonTcpOutgoing::Connect(daemon_connect)) .await?; - TCP_OUTGOING_CONNECTION.inc(); - Ok(()) } @@ -356,8 +359,6 @@ impl TcpOutgoingTask { .send(DaemonTcpOutgoing::Close(connection_id)) .await?; - TCP_OUTGOING_CONNECTION.dec(); - Ok(()) } } @@ -367,6 +368,7 @@ impl TcpOutgoingTask { Err(error) => { self.writers.remove(&connection_id); self.readers.remove(&connection_id); + TCP_OUTGOING_CONNECTION.dec(); tracing::trace!( connection_id, @@ -377,8 +379,6 @@ impl TcpOutgoingTask { .send(DaemonTcpOutgoing::Close(connection_id)) .await?; - TCP_OUTGOING_CONNECTION.dec(); - Ok(()) } } @@ -389,8 +389,8 @@ impl TcpOutgoingTask { LayerTcpOutgoing::Close(LayerClose { connection_id }) => { self.writers.remove(&connection_id); self.readers.remove(&connection_id); - TCP_OUTGOING_CONNECTION.dec(); + Ok(()) } } diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index 4ab96dd1264..35f30c90da8 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -152,6 +152,7 @@ impl UdpOutgoingApi { writers.insert(connection_id, (sink, peer_address)); readers.insert(connection_id, stream); + UDP_OUTGOING_CONNECTION.inc(); Ok(DaemonConnect { connection_id, @@ -164,8 +165,6 @@ impl UdpOutgoingApi { debug!("interceptor_task -> daemon_message {:#?}", daemon_message); 
daemon_tx.send(daemon_message).await?; - - UDP_OUTGOING_CONNECTION.inc(); } // [user] -> [layer] -> [agent] -> [remote] // `user` wrote some message to the remote host. @@ -188,11 +187,10 @@ impl UdpOutgoingApi { warn!("LayerUdpOutgoing::Write -> Failed with {:#?}", fail); writers.remove(&connection_id); readers.remove(&connection_id); + UDP_OUTGOING_CONNECTION.dec(); let daemon_message = DaemonUdpOutgoing::Close(connection_id); daemon_tx.send(daemon_message).await?; - - UDP_OUTGOING_CONNECTION.dec(); } } // [layer] -> [agent] @@ -200,7 +198,6 @@ impl UdpOutgoingApi { LayerUdpOutgoing::Close(LayerClose { ref connection_id }) => { writers.remove(connection_id); readers.remove(connection_id); - UDP_OUTGOING_CONNECTION.dec(); } } @@ -225,11 +222,10 @@ impl UdpOutgoingApi { trace!("interceptor_task -> close connection {:#?}", connection_id); writers.remove(&connection_id); readers.remove(&connection_id); + UDP_OUTGOING_CONNECTION.dec(); let daemon_message = DaemonUdpOutgoing::Close(connection_id); daemon_tx.send(daemon_message).await?; - - UDP_OUTGOING_CONNECTION.dec(); } } } From c0963bb645285696d7fa855772ac54cb2fabadd1 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 15:44:53 -0300 Subject: [PATCH 60/85] connection sub inc --- mirrord/agent/src/sniffer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mirrord/agent/src/sniffer.rs b/mirrord/agent/src/sniffer.rs index b7e482a462d..7c8beab353a 100644 --- a/mirrord/agent/src/sniffer.rs +++ b/mirrord/agent/src/sniffer.rs @@ -145,6 +145,7 @@ pub(crate) struct TcpConnectionSniffer { impl Drop for TcpConnectionSniffer { fn drop(&mut self) { MIRROR_PORT_SUBSCRIPTION.set(0); + MIRROR_CONNECTION_SUBSCRIPTION.set(0); } } @@ -276,8 +277,6 @@ where command: SnifferCommandInner::NewClient(sender), } => { self.handle_new_client(client_id, sender); - - MIRROR_CONNECTION_SUBSCRIPTION.inc(); } SnifferCommand { @@ -404,6 +403,7 @@ where } } + MIRROR_CONNECTION_SUBSCRIPTION.inc(); 
e.insert_entry(data_tx) } }; From a4c9ff7e13f9121b16dd43e0407d0de16dbaacf8 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 16:19:21 -0300 Subject: [PATCH 61/85] connected clients --- mirrord/agent/src/entrypoint.rs | 10 +++++++++- mirrord/agent/src/metrics.rs | 8 ++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index ce4c4308439..d00bc7aec83 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -12,7 +12,7 @@ use std::{ use client_connection::AgentTlsConnector; use dns::{DnsCommand, DnsWorker}; use futures::TryFutureExt; -use metrics::start_metrics; +use metrics::{start_metrics, CLIENT_COUNT}; use mirrord_protocol::{ClientMessage, DaemonMessage, GetEnvVarsRequest, LogMessage}; use sniffer::tcp_capture::RawSocketTcpCapture; use tokio::{ @@ -206,6 +206,12 @@ struct ClientConnectionHandler { ready_for_logs: bool, } +impl Drop for ClientConnectionHandler { + fn drop(&mut self) { + CLIENT_COUNT.dec(); + } +} + impl ClientConnectionHandler { /// Initializes [`ClientConnectionHandler`]. 
pub async fn new( @@ -239,6 +245,8 @@ impl ClientConnectionHandler { ready_for_logs: false, }; + CLIENT_COUNT.inc(); + Ok(client_handler) } diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 18f4c0910b4..2ee1edd9fee 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -9,6 +9,14 @@ use tracing::Level; use crate::error::AgentError; +pub(crate) static CLIENT_COUNT: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_client_count", + "amount of connected clients to this mirrord-agent" + ) + .expect("Valid at initialization!") +}); + pub(crate) static OPEN_FD_COUNT: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_open_fd_count", From bb063c92f2c691c8937105cf8e402e4002ff1f9f Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 16:25:11 -0300 Subject: [PATCH 62/85] dns request --- mirrord/agent/src/dns.rs | 4 ++++ mirrord/agent/src/metrics.rs | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/mirrord/agent/src/dns.rs b/mirrord/agent/src/dns.rs index 4a374eab07e..100cea4a976 100644 --- a/mirrord/agent/src/dns.rs +++ b/mirrord/agent/src/dns.rs @@ -18,6 +18,7 @@ use tracing::Level; use crate::{ error::{AgentError, AgentResult}, + metrics::DNS_REQUEST_COUNT, watched_task::TaskStatus, }; @@ -187,6 +188,7 @@ impl DnsApi { } self.responses.push_back(response_rx); + DNS_REQUEST_COUNT.inc(); Ok(()) } @@ -209,6 +211,8 @@ impl DnsApi { }), }); + DNS_REQUEST_COUNT.dec(); + Ok(GetAddrInfoResponse(response)) } } diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 2ee1edd9fee..fff01a77631 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -17,6 +17,14 @@ pub(crate) static CLIENT_COUNT: LazyLock = LazyLock::new(|| { .expect("Valid at initialization!") }); +pub(crate) static DNS_REQUEST_COUNT: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_dns_request_count", + "amount of in-progress dns requests in 
the mirrord-agent" + ) + .expect("Valid at initialization!") +}); + pub(crate) static OPEN_FD_COUNT: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_open_fd_count", From f87410dd2daf1a1fc4e20edf40192b429dd29635 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 16:52:01 -0300 Subject: [PATCH 63/85] http in progress --- mirrord/agent/src/metrics.rs | 8 ++++++++ mirrord/agent/src/steal/connection.rs | 25 +++++++++++++++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index fff01a77631..fd1b84ae03f 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -83,6 +83,14 @@ pub(crate) static STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION: LazyLock = .expect("Valid at initialization!") }); +pub(crate) static HTTP_REQUEST_IN_PROGRESS_COUNT: LazyLock = LazyLock::new(|| { + register_int_gauge!( + "mirrord_agent_http_request_in_progress_count", + "amount of in-progress http requests in the mirrord-agent" + ) + .expect("Valid at initialization!") +}); + pub(crate) static TCP_OUTGOING_CONNECTION: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_tcp_outgoing_connection_count", diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index dad1822b82f..de379334697 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -33,6 +33,7 @@ use tracing::{warn, Level}; use crate::{ error::{AgentError, AgentResult}, + metrics::HTTP_REQUEST_IN_PROGRESS_COUNT, steal::{ connections::{ ConnectionMessageIn, ConnectionMessageOut, StolenConnection, StolenConnections, @@ -55,6 +56,22 @@ struct MatchedHttpRequest { } impl MatchedHttpRequest { + fn new( + connection_id: ConnectionId, + port: Port, + request_id: RequestId, + request: Request, + ) -> Self { + HTTP_REQUEST_IN_PROGRESS_COUNT.inc(); + + Self { + connection_id, + port, + request_id, + request, + } + } + async fn 
into_serializable(self) -> AgentResult, hyper::Error> { let ( Parts { @@ -258,6 +275,7 @@ impl Client { } }); + HTTP_REQUEST_IN_PROGRESS_COUNT.dec(); true } } @@ -518,12 +536,7 @@ impl TcpConnectionStealer { return Ok(()); } - let matched_request = MatchedHttpRequest { - connection_id, - request, - request_id: id, - port, - }; + let matched_request = MatchedHttpRequest::new(connection_id, port, id, request); if !client.send_request_async(matched_request) { self.connections From 8a091a92dec6c23e83d932f1ae0fd52e0863048b Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 30 Dec 2024 18:21:27 -0300 Subject: [PATCH 64/85] protocol --- Cargo.lock | 2 +- mirrord/protocol/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10e2521bea8..039e19c1c33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4469,7 +4469,7 @@ dependencies = [ [[package]] name = "mirrord-protocol" -version = "1.13.1" +version = "1.13.2" dependencies = [ "actix-codec", "bincode", diff --git a/mirrord/protocol/Cargo.toml b/mirrord/protocol/Cargo.toml index 70f33186ba1..34832bbe47f 100644 --- a/mirrord/protocol/Cargo.toml +++ b/mirrord/protocol/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mirrord-protocol" -version = "1.13.1" +version = "1.13.2" authors.workspace = true description.workspace = true documentation.workspace = true From c9a8c810dd54651621d4f0d703b3102670fc4511 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 31 Dec 2024 16:05:05 -0300 Subject: [PATCH 65/85] http request in progress --- mirrord/agent/src/steal/api.rs | 14 ++++++++++++++ mirrord/agent/src/steal/connection.rs | 1 - 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/mirrord/agent/src/steal/api.rs b/mirrord/agent/src/steal/api.rs index 64905576b45..d3ca2458bf5 100644 --- a/mirrord/agent/src/steal/api.rs +++ b/mirrord/agent/src/steal/api.rs @@ -15,6 +15,7 @@ use tokio_stream::wrappers::ReceiverStream; use super::*; use crate::{ error::{AgentError, AgentResult}, + 
metrics::HTTP_REQUEST_IN_PROGRESS_COUNT, util::ClientId, watched_task::TaskStatus, }; @@ -43,6 +44,12 @@ pub(crate) struct TcpStealerApi { response_body_txs: HashMap<(ConnectionId, RequestId), Sender>>>, } +impl Drop for TcpStealerApi { + fn drop(&mut self) { + HTTP_REQUEST_IN_PROGRESS_COUNT.set(0); + } +} + impl TcpStealerApi { /// Initializes a [`TcpStealerApi`] and sends a message to [`TcpConnectionStealer`] signaling /// that we have a new client. @@ -97,6 +104,7 @@ impl TcpStealerApi { if let DaemonTcp::Close(close) = &msg { self.response_body_txs .retain(|(key_id, _), _| *key_id != close.connection_id); + HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); } Ok(msg) } @@ -171,6 +179,8 @@ impl TcpStealerApi { LayerTcpSteal::ConnectionUnsubscribe(connection_id) => { self.response_body_txs .retain(|(key_id, _), _| *key_id != connection_id); + HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); + self.connection_unsubscribe(connection_id).await } LayerTcpSteal::PortUnsubscribe(port) => self.port_unsubscribe(port).await, @@ -201,6 +211,7 @@ impl TcpStealerApi { let key = (response.connection_id, response.request_id); self.response_body_txs.insert(key, tx.clone()); + HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); self.http_response(HttpResponseFallback::Streamed(http_response, None)) .await?; @@ -208,6 +219,7 @@ impl TcpStealerApi { for frame in response.internal_response.body { if let Err(err) = tx.send(Ok(frame.into())).await { self.response_body_txs.remove(&key); + HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); tracing::trace!(?err, "error while sending streaming response frame"); } } @@ -230,12 +242,14 @@ impl TcpStealerApi { } if send_err || body.is_last { self.response_body_txs.remove(key); + HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); }; Ok(()) } ChunkedResponse::Error(err) => { self.response_body_txs .remove(&(err.connection_id, 
err.request_id)); + HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); tracing::trace!(?err, "ChunkedResponse error received"); Ok(()) } diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index de379334697..bb0217cd28c 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -275,7 +275,6 @@ impl Client { } }); - HTTP_REQUEST_IN_PROGRESS_COUNT.dec(); true } } From 85f92bd74d77d979456fba1ad6135b7e9f18352c Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 31 Dec 2024 16:07:13 -0300 Subject: [PATCH 66/85] dns --- mirrord/agent/src/dns.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mirrord/agent/src/dns.rs b/mirrord/agent/src/dns.rs index 100cea4a976..c867619cd24 100644 --- a/mirrord/agent/src/dns.rs +++ b/mirrord/agent/src/dns.rs @@ -126,12 +126,16 @@ impl DnsWorker { let etc_path = self.etc_path.clone(); let timeout = self.timeout; let attempts = self.attempts; + + DNS_REQUEST_COUNT.inc(); + let lookup_future = async move { let result = Self::do_lookup(etc_path, message.request.node, attempts, timeout).await; if let Err(result) = message.response_tx.send(result) { tracing::error!(?result, "Failed to send query response"); } + DNS_REQUEST_COUNT.dec(); }; tokio::spawn(lookup_future); @@ -188,7 +192,6 @@ impl DnsApi { } self.responses.push_back(response_rx); - DNS_REQUEST_COUNT.inc(); Ok(()) } @@ -211,8 +214,6 @@ impl DnsApi { }), }); - DNS_REQUEST_COUNT.dec(); - Ok(GetAddrInfoResponse(response)) } } From ecf1df7cd76ca00915e257faf9fe5a2be3cb1b9b Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 31 Dec 2024 16:24:15 -0300 Subject: [PATCH 67/85] 2 new decs in tcp outgoing --- mirrord/agent/src/outgoing.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mirrord/agent/src/outgoing.rs b/mirrord/agent/src/outgoing.rs index 4492e03e13b..6aaab6a641a 100644 --- a/mirrord/agent/src/outgoing.rs +++ b/mirrord/agent/src/outgoing.rs @@ -255,6 
+255,8 @@ impl TcpOutgoingTask { "Layer connection is shut down as well, sending close message.", ); + TCP_OUTGOING_CONNECTION.dec(); + self.daemon_tx .send(DaemonTcpOutgoing::Close(connection_id)) .await?; @@ -354,6 +356,7 @@ impl TcpOutgoingTask { connection_id, "Peer connection is shut down as well, sending close message to the client.", ); + TCP_OUTGOING_CONNECTION.dec(); self.daemon_tx .send(DaemonTcpOutgoing::Close(connection_id)) From c8ce3f7ff41aa80436ed8862e4c0c7a465cab9d7 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 31 Dec 2024 16:40:02 -0300 Subject: [PATCH 68/85] dec fd twice --- mirrord/agent/src/file.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 78c99be5016..8e6d84a0354 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -589,10 +589,16 @@ impl FileManager { /// on `close_dir` of an fd. #[tracing::instrument(level = Level::TRACE, skip(self))] pub(crate) fn close_dir(&mut self, fd: u64) -> Option { - if self.dir_streams.remove(&fd).is_none() && self.getdents_streams.remove(&fd).is_none() { - error!("FileManager::close_dir -> fd {:#?} not found", fd); - } else { + let closed_dir_stream = self.dir_streams.remove(&fd); + let closed_getdents_stream = self.getdents_streams.remove(&fd); + + if closed_dir_stream.is_some() && closed_getdents_stream.is_some() { + OPEN_FD_COUNT.dec(); OPEN_FD_COUNT.dec(); + } else if closed_dir_stream.is_some() || closed_getdents_stream.is_some() { + OPEN_FD_COUNT.dec(); + } else { + error!("FileManager::close_dir -> fd {:#?} not found", fd); } None From 101dd99d3e61311a06f3b8bb5d5f122714a4604b Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 31 Dec 2024 16:43:35 -0300 Subject: [PATCH 69/85] filtered port --- mirrord/agent/src/steal/subscriptions.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/mirrord/agent/src/steal/subscriptions.rs b/mirrord/agent/src/steal/subscriptions.rs index 
880af28a93c..0a01c0d36df 100644 --- a/mirrord/agent/src/steal/subscriptions.rs +++ b/mirrord/agent/src/steal/subscriptions.rs @@ -195,7 +195,14 @@ impl PortSubscriptions { ) -> Result, R::Error> { let add_redirect = match self.subscriptions.entry(port) { Entry::Occupied(mut e) => { + let filtered = filter.is_some(); if e.get_mut().try_extend(client_id, filter) { + if filtered { + STEAL_FILTERED_PORT_SUBSCRIPTION.inc(); + } else { + STEAL_UNFILTERED_PORT_SUBSCRIPTION.inc(); + } + Ok(false) } else { Err(ResponseError::PortAlreadyStolen(port)) @@ -203,6 +210,12 @@ impl PortSubscriptions { } Entry::Vacant(e) => { + if filter.is_some() { + STEAL_FILTERED_PORT_SUBSCRIPTION.inc(); + } else { + STEAL_UNFILTERED_PORT_SUBSCRIPTION.inc(); + } + e.insert(PortSubscription::new(client_id, filter)); Ok(true) } From 4ef431afd17b74da15da8f167c6c503db1aece42 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 6 Jan 2025 11:55:47 -0300 Subject: [PATCH 70/85] some docs --- mirrord/agent/src/metrics.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index fd1b84ae03f..5332ad2e7db 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -9,6 +9,8 @@ use tracing::Level; use crate::error::AgentError; +/// Incremented whenever we get a new client in `ClientConnectionHandler`, and decremented +/// when this client is dropped. pub(crate) static CLIENT_COUNT: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_client_count", @@ -17,6 +19,8 @@ pub(crate) static CLIENT_COUNT: LazyLock = LazyLock::new(|| { .expect("Valid at initialization!") }); +/// Incremented whenever we handle a new `DnsCommand`, and decremented after the result of +/// `do_lookup` has been sent back through the response channel. 
pub(crate) static DNS_REQUEST_COUNT: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_dns_request_count", @@ -25,6 +29,8 @@ pub(crate) static DNS_REQUEST_COUNT: LazyLock = LazyLock::new(|| { .expect("Valid at initialization!") }); +/// Incremented and decremented in _open-ish_/_close-ish_ file operations in `FileManager`. +/// Also gets decremented when `FileManager` is dropped. pub(crate) static OPEN_FD_COUNT: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_open_fd_count", @@ -33,6 +39,9 @@ pub(crate) static OPEN_FD_COUNT: LazyLock = LazyLock::new(|| { .expect("Valid at initialization!") }); +/// Follows the amount of subscribed ports in `update_packet_filter`. We don't really +/// increment/decrement this one, and mostly `set` it to the latest amount of ports, zeroing it when +/// the `TcpConnectionSniffer` gets dropped. pub(crate) static MIRROR_PORT_SUBSCRIPTION: LazyLock = LazyLock::new(|| { register_int_gauge!( "mirrord_agent_mirror_port_subscription_count", From a3c57124b62ea220fc8d6d17180a19ab1552a403 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 6 Jan 2025 12:09:31 -0300 Subject: [PATCH 71/85] improve error with display impl --- mirrord/protocol/src/error.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/mirrord/protocol/src/error.rs b/mirrord/protocol/src/error.rs index 20ac38a149d..5ded73a56f6 100644 --- a/mirrord/protocol/src/error.rs +++ b/mirrord/protocol/src/error.rs @@ -44,7 +44,7 @@ pub enum ResponseError { #[error("Remote operation expected fd `{0}` to be a file, but it's a directory!")] NotFile(u64), - #[error("IO failed for remote operation with `{0}!")] + #[error("IO failed for remote operation: `{0}!")] RemoteIO(RemoteIOError), #[error(transparent)] @@ -153,13 +153,26 @@ impl From for RemoteError { /// Our internal version of Rust's `std::io::Error` that can be passed between mirrord-layer and /// mirrord-agent. 
-#[derive(Encode, Decode, Debug, PartialEq, Clone, Eq, Error)] -#[error("Failed performing `getaddrinfo` with {raw_os_error:?} and kind {kind:?}!")] +/// +/// ### `Display` +/// +/// We manually implement `Display` as this error is mostly seen from a [`ResponseError`] context. +#[derive(Encode, Decode, Debug, PartialEq, Clone, Eq)] pub struct RemoteIOError { pub raw_os_error: Option, pub kind: ErrorKindInternal, } +impl core::fmt::Display for RemoteIOError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self.kind)?; + if let Some(code) = self.raw_os_error { + write!(f, " (error code {code})")?; + } + Ok(()) + } +} + /// Our internal version of Rust's `std::io::Error` that can be passed between mirrord-layer and /// mirrord-agent. /// From ad2150556172ea8784478c737cc46b312ecfa306 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 15 Jan 2025 16:00:02 -0300 Subject: [PATCH 72/85] Make UdpOutgoing look more like TcpOutgoing --- mirrord/agent/src/entrypoint.rs | 4 +- mirrord/agent/src/outgoing/udp.rs | 417 ++++++++++++++++++------------ 2 files changed, 256 insertions(+), 165 deletions(-) diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index d00bc7aec83..17ad783da7c 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -373,7 +373,7 @@ impl ClientConnectionHandler { Ok(message) => self.respond(DaemonMessage::TcpOutgoing(message)).await?, Err(e) => break e, }, - message = self.udp_outgoing_api.daemon_message() => match message { + message = self.udp_outgoing_api.recv_from_task() => match message { Ok(message) => self.respond(DaemonMessage::UdpOutgoing(message)).await?, Err(e) => break e, }, @@ -424,7 +424,7 @@ impl ClientConnectionHandler { self.tcp_outgoing_api.send_to_task(layer_message).await? } ClientMessage::UdpOutgoing(layer_message) => { - self.udp_outgoing_api.layer_message(layer_message).await? + self.udp_outgoing_api.send_to_task(layer_message).await? 
} ClientMessage::GetEnvVarsRequest(GetEnvVarsRequest { env_vars_filter, diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index 35f30c90da8..75f0020af75 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -1,10 +1,11 @@ +use core::fmt; use std::{ collections::HashMap, net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}, thread, }; -use bytes::BytesMut; +use bytes::{Bytes, BytesMut}; use futures::{ prelude::*, stream::{SplitSink, SplitStream}, @@ -15,12 +16,13 @@ use mirrord_protocol::{ }; use streammap_ext::StreamMap; use tokio::{ + io, net::UdpSocket, select, - sync::mpsc::{self, Receiver, Sender}, + sync::mpsc::{self, error::SendError, Receiver, Sender}, }; use tokio_util::{codec::BytesCodec, udp::UdpFramed}; -use tracing::{debug, trace, warn}; +use tracing::Level; use crate::{ error::AgentResult, @@ -29,8 +31,243 @@ use crate::{ watched_task::{TaskStatus, WatchedTask}, }; -type Layer = LayerUdpOutgoing; -type Daemon = DaemonUdpOutgoing; +/// Task that handles [`LayerUdpOutgoing`] and [`DaemonUdpOutgoing`] messages. +/// +/// We start these tasks from the [`UdpOutgoingApi`] as a [`WatchedTask`]. +struct UdpOutgoingTask { + next_connection_id: ConnectionId, + /// Writing halves of peer connections made on layer's requests. + writers: HashMap< + ConnectionId, + ( + SplitSink, (BytesMut, SocketAddr)>, + SocketAddr, + ), + >, + /// Reading halves of peer connections made on layer's requests. + readers: StreamMap>>, + /// Optional pid of agent's target. Used in [`SocketStream::connect`]. 
+ pid: Option, + layer_rx: Receiver, + daemon_tx: Sender, +} + +impl Drop for UdpOutgoingTask { + fn drop(&mut self) { + let connections = self.readers.keys().chain(self.writers.keys()).count(); + UDP_OUTGOING_CONNECTION.sub(connections as i64); + } +} + +impl fmt::Debug for UdpOutgoingTask { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("UdpOutgoingTask") + .field("next_connection_id", &self.next_connection_id) + .field("writers", &self.writers.len()) + .field("readers", &self.readers.len()) + .field("pid", &self.pid) + .finish() + } +} + +impl UdpOutgoingTask { + fn new( + pid: Option, + layer_rx: Receiver, + daemon_tx: Sender, + ) -> Self { + Self { + next_connection_id: 0, + writers: Default::default(), + readers: Default::default(), + pid, + layer_rx, + daemon_tx, + } + } + + /// Runs this task as long as the channels connecting it with [`UdpOutgoingApi`] are open. + /// This routine never fails and returns [`Result`] only due to [`WatchedTask`] constraints. + #[tracing::instrument(level = Level::TRACE, skip(self))] + pub(super) async fn run(mut self) -> AgentResult<()> { + loop { + let channel_closed = select! { + biased; + + message = self.layer_rx.recv() => match message { + // We have a message from the layer to be handled. + Some(message) => { + self.handle_layer_msg(message).await.is_err() + }, + // Our channel with the layer is closed, this task is no longer needed. + None => true, + }, + + // We have data coming from one of our peers. + Some((connection_id, remote_read)) = self.readers.next() => { + self.handle_connection_read(connection_id, remote_read.transpose().map(|remote| remote.map(|(read, _)| read.into()))).await.is_err() + }, + }; + + if channel_closed { + tracing::trace!("Client channel closed, exiting"); + break Ok(()); + } + } + } + + /// Returns [`Err`] only when the client has disconnected. 
+ #[tracing::instrument( + level = Level::TRACE, + skip(read), + fields(read = ?read.as_ref().map(|data| data.as_ref().map(Bytes::len).unwrap_or_default())) + err(level = Level::TRACE) + )] + async fn handle_connection_read( + &mut self, + connection_id: ConnectionId, + read: io::Result>, + ) -> AgentResult<(), SendError> { + match read { + Ok(Some(read)) => { + let message = DaemonUdpOutgoing::Read(Ok(DaemonRead { + connection_id, + bytes: read.to_vec(), + })); + + self.daemon_tx.send(message).await? + } + // An error occurred when reading from a peer connection. + // We remove both io halves and inform the layer that the connection is closed. + // We remove the reader, because otherwise the `StreamMap` will produce an extra `None` + // item from the related stream. + Err(error) => { + tracing::trace!( + ?error, + connection_id, + "Reading from peer connection failed, sending close message.", + ); + + self.readers.remove(&connection_id); + self.writers.remove(&connection_id); + UDP_OUTGOING_CONNECTION.dec(); + + let daemon_message = DaemonUdpOutgoing::Close(connection_id); + self.daemon_tx.send(daemon_message).await?; + } + Ok(None) => { + self.writers.remove(&connection_id); + self.readers.remove(&connection_id); + UDP_OUTGOING_CONNECTION.dec(); + + let daemon_message = DaemonUdpOutgoing::Close(connection_id); + self.daemon_tx.send(daemon_message).await?; + } + } + + Ok(()) + } + + /// Returns [`Err`] only when the client has disconnected. + #[tracing::instrument(level = Level::TRACE, ret)] + async fn handle_layer_msg( + &mut self, + message: LayerUdpOutgoing, + ) -> AgentResult<(), SendError> { + match message { + // [user] -> [layer] -> [agent] -> [layer] + // `user` is asking us to connect to some remote host. 
+ LayerUdpOutgoing::Connect(LayerConnect { remote_address }) => { + let daemon_connect = + connect(remote_address.clone()) + .await + .and_then(|mirror_socket| { + let connection_id = self.next_connection_id; + self.next_connection_id += 1; + + let peer_address = mirror_socket.peer_addr()?; + let local_address = mirror_socket.local_addr()?; + let local_address = SocketAddress::Ip(local_address); + + let framed = UdpFramed::new(mirror_socket, BytesCodec::new()); + + let (sink, stream): ( + SplitSink, (BytesMut, SocketAddr)>, + SplitStream>, + ) = framed.split(); + + self.writers.insert(connection_id, (sink, peer_address)); + self.readers.insert(connection_id, stream); + UDP_OUTGOING_CONNECTION.inc(); + + Ok(DaemonConnect { + connection_id, + remote_address, + local_address, + }) + }); + + tracing::trace!( + result = ?daemon_connect, + "Connection attempt finished.", + ); + + self.daemon_tx + .send(DaemonUdpOutgoing::Connect(daemon_connect)) + .await?; + + Ok(()) + } + // [user] -> [layer] -> [agent] -> [remote] + // `user` wrote some message to the remote host. + LayerUdpOutgoing::Write(LayerWrite { + connection_id, + bytes, + }) => { + let write_result = match self + .writers + .get_mut(&connection_id) + .ok_or(ResponseError::NotFound(connection_id)) + { + Ok((mirror, remote_address)) => mirror + .send((BytesMut::from(bytes.as_slice()), *remote_address)) + .await + .map_err(ResponseError::from), + Err(fail) => Err(fail), + }; + + match write_result { + Ok(()) => Ok(()), + Err(error) => { + self.writers.remove(&connection_id); + self.readers.remove(&connection_id); + UDP_OUTGOING_CONNECTION.dec(); + + tracing::trace!( + connection_id, + ?error, + "Failed to handle layer write, sending close message to the client.", + ); + + let daemon_message = DaemonUdpOutgoing::Close(connection_id); + self.daemon_tx.send(daemon_message).await?; + + Ok(()) + } + } + } + // [layer] -> [agent] + // `layer` closed their interceptor stream. 
+ LayerUdpOutgoing::Close(LayerClose { ref connection_id }) => { + self.writers.remove(connection_id); + self.readers.remove(connection_id); + UDP_OUTGOING_CONNECTION.dec(); + + Ok(()) + } + } + } +} /// Handles (briefly) the `UdpOutgoingRequest` and `UdpOutgoingResponse` messages, mostly the /// passing of these messages to the `interceptor_task` thread. @@ -42,10 +279,10 @@ pub(crate) struct UdpOutgoingApi { task_status: TaskStatus, /// Sends the `Layer` message to the `interceptor_task`. - layer_tx: Sender, + layer_tx: Sender, /// Reads the `Daemon` message from the `interceptor_task`. - daemon_rx: Receiver, + daemon_rx: Receiver, } /// Performs an [`UdpSocket::connect`] that handles 3 situations: @@ -56,8 +293,9 @@ pub(crate) struct UdpOutgoingApi { /// read access to `/etc/resolv.conf`, otherwise they'll be getting a mismatched connection; /// 3. User is trying to use `sendto` and `recvfrom`, we use the same hack as in DNS to fake a /// connection. -#[tracing::instrument(level = "trace", ret)] -async fn connect(remote_address: SocketAddr) -> AgentResult { +#[tracing::instrument(level = Level::TRACE, ret, err(level = Level::DEBUG))] +async fn connect(remote_address: SocketAddress) -> AgentResult { + let remote_address = remote_address.try_into()?; let mirror_address = match remote_address { std::net::SocketAddr::V4(_) => SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0), std::net::SocketAddr::V6(_) => SocketAddr::new(IpAddr::V6(Ipv6Addr::UNSPECIFIED), 0), @@ -76,8 +314,10 @@ impl UdpOutgoingApi { let (layer_tx, layer_rx) = mpsc::channel(1000); let (daemon_tx, daemon_rx) = mpsc::channel(1000); - let watched_task = - WatchedTask::new(Self::TASK_NAME, Self::interceptor_task(layer_rx, daemon_tx)); + let watched_task = WatchedTask::new( + Self::TASK_NAME, + UdpOutgoingTask::new(pid, layer_rx, daemon_tx).run(), + ); let task_status = watched_task.status(); let task = run_thread_in_namespace( @@ -95,158 +335,9 @@ impl UdpOutgoingApi { } } - /// The 
[`UdpOutgoingApi`] task. - /// - /// Receives [`LayerUdpOutgoing`] messages and replies with [`DaemonUdpOutgoing`]. - #[allow(clippy::type_complexity)] - async fn interceptor_task( - mut layer_rx: Receiver, - daemon_tx: Sender, - ) -> AgentResult<()> { - let mut connection_ids = 0..=ConnectionId::MAX; - - // TODO: Right now we're manually keeping these 2 maps in sync (aviram suggested using - // `Weak` for `writers`). - let mut writers: HashMap< - ConnectionId, - ( - SplitSink, (BytesMut, SocketAddr)>, - SocketAddr, - ), - > = HashMap::default(); - - let mut readers: StreamMap>> = - StreamMap::default(); - - loop { - select! { - biased; - - // [layer] -> [agent] - Some(layer_message) = layer_rx.recv() => { - trace!("udp: interceptor_task -> layer_message {:?}", layer_message); - match layer_message { - // [user] -> [layer] -> [agent] -> [layer] - // `user` is asking us to connect to some remote host. - LayerUdpOutgoing::Connect(LayerConnect { remote_address }) => { - let daemon_connect = connect(remote_address.clone().try_into()?) 
- .await - .and_then(|mirror_socket| { - let connection_id = connection_ids - .next() - .ok_or_else(|| ResponseError::IdsExhausted("connect".into()))?; - - debug!("interceptor_task -> mirror_socket {:#?}", mirror_socket); - - let peer_address = mirror_socket.peer_addr()?; - let local_address = mirror_socket.local_addr()?; - let local_address = SocketAddress::Ip(local_address); - - let framed = UdpFramed::new(mirror_socket, BytesCodec::new()); - debug!("interceptor_task -> framed {:#?}", framed); - - let (sink, stream): ( - SplitSink, (BytesMut, SocketAddr)>, - SplitStream>, - ) = framed.split(); - - writers.insert(connection_id, (sink, peer_address)); - readers.insert(connection_id, stream); - UDP_OUTGOING_CONNECTION.inc(); - - Ok(DaemonConnect { - connection_id, - remote_address, - local_address - }) - }); - - let daemon_message = DaemonUdpOutgoing::Connect(daemon_connect); - debug!("interceptor_task -> daemon_message {:#?}", daemon_message); - - daemon_tx.send(daemon_message).await?; - } - // [user] -> [layer] -> [agent] -> [remote] - // `user` wrote some message to the remote host. - LayerUdpOutgoing::Write(LayerWrite { - connection_id, - bytes, - }) => { - let daemon_write = match writers - .get_mut(&connection_id) - .ok_or(ResponseError::NotFound(connection_id)) - { - Ok((mirror, remote_address)) => mirror - .send((BytesMut::from(bytes.as_slice()), *remote_address)) - .await - .map_err(ResponseError::from), - Err(fail) => Err(fail), - }; - - if let Err(fail) = daemon_write { - warn!("LayerUdpOutgoing::Write -> Failed with {:#?}", fail); - writers.remove(&connection_id); - readers.remove(&connection_id); - UDP_OUTGOING_CONNECTION.dec(); - - let daemon_message = DaemonUdpOutgoing::Close(connection_id); - daemon_tx.send(daemon_message).await?; - } - } - // [layer] -> [agent] - // `layer` closed their interceptor stream. 
- LayerUdpOutgoing::Close(LayerClose { ref connection_id }) => { - writers.remove(connection_id); - readers.remove(connection_id); - UDP_OUTGOING_CONNECTION.dec(); - } - } - } - - // [remote] -> [agent] -> [layer] -> [user] - // Read the data from one of the connected remote hosts, and forward the result back - // to the `user`. - Some((connection_id, remote_read)) = readers.next() => { - trace!("interceptor_task -> read connection_id {:#?}", connection_id); - - match remote_read { - Some(read) => { - let daemon_read = read - .map_err(ResponseError::from) - .map(|(bytes, _)| DaemonRead { connection_id, bytes: bytes.to_vec() }); - - let daemon_message = DaemonUdpOutgoing::Read(daemon_read); - daemon_tx.send(daemon_message).await? - } - None => { - trace!("interceptor_task -> close connection {:#?}", connection_id); - writers.remove(&connection_id); - readers.remove(&connection_id); - UDP_OUTGOING_CONNECTION.dec(); - - let daemon_message = DaemonUdpOutgoing::Close(connection_id); - daemon_tx.send(daemon_message).await?; - } - } - } - else => { - // We have no more data coming from any of the remote hosts. - warn!("interceptor_task -> no messages left"); - break; - } - } - } - - Ok(()) - } - /// Sends a `UdpOutgoingRequest` to the `interceptor_task`. - pub(crate) async fn layer_message(&mut self, message: LayerUdpOutgoing) -> AgentResult<()> { - trace!( - "UdpOutgoingApi::layer_message -> layer_message {:#?}", - message - ); - + #[tracing::instrument(level = Level::TRACE, skip(self), err)] + pub(crate) async fn send_to_task(&mut self, message: LayerUdpOutgoing) -> AgentResult<()> { if self.layer_tx.send(message).await.is_ok() { Ok(()) } else { @@ -255,7 +346,7 @@ impl UdpOutgoingApi { } /// Receives a `UdpOutgoingResponse` from the `interceptor_task`. 
- pub(crate) async fn daemon_message(&mut self) -> AgentResult { + pub(crate) async fn recv_from_task(&mut self) -> AgentResult { match self.daemon_rx.recv().await { Some(msg) => Ok(msg), None => Err(self.task_status.unwrap_err().await), From 6535fe229f21020e484bae003c9b879bc27cd61b Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 15 Jan 2025 16:18:17 -0300 Subject: [PATCH 73/85] AgentError everywhere --- mirrord/agent/src/dns.rs | 18 ++++-------------- mirrord/agent/src/sniffer.rs | 14 +++++++------- mirrord/agent/src/sniffer/api.rs | 13 ++++++++----- mirrord/agent/src/sniffer/tcp_capture.rs | 4 ++-- mirrord/agent/src/steal/api.rs | 24 ++++++++++-------------- mirrord/agent/src/steal/connection.rs | 16 +++++----------- mirrord/agent/src/steal/subscriptions.rs | 4 ++-- mirrord/agent/src/util.rs | 4 ++-- mirrord/agent/src/watched_task.rs | 4 ++-- 9 files changed, 42 insertions(+), 59 deletions(-) diff --git a/mirrord/agent/src/dns.rs b/mirrord/agent/src/dns.rs index c867619cd24..23f21e5f5b4 100644 --- a/mirrord/agent/src/dns.rs +++ b/mirrord/agent/src/dns.rs @@ -16,11 +16,7 @@ use tokio::{ use tokio_util::sync::CancellationToken; use tracing::Level; -use crate::{ - error::{AgentError, AgentResult}, - metrics::DNS_REQUEST_COUNT, - watched_task::TaskStatus, -}; +use crate::{error::AgentResult, metrics::DNS_REQUEST_COUNT, watched_task::TaskStatus}; #[derive(Debug)] pub(crate) struct DnsCommand { @@ -141,10 +137,7 @@ impl DnsWorker { tokio::spawn(lookup_future); } - pub(crate) async fn run( - mut self, - cancellation_token: CancellationToken, - ) -> AgentResult<(), AgentError> { + pub(crate) async fn run(mut self, cancellation_token: CancellationToken) -> AgentResult<()> { loop { tokio::select! { _ = cancellation_token.cancelled() => break Ok(()), @@ -177,10 +170,7 @@ impl DnsApi { /// Schedules a new DNS request. /// Results of scheduled requests are available via [`Self::recv`] (order is preserved). 
- pub(crate) async fn make_request( - &mut self, - request: GetAddrInfoRequest, - ) -> AgentResult<(), AgentError> { + pub(crate) async fn make_request(&mut self, request: GetAddrInfoRequest) -> AgentResult<()> { let (response_tx, response_rx) = oneshot::channel(); let command = DnsCommand { @@ -199,7 +189,7 @@ impl DnsApi { /// Returns the result of the oldest outstanding DNS request issued with this struct (see /// [`Self::make_request`]). #[tracing::instrument(level = Level::TRACE, skip(self), ret, err)] - pub(crate) async fn recv(&mut self) -> AgentResult { + pub(crate) async fn recv(&mut self) -> AgentResult { let Some(response) = self.responses.next().await else { return future::pending().await; }; diff --git a/mirrord/agent/src/sniffer.rs b/mirrord/agent/src/sniffer.rs index 7c8beab353a..6a653587566 100644 --- a/mirrord/agent/src/sniffer.rs +++ b/mirrord/agent/src/sniffer.rs @@ -24,7 +24,7 @@ use self::{ tcp_capture::RawSocketTcpCapture, }; use crate::{ - error::AgentError, + error::AgentResult, http::HttpVersion, metrics::{MIRROR_CONNECTION_SUBSCRIPTION, MIRROR_PORT_SUBSCRIPTION}, util::{ChannelClosedFuture, ClientId, Subscriptions}, @@ -171,7 +171,7 @@ impl TcpConnectionSniffer { command_rx: Receiver, network_interface: Option, is_mesh: bool, - ) -> Result { + ) -> AgentResult { let tcp_capture = RawSocketTcpCapture::new(network_interface, is_mesh).await?; Ok(Self { @@ -198,7 +198,7 @@ where /// Runs the sniffer loop, capturing packets. #[tracing::instrument(level = Level::DEBUG, skip(cancel_token), err)] - pub async fn start(mut self, cancel_token: CancellationToken) -> Result<(), AgentError> { + pub async fn start(mut self, cancel_token: CancellationToken) -> AgentResult<()> { loop { select! { command = self.command_rx.recv() => { @@ -240,7 +240,7 @@ where /// Removes the client with `client_id`, and also unsubscribes its port. /// Adjusts BPF filter if needed. 
#[tracing::instrument(level = Level::TRACE, err)] - fn handle_client_closed(&mut self, client_id: ClientId) -> Result<(), AgentError> { + fn handle_client_closed(&mut self, client_id: ClientId) -> AgentResult<()> { self.client_txs.remove(&client_id); if self.port_subscriptions.remove_client(client_id) { @@ -253,7 +253,7 @@ where /// Updates BPF filter used by [`Self::tcp_capture`] to match state of /// [`Self::port_subscriptions`]. #[tracing::instrument(level = Level::TRACE, err)] - fn update_packet_filter(&mut self) -> Result<(), AgentError> { + fn update_packet_filter(&mut self) -> AgentResult<()> { let ports = self.port_subscriptions.get_subscribed_topics(); MIRROR_PORT_SUBSCRIPTION.set(ports.len() as i64); @@ -270,7 +270,7 @@ where } #[tracing::instrument(level = Level::TRACE, err)] - fn handle_command(&mut self, command: SnifferCommand) -> Result<(), AgentError> { + fn handle_command(&mut self, command: SnifferCommand) -> AgentResult<()> { match command { SnifferCommand { client_id, @@ -334,7 +334,7 @@ where &mut self, identifier: TcpSessionIdentifier, tcp_packet: TcpPacketData, - ) -> Result<(), AgentError> { + ) -> AgentResult<()> { let data_tx = match self.sessions.entry(identifier) { Entry::Occupied(e) => e, Entry::Vacant(e) => { diff --git a/mirrord/agent/src/sniffer/api.rs b/mirrord/agent/src/sniffer/api.rs index 2dea5534dce..08874e93124 100644 --- a/mirrord/agent/src/sniffer/api.rs +++ b/mirrord/agent/src/sniffer/api.rs @@ -14,7 +14,10 @@ use tokio_stream::{ StreamMap, StreamNotifyClose, }; -use super::messages::{SniffedConnection, SnifferCommand, SnifferCommandInner}; +use super::{ + messages::{SniffedConnection, SnifferCommand, SnifferCommandInner}, + AgentResult, +}; use crate::{error::AgentError, util::ClientId, watched_task::TaskStatus}; /// Interface used by clients to interact with the @@ -57,7 +60,7 @@ impl TcpSnifferApi { client_id: ClientId, sniffer_sender: Sender, mut task_status: TaskStatus, - ) -> Result { + ) -> AgentResult { let (sender, 
receiver) = mpsc::channel(Self::CONNECTION_CHANNEL_SIZE); let command = SnifferCommand { @@ -81,7 +84,7 @@ impl TcpSnifferApi { /// Send the given command to the connected /// [`TcpConnectionSniffer`](super::TcpConnectionSniffer). - async fn send_command(&mut self, command: SnifferCommandInner) -> Result<(), AgentError> { + async fn send_command(&mut self, command: SnifferCommandInner) -> AgentResult<()> { let command = SnifferCommand { client_id: self.client_id, command, @@ -96,7 +99,7 @@ impl TcpSnifferApi { /// Return the next message from the connected /// [`TcpConnectionSniffer`](super::TcpConnectionSniffer). - pub async fn recv(&mut self) -> Result<(DaemonTcp, Option), AgentError> { + pub async fn recv(&mut self) -> AgentResult<(DaemonTcp, Option)> { tokio::select! { conn = self.receiver.recv() => match conn { Some(conn) => { @@ -162,7 +165,7 @@ impl TcpSnifferApi { /// Tansforms a [`LayerTcp`] message into a [`SnifferCommand`] and passes it to the connected /// [`TcpConnectionSniffer`](super::TcpConnectionSniffer). - pub async fn handle_client_message(&mut self, message: LayerTcp) -> Result<(), AgentError> { + pub async fn handle_client_message(&mut self, message: LayerTcp) -> AgentResult<()> { match message { LayerTcp::PortSubscribe(port) => { let (tx, rx) = oneshot::channel(); diff --git a/mirrord/agent/src/sniffer/tcp_capture.rs b/mirrord/agent/src/sniffer/tcp_capture.rs index 1d8031d08b3..dc8fb2bba04 100644 --- a/mirrord/agent/src/sniffer/tcp_capture.rs +++ b/mirrord/agent/src/sniffer/tcp_capture.rs @@ -12,7 +12,7 @@ use rawsocket::{filter::SocketFilterProgram, RawCapture}; use tokio::net::UdpSocket; use tracing::Level; -use super::{TcpPacketData, TcpSessionIdentifier}; +use super::{AgentResult, TcpPacketData, TcpSessionIdentifier}; use crate::error::AgentError; /// Trait for structs that are able to sniff incoming Ethernet packets and filter TCP packets. 
@@ -36,7 +36,7 @@ impl RawSocketTcpCapture { /// /// Returned instance initially uses a BPF filter that drops every packet. #[tracing::instrument(level = Level::DEBUG, err)] - pub async fn new(network_interface: Option, is_mesh: bool) -> Result { + pub async fn new(network_interface: Option, is_mesh: bool) -> AgentResult { // Priority is whatever the user set as an option to mirrord, then we check if we're in a // mesh to use `lo` interface, otherwise we try to get the appropriate interface. let interface = match network_interface.or_else(|| is_mesh.then(|| "lo".to_string())) { diff --git a/mirrord/agent/src/steal/api.rs b/mirrord/agent/src/steal/api.rs index d3ca2458bf5..e411f7971d0 100644 --- a/mirrord/agent/src/steal/api.rs +++ b/mirrord/agent/src/steal/api.rs @@ -11,12 +11,11 @@ use mirrord_protocol::{ }; use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio_stream::wrappers::ReceiverStream; +use tracing::Level; use super::*; use crate::{ - error::{AgentError, AgentResult}, - metrics::HTTP_REQUEST_IN_PROGRESS_COUNT, - util::ClientId, + error::AgentResult, metrics::HTTP_REQUEST_IN_PROGRESS_COUNT, util::ClientId, watched_task::TaskStatus, }; @@ -53,14 +52,14 @@ impl Drop for TcpStealerApi { impl TcpStealerApi { /// Initializes a [`TcpStealerApi`] and sends a message to [`TcpConnectionStealer`] signaling /// that we have a new client. - #[tracing::instrument(level = "trace")] + #[tracing::instrument(level = Level::TRACE, err)] pub(crate) async fn new( client_id: ClientId, command_tx: Sender, task_status: TaskStatus, channel_size: usize, protocol_version: semver::Version, - ) -> AgentResult { + ) -> AgentResult { let (daemon_tx, daemon_rx) = mpsc::channel(channel_size); command_tx @@ -116,10 +115,7 @@ impl TcpStealerApi { /// agent, to an internal stealer command [`Command::PortSubscribe`]. /// /// The actual handling of this message is done in [`TcpConnectionStealer`]. 
- pub(crate) async fn port_subscribe( - &mut self, - port_steal: StealType, - ) -> AgentResult<(), AgentError> { + pub(crate) async fn port_subscribe(&mut self, port_steal: StealType) -> AgentResult<()> { self.send_command(Command::PortSubscribe(port_steal)).await } @@ -127,7 +123,7 @@ impl TcpStealerApi { /// agent, to an internal stealer command [`Command::PortUnsubscribe`]. /// /// The actual handling of this message is done in [`TcpConnectionStealer`]. - pub(crate) async fn port_unsubscribe(&mut self, port: Port) -> AgentResult<(), AgentError> { + pub(crate) async fn port_unsubscribe(&mut self, port: Port) -> AgentResult<()> { self.send_command(Command::PortUnsubscribe(port)).await } @@ -138,7 +134,7 @@ impl TcpStealerApi { pub(crate) async fn connection_unsubscribe( &mut self, connection_id: ConnectionId, - ) -> AgentResult<(), AgentError> { + ) -> AgentResult<()> { self.send_command(Command::ConnectionUnsubscribe(connection_id)) .await } @@ -147,7 +143,7 @@ impl TcpStealerApi { /// agent, to an internal stealer command [`Command::ResponseData`]. /// /// The actual handling of this message is done in [`TcpConnectionStealer`]. 
- pub(crate) async fn client_data(&mut self, tcp_data: TcpData) -> AgentResult<(), AgentError> { + pub(crate) async fn client_data(&mut self, tcp_data: TcpData) -> AgentResult<()> { self.send_command(Command::ResponseData(tcp_data)).await } @@ -158,14 +154,14 @@ impl TcpStealerApi { pub(crate) async fn http_response( &mut self, response: HttpResponseFallback, - ) -> AgentResult<(), AgentError> { + ) -> AgentResult<()> { self.send_command(Command::HttpResponse(response)).await } pub(crate) async fn switch_protocol_version( &mut self, version: semver::Version, - ) -> AgentResult<(), AgentError> { + ) -> AgentResult<()> { self.send_command(Command::SwitchProtocolVersion(version)) .await } diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index 7a1c358def9..f4d7589ee06 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -32,7 +32,7 @@ use tokio_util::sync::CancellationToken; use tracing::{warn, Level}; use crate::{ - error::{AgentError, AgentResult}, + error::AgentResult, metrics::HTTP_REQUEST_IN_PROGRESS_COUNT, steal::{ connections::{ @@ -360,10 +360,7 @@ impl TcpConnectionStealer { /// /// 4. Handling the cancellation of the whole stealer thread (given `cancellation_token`). #[tracing::instrument(level = "trace", skip(self))] - pub(crate) async fn start( - mut self, - cancellation_token: CancellationToken, - ) -> AgentResult<(), AgentError> { + pub(crate) async fn start(mut self, cancellation_token: CancellationToken) -> AgentResult<()> { loop { tokio::select! { command = self.command_rx.recv() => { @@ -441,10 +438,7 @@ impl TcpConnectionStealer { /// Handles an update from one of the connections in [`Self::connections`]. 
#[tracing::instrument(level = "trace", skip(self))] - async fn handle_connection_update( - &mut self, - update: ConnectionMessageOut, - ) -> AgentResult<(), AgentError> { + async fn handle_connection_update(&mut self, update: ConnectionMessageOut) -> AgentResult<()> { match update { ConnectionMessageOut::Closed { connection_id, @@ -612,7 +606,7 @@ impl TcpConnectionStealer { /// their subscriptions from [`Self::port_subscriptions`] and all their open /// connections. #[tracing::instrument(level = "trace", skip(self))] - async fn close_client(&mut self, client_id: ClientId) -> AgentResult<(), AgentError> { + async fn close_client(&mut self, client_id: ClientId) -> AgentResult<()> { self.port_subscriptions.remove_all(client_id).await?; let client = self.clients.remove(&client_id).expect("client not found"); @@ -669,7 +663,7 @@ impl TcpConnectionStealer { /// Handles [`Command`]s that were received by [`TcpConnectionStealer::command_rx`]. #[tracing::instrument(level = Level::TRACE, skip(self), err)] - async fn handle_command(&mut self, command: StealerCommand) -> AgentResult<(), AgentError> { + async fn handle_command(&mut self, command: StealerCommand) -> AgentResult<()> { let StealerCommand { client_id, command } = command; match command { diff --git a/mirrord/agent/src/steal/subscriptions.rs b/mirrord/agent/src/steal/subscriptions.rs index 65616e6eb4e..dfe05ea3fbe 100644 --- a/mirrord/agent/src/steal/subscriptions.rs +++ b/mirrord/agent/src/steal/subscriptions.rs @@ -17,7 +17,7 @@ use super::{ ip_tables::{new_ip6tables, new_iptables, IPTablesWrapper, SafeIpTables}, }; use crate::{ - error::AgentError, + error::{AgentError, AgentResult}, metrics::{STEAL_FILTERED_PORT_SUBSCRIPTION, STEAL_UNFILTERED_PORT_SUBSCRIPTION}, util::ClientId, }; @@ -153,7 +153,7 @@ impl IpTablesRedirector { flush_connections: bool, pod_ips: Option, support_ipv6: bool, - ) -> Result { + ) -> AgentResult { let (pod_ips4, pod_ips6) = pod_ips.map_or_else( || (None, None), |ips| { diff --git 
a/mirrord/agent/src/util.rs b/mirrord/agent/src/util.rs index 9dcbc6cd892..0c72cddd82f 100644 --- a/mirrord/agent/src/util.rs +++ b/mirrord/agent/src/util.rs @@ -12,7 +12,7 @@ use tokio::sync::mpsc; use tracing::error; use crate::{ - error::AgentError, + error::AgentResult, namespace::{set_namespace, NamespaceType}, }; @@ -151,7 +151,7 @@ where /// Many of the agent's TCP/UDP connections require that they're made from the `pid`'s namespace to /// work. #[tracing::instrument(level = "trace")] -pub(crate) fn enter_namespace(pid: Option, namespace: &str) -> Result<(), AgentError> { +pub(crate) fn enter_namespace(pid: Option, namespace: &str) -> AgentResult<()> { if let Some(pid) = pid { Ok(set_namespace(pid, NamespaceType::Net).inspect_err(|fail| { error!("Failed setting pid {pid:#?} namespace {namespace:#?} with {fail:#?}") diff --git a/mirrord/agent/src/watched_task.rs b/mirrord/agent/src/watched_task.rs index 0212f279163..ad06bb238ee 100644 --- a/mirrord/agent/src/watched_task.rs +++ b/mirrord/agent/src/watched_task.rs @@ -2,7 +2,7 @@ use std::future::Future; use tokio::sync::watch::{self, Receiver, Sender}; -use crate::error::AgentError; +use crate::error::{AgentError, AgentResult}; /// A shared clonable view on a background task's status. #[derive(Debug, Clone)] @@ -83,7 +83,7 @@ impl WatchedTask { impl WatchedTask where - T: Future>, + T: Future>, { /// Execute the wrapped task. /// Store its result in the inner [`TaskStatus`]. 
From 42fbade79d7c96baad4ed9cc16f747d8bac05f21 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 15 Jan 2025 16:19:50 -0300 Subject: [PATCH 74/85] allow type complexity --- mirrord/agent/src/outgoing/udp.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index 75f0020af75..5f568664368 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -37,6 +37,7 @@ use crate::{ struct UdpOutgoingTask { next_connection_id: ConnectionId, /// Writing halves of peer connections made on layer's requests. + #[allow(clippy::type_complexity)] writers: HashMap< ConnectionId, ( @@ -169,6 +170,7 @@ impl UdpOutgoingTask { } /// Returns [`Err`] only when the client has disconnected. + #[allow(clippy::type_complexity)] #[tracing::instrument(level = Level::TRACE, ret)] async fn handle_layer_msg( &mut self, From a94c9f808bc1494fba364379c8b109ae0034dfe0 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 15 Jan 2025 16:37:11 -0300 Subject: [PATCH 75/85] Remove a few extra AgentResult --- mirrord/agent/src/dns.rs | 2 +- mirrord/agent/src/file.rs | 2 +- mirrord/agent/src/outgoing.rs | 4 ++-- mirrord/agent/src/outgoing/udp.rs | 6 +++--- mirrord/agent/src/steal/connection.rs | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mirrord/agent/src/dns.rs b/mirrord/agent/src/dns.rs index 23f21e5f5b4..18ac4e6f12a 100644 --- a/mirrord/agent/src/dns.rs +++ b/mirrord/agent/src/dns.rs @@ -83,7 +83,7 @@ impl DnsWorker { // Prepares the `Resolver` after reading some `/etc` DNS files. // // We care about logging these errors, at an `error!` level. 
- let resolver: AgentResult<_, ResponseError> = try { + let resolver: Result<_, ResponseError> = try { let resolv_conf_path = etc_path.join("resolv.conf"); let hosts_path = etc_path.join("hosts"); diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 46c0ae65ea0..a51e80030be 100644 --- a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -866,7 +866,7 @@ impl FileManager { // buffer (and there was no error converting to a // `DirEntryInternal`. while let Some(entry) = entry_results - .next_if(|entry_res: &AgentResult| { + .next_if(|entry_res: &Result| { entry_res.as_ref().is_ok_and(|entry| { entry.get_d_reclen64() as u64 + result_size <= buffer_size }) diff --git a/mirrord/agent/src/outgoing.rs b/mirrord/agent/src/outgoing.rs index 6aaab6a641a..2131763d715 100644 --- a/mirrord/agent/src/outgoing.rs +++ b/mirrord/agent/src/outgoing.rs @@ -198,7 +198,7 @@ impl TcpOutgoingTask { &mut self, connection_id: ConnectionId, read: io::Result>, - ) -> AgentResult<(), SendError> { + ) -> Result<(), SendError> { match read { // New bytes came in from a peer connection. // We pass them to the layer. @@ -272,7 +272,7 @@ impl TcpOutgoingTask { async fn handle_layer_msg( &mut self, message: LayerTcpOutgoing, - ) -> AgentResult<(), SendError> { + ) -> Result<(), SendError> { match message { // We make connection to the requested address, split the stream into halves with // `io::split`, and put them into respective maps. 
diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index 5f568664368..a2ed3516ea3 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -128,7 +128,7 @@ impl UdpOutgoingTask { &mut self, connection_id: ConnectionId, read: io::Result>, - ) -> AgentResult<(), SendError> { + ) -> Result<(), SendError> { match read { Ok(Some(read)) => { let message = DaemonUdpOutgoing::Read(Ok(DaemonRead { @@ -175,7 +175,7 @@ impl UdpOutgoingTask { async fn handle_layer_msg( &mut self, message: LayerUdpOutgoing, - ) -> AgentResult<(), SendError> { + ) -> Result<(), SendError> { match message { // [user] -> [layer] -> [agent] -> [layer] // `user` is asking us to connect to some remote host. @@ -296,7 +296,7 @@ pub(crate) struct UdpOutgoingApi { /// 3. User is trying to use `sendto` and `recvfrom`, we use the same hack as in DNS to fake a /// connection. #[tracing::instrument(level = Level::TRACE, ret, err(level = Level::DEBUG))] -async fn connect(remote_address: SocketAddress) -> AgentResult { +async fn connect(remote_address: SocketAddress) -> Result { let remote_address = remote_address.try_into()?; let mirror_address = match remote_address { std::net::SocketAddr::V4(_) => SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0), diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index f4d7589ee06..171cf28e2f7 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -72,7 +72,7 @@ impl MatchedHttpRequest { } } - async fn into_serializable(self) -> AgentResult, hyper::Error> { + async fn into_serializable(self) -> Result, hyper::Error> { let ( Parts { method, @@ -102,7 +102,7 @@ impl MatchedHttpRequest { }) } - async fn into_serializable_fallback(self) -> AgentResult>, hyper::Error> { + async fn into_serializable_fallback(self) -> Result>, hyper::Error> { let ( Parts { method, From d051eb1acae330e170c84d186c6351b840b4ca91 Mon Sep 17 00:00:00 
2001 From: meowjesty Date: Thu, 16 Jan 2025 11:11:23 -0300 Subject: [PATCH 76/85] bump protocol --- mirrord/protocol/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirrord/protocol/Cargo.toml b/mirrord/protocol/Cargo.toml index 7d787e1e5c6..e6c9980c15b 100644 --- a/mirrord/protocol/Cargo.toml +++ b/mirrord/protocol/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mirrord-protocol" -version = "1.13.3" +version = "1.13.4" authors.workspace = true description.workspace = true documentation.workspace = true From 4eea59f614f2421b220894782ed473a1fb3ca998 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 20 Jan 2025 17:47:08 -0300 Subject: [PATCH 77/85] fix link doc --- Cargo.lock | 2 +- mirrord/agent/src/outgoing/udp.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6f4c41f43f6..8ef5da6b370 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4495,7 +4495,7 @@ dependencies = [ [[package]] name = "mirrord-protocol" -version = "1.13.3" +version = "1.13.4" dependencies = [ "actix-codec", "bincode", diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index a2ed3516ea3..02d6415f06c 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -47,7 +47,7 @@ struct UdpOutgoingTask { >, /// Reading halves of peer connections made on layer's requests. readers: StreamMap>>, - /// Optional pid of agent's target. Used in [`SocketStream::connect`]. + /// Optional pid of agent's target. Used in `SocketStream::connect`. 
pid: Option, layer_rx: Receiver, daemon_tx: Sender, From 3eadd9acc9b17dcfc0b292cb2a62bfdfc441920c Mon Sep 17 00:00:00 2001 From: meowjesty Date: Mon, 20 Jan 2025 17:48:04 -0300 Subject: [PATCH 78/85] fix wrong doc --- mirrord/agent/src/outgoing/udp.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index 02d6415f06c..7d45a56bf7a 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -87,8 +87,9 @@ impl UdpOutgoingTask { } } - /// Runs this task as long as the channels connecting it with [`TcpOutgoingApi`] are open. - /// This routine never fails and returns [`Result`] only due to [`WatchedTask`] constraints. + /// Runs this task as long as the channels connecting it with [`UdpOutgoingApi`] are open. + /// This routine never fails and returns [`AgentResult`] only due to [`WatchedTask`] + /// constraints. #[tracing::instrument(level = Level::TRACE, skip(self))] pub(super) async fn run(mut self) -> AgentResult<()> { loop { From 00fc091e16695f685b2f456f18e4b16b080098e5 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 21 Jan 2025 17:23:19 -0300 Subject: [PATCH 79/85] Put global gauges in state so tests dont explode. 
--- mirrord/agent/src/dns.rs | 4 +- mirrord/agent/src/entrypoint.rs | 4 +- mirrord/agent/src/file.rs | 18 +- mirrord/agent/src/metrics.rs | 264 ++++++++++++------ mirrord/agent/src/outgoing.rs | 15 +- mirrord/agent/src/outgoing/udp.rs | 13 +- mirrord/agent/src/sniffer.rs | 8 +- mirrord/agent/src/steal/api.rs | 32 ++- mirrord/agent/src/steal/connection.rs | 2 +- .../agent/src/steal/connections/filtered.rs | 11 +- .../agent/src/steal/connections/unfiltered.rs | 14 +- mirrord/agent/src/steal/subscriptions.rs | 22 +- 12 files changed, 256 insertions(+), 151 deletions(-) diff --git a/mirrord/agent/src/dns.rs b/mirrord/agent/src/dns.rs index 18ac4e6f12a..0ca85badea9 100644 --- a/mirrord/agent/src/dns.rs +++ b/mirrord/agent/src/dns.rs @@ -123,7 +123,7 @@ impl DnsWorker { let timeout = self.timeout; let attempts = self.attempts; - DNS_REQUEST_COUNT.inc(); + DNS_REQUEST_COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed); let lookup_future = async move { let result = Self::do_lookup(etc_path, message.request.node, attempts, timeout).await; @@ -131,7 +131,7 @@ impl DnsWorker { if let Err(result) = message.response_tx.send(result) { tracing::error!(?result, "Failed to send query response"); } - DNS_REQUEST_COUNT.dec(); + DNS_REQUEST_COUNT.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); }; tokio::spawn(lookup_future); diff --git a/mirrord/agent/src/entrypoint.rs b/mirrord/agent/src/entrypoint.rs index a2f2c07acb4..d48f96a7fbb 100644 --- a/mirrord/agent/src/entrypoint.rs +++ b/mirrord/agent/src/entrypoint.rs @@ -208,7 +208,7 @@ struct ClientConnectionHandler { impl Drop for ClientConnectionHandler { fn drop(&mut self) { - CLIENT_COUNT.dec(); + CLIENT_COUNT.fetch_sub(1, Ordering::Relaxed); } } @@ -245,7 +245,7 @@ impl ClientConnectionHandler { ready_for_logs: false, }; - CLIENT_COUNT.inc(); + CLIENT_COUNT.fetch_add(1, Ordering::Relaxed); Ok(client_handler) } diff --git a/mirrord/agent/src/file.rs b/mirrord/agent/src/file.rs index 574f2ad1896..0fa945fbd85 100644 --- 
a/mirrord/agent/src/file.rs +++ b/mirrord/agent/src/file.rs @@ -80,7 +80,7 @@ impl Drop for FileManager { fn drop(&mut self) { let descriptors = self.open_files.len() + self.dir_streams.len() + self.getdents_streams.len(); - OPEN_FD_COUNT.sub(descriptors as i64); + OPEN_FD_COUNT.fetch_sub(descriptors as i64, std::sync::atomic::Ordering::Relaxed); } } @@ -306,7 +306,7 @@ impl FileManager { }; if self.open_files.insert(fd, remote_file).is_none() { - OPEN_FD_COUNT.inc(); + OPEN_FD_COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed); } Ok(OpenFileResponse { fd }) @@ -342,7 +342,7 @@ impl FileManager { }; if self.open_files.insert(fd, remote_file).is_none() { - OPEN_FD_COUNT.inc(); + OPEN_FD_COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed); } Ok(OpenFileResponse { fd }) @@ -643,7 +643,7 @@ impl FileManager { if self.open_files.remove(&fd).is_none() { error!(fd, "fd not found!"); } else { - OPEN_FD_COUNT.dec(); + OPEN_FD_COUNT.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); } None @@ -657,10 +657,10 @@ impl FileManager { let closed_getdents_stream = self.getdents_streams.remove(&fd); if closed_dir_stream.is_some() && closed_getdents_stream.is_some() { - OPEN_FD_COUNT.dec(); - OPEN_FD_COUNT.dec(); + // Closed `dirstream` and `dentsstream` + OPEN_FD_COUNT.fetch_sub(2, std::sync::atomic::Ordering::Relaxed); } else if closed_dir_stream.is_some() || closed_getdents_stream.is_some() { - OPEN_FD_COUNT.dec(); + OPEN_FD_COUNT.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); } else { error!("FileManager::close_dir -> fd {:#?} not found", fd); } @@ -788,7 +788,7 @@ impl FileManager { let dir_stream = path.read_dir()?.enumerate(); if self.dir_streams.insert(fd, dir_stream).is_none() { - OPEN_FD_COUNT.inc(); + OPEN_FD_COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed); } Ok(OpenDirResponse { fd }) @@ -851,7 +851,7 @@ impl FileManager { let current_and_parent = Self::get_current_and_parent_entries(dir); let stream = GetDEnts64Stream::new(dir.read_dir()?, 
current_and_parent).peekable(); - OPEN_FD_COUNT.inc(); + OPEN_FD_COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed); Ok(e.insert(stream)) } }, diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 5332ad2e7db..f7486e411cf 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -1,6 +1,9 @@ -use std::{net::SocketAddr, sync::LazyLock}; +use std::{ + net::SocketAddr, + sync::{atomic::AtomicI64, Arc}, +}; -use axum::{routing::get, Router}; +use axum::{extract::State, routing::get, Router}; use http::StatusCode; use prometheus::{register_int_gauge, IntGauge}; use tokio::net::TcpListener; @@ -11,115 +14,186 @@ use crate::error::AgentError; /// Incremented whenever we get a new client in `ClientConnectionHandler`, and decremented /// when this client is dropped. -pub(crate) static CLIENT_COUNT: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_client_count", - "amount of connected clients to this mirrord-agent" - ) - .expect("Valid at initialization!") -}); +pub(crate) static CLIENT_COUNT: AtomicI64 = AtomicI64::new(0); /// Incremented whenever we handle a new `DnsCommand`, and decremented after the result of /// `do_lookup` has been sent back through the response channel. -pub(crate) static DNS_REQUEST_COUNT: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_dns_request_count", - "amount of in-progress dns requests in the mirrord-agent" - ) - .expect("Valid at initialization!") -}); +pub(crate) static DNS_REQUEST_COUNT: AtomicI64 = AtomicI64::new(0); /// Incremented and decremented in _open-ish_/_close-ish_ file operations in `FileManager`, /// Also gets decremented when `FileManager` is dropped. 
-pub(crate) static OPEN_FD_COUNT: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_open_fd_count", - "amount of open file descriptors in mirrord-agent file manager" - ) - .expect("Valid at initialization!") -}); +pub(crate) static OPEN_FD_COUNT: AtomicI64 = AtomicI64::new(0); /// Follows the amount of subscribed ports in `update_packet_filter`. We don't really /// increment/decrement this one, and mostly `set` it to the latest amount of ports, zeroing it when /// the `TcpConnectionSniffer` gets dropped. -pub(crate) static MIRROR_PORT_SUBSCRIPTION: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_mirror_port_subscription_count", - "amount of mirror port subscriptions in mirror-agent" - ) - .expect("Valid at initialization") -}); - -pub(crate) static MIRROR_CONNECTION_SUBSCRIPTION: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_mirror_connection_subscription_count", - "amount of connections in mirror mode in mirrord-agent" - ) - .expect("Valid at initialization!") -}); - -pub(crate) static STEAL_FILTERED_PORT_SUBSCRIPTION: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_steal_filtered_port_subscription_count", - "amount of filtered steal port subscriptions in mirrord-agent" - ) - .expect("Valid at initialization!") -}); - -pub(crate) static STEAL_UNFILTERED_PORT_SUBSCRIPTION: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_steal_unfiltered_port_subscription_count", - "amount of unfiltered steal port subscriptions in mirrord-agent" - ) - .expect("Valid at initialization!") -}); - -pub(crate) static STEAL_FILTERED_CONNECTION_SUBSCRIPTION: LazyLock = - LazyLock::new(|| { - register_int_gauge!( +pub(crate) static MIRROR_PORT_SUBSCRIPTION: AtomicI64 = AtomicI64::new(0); + +pub(crate) static MIRROR_CONNECTION_SUBSCRIPTION: AtomicI64 = AtomicI64::new(0); + +pub(crate) static STEAL_FILTERED_PORT_SUBSCRIPTION: AtomicI64 = AtomicI64::new(0); + +pub(crate) static 
STEAL_UNFILTERED_PORT_SUBSCRIPTION: AtomicI64 = AtomicI64::new(0); + +pub(crate) static STEAL_FILTERED_CONNECTION_SUBSCRIPTION: AtomicI64 = AtomicI64::new(0); + +pub(crate) static STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION: AtomicI64 = AtomicI64::new(0); + +pub(crate) static HTTP_REQUEST_IN_PROGRESS_COUNT: AtomicI64 = AtomicI64::new(0); + +pub(crate) static TCP_OUTGOING_CONNECTION: AtomicI64 = AtomicI64::new(0); + +pub(crate) static UDP_OUTGOING_CONNECTION: AtomicI64 = AtomicI64::new(0); + +#[derive(Debug)] +struct Metrics { + client_count: IntGauge, + dns_request_count: IntGauge, + open_fd_count: IntGauge, + mirror_port_subscription: IntGauge, + mirror_connection_subscription: IntGauge, + steal_filtered_port_subscription: IntGauge, + steal_unfiltered_port_subscription: IntGauge, + steal_filtered_connection_subscription: IntGauge, + steal_unfiltered_connection_subscription: IntGauge, + http_request_in_progress_count: IntGauge, + tcp_outgoing_connection: IntGauge, + udp_outgoing_connection: IntGauge, +} + +impl Metrics { + fn new() -> Self { + let client_count = register_int_gauge!( + "mirrord_agent_client_count", + "amount of connected clients to this mirrord-agent" + ) + .expect("Valid at initialization!"); + + let dns_request_count = register_int_gauge!( + "mirrord_agent_dns_request_count", + "amount of in-progress dns requests in the mirrord-agent" + ) + .expect("Valid at initialization!"); + + let open_fd_count = register_int_gauge!( + "mirrord_agent_open_fd_count", + "amount of open file descriptors in mirrord-agent file manager" + ) + .expect("Valid at initialization!"); + + let mirror_port_subscription = register_int_gauge!( + "mirrord_agent_mirror_port_subscription_count", + "amount of mirror port subscriptions in mirror-agent" + ) + .expect("Valid at initialization"); + + let mirror_connection_subscription = register_int_gauge!( + "mirrord_agent_mirror_connection_subscription_count", + "amount of connections in mirror mode in mirrord-agent" + ) + 
.expect("Valid at initialization!"); + + let steal_filtered_port_subscription = register_int_gauge!( + "mirrord_agent_steal_filtered_port_subscription_count", + "amount of filtered steal port subscriptions in mirrord-agent" + ) + .expect("Valid at initialization!"); + + let steal_unfiltered_port_subscription = register_int_gauge!( + "mirrord_agent_steal_unfiltered_port_subscription_count", + "amount of unfiltered steal port subscriptions in mirrord-agent" + ) + .expect("Valid at initialization!"); + + let steal_filtered_connection_subscription = register_int_gauge!( "mirrord_agent_steal_connection_subscription_count", "amount of filtered connections in steal mode in mirrord-agent" ) - .expect("Valid at initialization!") - }); + .expect("Valid at initialization!"); -pub(crate) static STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION: LazyLock = - LazyLock::new(|| { - register_int_gauge!( + let steal_unfiltered_connection_subscription = register_int_gauge!( "mirrord_agent_steal_connection_subscription_count", "amount of unfiltered connections in steal mode in mirrord-agent" ) - .expect("Valid at initialization!") - }); - -pub(crate) static HTTP_REQUEST_IN_PROGRESS_COUNT: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_http_request_in_progress_count", - "amount of in-progress http requests in the mirrord-agent" - ) - .expect("Valid at initialization!") -}); - -pub(crate) static TCP_OUTGOING_CONNECTION: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_tcp_outgoing_connection_count", - "amount of tcp outgoing connections in mirrord-agent" - ) - .expect("Valid at initialization!") -}); - -pub(crate) static UDP_OUTGOING_CONNECTION: LazyLock = LazyLock::new(|| { - register_int_gauge!( - "mirrord_agent_udp_outgoing_connection_count", - "amount of udp outgoing connections in mirrord-agent" - ) - .expect("Valid at initialization!") -}); + .expect("Valid at initialization!"); + + let http_request_in_progress_count = register_int_gauge!( + 
"mirrord_agent_http_request_in_progress_count", + "amount of in-progress http requests in the mirrord-agent" + ) + .expect("Valid at initialization!"); + + let tcp_outgoing_connection = register_int_gauge!( + "mirrord_agent_tcp_outgoing_connection_count", + "amount of tcp outgoing connections in mirrord-agent" + ) + .expect("Valid at initialization!"); + + let udp_outgoing_connection = register_int_gauge!( + "mirrord_agent_udp_outgoing_connection_count", + "amount of udp outgoing connections in mirrord-agent" + ) + .expect("Valid at initialization!"); + + Self { + client_count, + dns_request_count, + open_fd_count, + mirror_port_subscription, + mirror_connection_subscription, + steal_filtered_port_subscription, + steal_unfiltered_port_subscription, + steal_filtered_connection_subscription, + steal_unfiltered_connection_subscription, + http_request_in_progress_count, + tcp_outgoing_connection, + udp_outgoing_connection, + } + } + + fn update_gauges(&self) { + use std::sync::atomic::Ordering; + + let Self { + client_count, + dns_request_count, + open_fd_count, + mirror_port_subscription, + mirror_connection_subscription, + steal_filtered_port_subscription, + steal_unfiltered_port_subscription, + steal_filtered_connection_subscription, + steal_unfiltered_connection_subscription, + http_request_in_progress_count, + tcp_outgoing_connection, + udp_outgoing_connection, + } = self; + + client_count.set(CLIENT_COUNT.load(Ordering::Relaxed)); + dns_request_count.set(DNS_REQUEST_COUNT.load(Ordering::Relaxed)); + open_fd_count.set(OPEN_FD_COUNT.load(Ordering::Relaxed)); + mirror_port_subscription.set(MIRROR_PORT_SUBSCRIPTION.load(Ordering::Relaxed)); + mirror_connection_subscription.set(MIRROR_CONNECTION_SUBSCRIPTION.load(Ordering::Relaxed)); + steal_filtered_port_subscription + .set(STEAL_FILTERED_PORT_SUBSCRIPTION.load(Ordering::Relaxed)); + steal_unfiltered_port_subscription + .set(STEAL_UNFILTERED_PORT_SUBSCRIPTION.load(Ordering::Relaxed)); + 
steal_filtered_connection_subscription + .set(STEAL_FILTERED_CONNECTION_SUBSCRIPTION.load(Ordering::Relaxed)); + steal_unfiltered_connection_subscription + .set(STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.load(Ordering::Relaxed)); + http_request_in_progress_count.set(HTTP_REQUEST_IN_PROGRESS_COUNT.load(Ordering::Relaxed)); + tcp_outgoing_connection.set(TCP_OUTGOING_CONNECTION.load(Ordering::Relaxed)); + udp_outgoing_connection.set(UDP_OUTGOING_CONNECTION.load(Ordering::Relaxed)); + } +} #[tracing::instrument(level = Level::TRACE, ret)] -async fn get_metrics() -> (StatusCode, String) { +async fn get_metrics(State(state): State>) -> (StatusCode, String) { use prometheus::TextEncoder; + state.update_gauges(); + let metric_families = prometheus::gather(); match TextEncoder.encode_to_string(&metric_families) { Ok(response) => (StatusCode::OK, response), @@ -135,7 +209,11 @@ pub(crate) async fn start_metrics( address: SocketAddr, cancellation_token: CancellationToken, ) -> Result<(), axum::BoxError> { - let app = Router::new().route("/metrics", get(get_metrics)); + let metrics_state = Arc::new(Metrics::new()); + + let app = Router::new() + .route("/metrics", get(get_metrics)) + .with_state(metrics_state); let listener = TcpListener::bind(address) .await @@ -158,7 +236,7 @@ pub(crate) async fn start_metrics( #[cfg(test)] mod tests { - use std::time::Duration; + use std::{sync::atomic::Ordering, time::Duration}; use tokio_util::sync::CancellationToken; @@ -177,7 +255,7 @@ mod tests { .unwrap() }); - OPEN_FD_COUNT.inc(); + OPEN_FD_COUNT.fetch_add(1, Ordering::Relaxed); // Give the server some time to start. 
tokio::time::sleep(Duration::from_secs(1)).await; diff --git a/mirrord/agent/src/outgoing.rs b/mirrord/agent/src/outgoing.rs index 2131763d715..96a063d7a05 100644 --- a/mirrord/agent/src/outgoing.rs +++ b/mirrord/agent/src/outgoing.rs @@ -116,7 +116,7 @@ struct TcpOutgoingTask { impl Drop for TcpOutgoingTask { fn drop(&mut self) { let connections = self.readers.keys().chain(self.writers.keys()).count(); - TCP_OUTGOING_CONNECTION.sub(connections as i64); + TCP_OUTGOING_CONNECTION.fetch_sub(connections as i64, std::sync::atomic::Ordering::Relaxed); } } @@ -224,7 +224,7 @@ impl TcpOutgoingTask { self.readers.remove(&connection_id); self.writers.remove(&connection_id); - TCP_OUTGOING_CONNECTION.dec(); + TCP_OUTGOING_CONNECTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); let daemon_message = DaemonTcpOutgoing::Close(connection_id); self.daemon_tx.send(daemon_message).await?; @@ -255,7 +255,7 @@ impl TcpOutgoingTask { "Layer connection is shut down as well, sending close message.", ); - TCP_OUTGOING_CONNECTION.dec(); + TCP_OUTGOING_CONNECTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); self.daemon_tx .send(DaemonTcpOutgoing::Close(connection_id)) @@ -298,7 +298,7 @@ impl TcpOutgoingTask { connection_id, ReaderStream::with_capacity(read_half, Self::READ_BUFFER_SIZE), ); - TCP_OUTGOING_CONNECTION.inc(); + TCP_OUTGOING_CONNECTION.fetch_add(1, std::sync::atomic::Ordering::Relaxed); Ok(DaemonConnect { connection_id, @@ -356,7 +356,8 @@ impl TcpOutgoingTask { connection_id, "Peer connection is shut down as well, sending close message to the client.", ); - TCP_OUTGOING_CONNECTION.dec(); + TCP_OUTGOING_CONNECTION + .fetch_sub(1, std::sync::atomic::Ordering::Relaxed); self.daemon_tx .send(DaemonTcpOutgoing::Close(connection_id)) @@ -371,7 +372,7 @@ impl TcpOutgoingTask { Err(error) => { self.writers.remove(&connection_id); self.readers.remove(&connection_id); - TCP_OUTGOING_CONNECTION.dec(); + TCP_OUTGOING_CONNECTION.fetch_sub(1, 
std::sync::atomic::Ordering::Relaxed); tracing::trace!( connection_id, @@ -392,7 +393,7 @@ impl TcpOutgoingTask { LayerTcpOutgoing::Close(LayerClose { connection_id }) => { self.writers.remove(&connection_id); self.readers.remove(&connection_id); - TCP_OUTGOING_CONNECTION.dec(); + TCP_OUTGOING_CONNECTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); Ok(()) } diff --git a/mirrord/agent/src/outgoing/udp.rs b/mirrord/agent/src/outgoing/udp.rs index 7d45a56bf7a..0dab137a92b 100644 --- a/mirrord/agent/src/outgoing/udp.rs +++ b/mirrord/agent/src/outgoing/udp.rs @@ -56,7 +56,7 @@ struct UdpOutgoingTask { impl Drop for UdpOutgoingTask { fn drop(&mut self) { let connections = self.readers.keys().chain(self.writers.keys()).count(); - UDP_OUTGOING_CONNECTION.sub(connections as i64); + UDP_OUTGOING_CONNECTION.fetch_sub(connections as i64, std::sync::atomic::Ordering::Relaxed); } } @@ -152,7 +152,7 @@ impl UdpOutgoingTask { self.readers.remove(&connection_id); self.writers.remove(&connection_id); - UDP_OUTGOING_CONNECTION.dec(); + UDP_OUTGOING_CONNECTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); let daemon_message = DaemonUdpOutgoing::Close(connection_id); self.daemon_tx.send(daemon_message).await?; @@ -160,7 +160,7 @@ impl UdpOutgoingTask { Ok(None) => { self.writers.remove(&connection_id); self.readers.remove(&connection_id); - UDP_OUTGOING_CONNECTION.dec(); + UDP_OUTGOING_CONNECTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); let daemon_message = DaemonUdpOutgoing::Close(connection_id); self.daemon_tx.send(daemon_message).await?; @@ -201,7 +201,8 @@ impl UdpOutgoingTask { self.writers.insert(connection_id, (sink, peer_address)); self.readers.insert(connection_id, stream); - UDP_OUTGOING_CONNECTION.inc(); + UDP_OUTGOING_CONNECTION + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); Ok(DaemonConnect { connection_id, @@ -244,7 +245,7 @@ impl UdpOutgoingTask { Err(error) => { self.writers.remove(&connection_id); self.readers.remove(&connection_id); - 
UDP_OUTGOING_CONNECTION.dec(); + UDP_OUTGOING_CONNECTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); tracing::trace!( connection_id, @@ -264,7 +265,7 @@ impl UdpOutgoingTask { LayerUdpOutgoing::Close(LayerClose { ref connection_id }) => { self.writers.remove(connection_id); self.readers.remove(connection_id); - UDP_OUTGOING_CONNECTION.dec(); + UDP_OUTGOING_CONNECTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); Ok(()) } diff --git a/mirrord/agent/src/sniffer.rs b/mirrord/agent/src/sniffer.rs index 6a653587566..0d5cccfb584 100644 --- a/mirrord/agent/src/sniffer.rs +++ b/mirrord/agent/src/sniffer.rs @@ -144,8 +144,8 @@ pub(crate) struct TcpConnectionSniffer { impl Drop for TcpConnectionSniffer { fn drop(&mut self) { - MIRROR_PORT_SUBSCRIPTION.set(0); - MIRROR_CONNECTION_SUBSCRIPTION.set(0); + MIRROR_PORT_SUBSCRIPTION.store(0, std::sync::atomic::Ordering::Relaxed); + MIRROR_CONNECTION_SUBSCRIPTION.store(0, std::sync::atomic::Ordering::Relaxed); } } @@ -255,7 +255,7 @@ where #[tracing::instrument(level = Level::TRACE, err)] fn update_packet_filter(&mut self) -> AgentResult<()> { let ports = self.port_subscriptions.get_subscribed_topics(); - MIRROR_PORT_SUBSCRIPTION.set(ports.len() as i64); + MIRROR_PORT_SUBSCRIPTION.store(ports.len() as i64, std::sync::atomic::Ordering::Relaxed); let filter = if ports.is_empty() { tracing::trace!("No ports subscribed, setting dummy bpf"); @@ -403,7 +403,7 @@ where } } - MIRROR_CONNECTION_SUBSCRIPTION.inc(); + MIRROR_CONNECTION_SUBSCRIPTION.fetch_add(1, std::sync::atomic::Ordering::Relaxed); e.insert_entry(data_tx) } }; diff --git a/mirrord/agent/src/steal/api.rs b/mirrord/agent/src/steal/api.rs index a2739590e8e..2fc5733f8fa 100644 --- a/mirrord/agent/src/steal/api.rs +++ b/mirrord/agent/src/steal/api.rs @@ -52,7 +52,7 @@ pub(crate) struct TcpStealerApi { impl Drop for TcpStealerApi { fn drop(&mut self) { - HTTP_REQUEST_IN_PROGRESS_COUNT.set(0); + HTTP_REQUEST_IN_PROGRESS_COUNT.store(0, 
std::sync::atomic::Ordering::Relaxed); } } @@ -110,7 +110,10 @@ impl TcpStealerApi { if let DaemonTcp::Close(close) = &msg { self.response_body_txs .retain(|(key_id, _), _| *key_id != close.connection_id); - HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); + HTTP_REQUEST_IN_PROGRESS_COUNT.store( + self.response_body_txs.len() as i64, + std::sync::atomic::Ordering::Relaxed, + ); } Ok(msg) } @@ -182,7 +185,10 @@ impl TcpStealerApi { LayerTcpSteal::ConnectionUnsubscribe(connection_id) => { self.response_body_txs .retain(|(key_id, _), _| *key_id != connection_id); - HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); + HTTP_REQUEST_IN_PROGRESS_COUNT.store( + self.response_body_txs.len() as i64, + std::sync::atomic::Ordering::Relaxed, + ); self.connection_unsubscribe(connection_id).await } @@ -214,7 +220,10 @@ impl TcpStealerApi { let key = (response.connection_id, response.request_id); self.response_body_txs.insert(key, tx.clone()); - HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); + HTTP_REQUEST_IN_PROGRESS_COUNT.store( + self.response_body_txs.len() as i64, + std::sync::atomic::Ordering::Relaxed, + ); self.http_response(HttpResponseFallback::Streamed(http_response)) .await?; @@ -222,7 +231,10 @@ impl TcpStealerApi { for frame in response.internal_response.body { if let Err(err) = tx.send(Ok(frame.into())).await { self.response_body_txs.remove(&key); - HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); + HTTP_REQUEST_IN_PROGRESS_COUNT.store( + self.response_body_txs.len() as i64, + std::sync::atomic::Ordering::Relaxed, + ); tracing::trace!(?err, "error while sending streaming response frame"); } } @@ -245,14 +257,20 @@ impl TcpStealerApi { } if send_err || body.is_last { self.response_body_txs.remove(key); - HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); + HTTP_REQUEST_IN_PROGRESS_COUNT.store( + self.response_body_txs.len() as i64, + 
std::sync::atomic::Ordering::Relaxed, + ); }; Ok(()) } ChunkedResponse::Error(err) => { self.response_body_txs .remove(&(err.connection_id, err.request_id)); - HTTP_REQUEST_IN_PROGRESS_COUNT.set(self.response_body_txs.len() as i64); + HTTP_REQUEST_IN_PROGRESS_COUNT.store( + self.response_body_txs.len() as i64, + std::sync::atomic::Ordering::Relaxed, + ); tracing::trace!(?err, "ChunkedResponse error received"); Ok(()) } diff --git a/mirrord/agent/src/steal/connection.rs b/mirrord/agent/src/steal/connection.rs index 849d9397461..6515f2ecfc9 100644 --- a/mirrord/agent/src/steal/connection.rs +++ b/mirrord/agent/src/steal/connection.rs @@ -62,7 +62,7 @@ impl MatchedHttpRequest { request_id: RequestId, request: Request, ) -> Self { - HTTP_REQUEST_IN_PROGRESS_COUNT.inc(); + HTTP_REQUEST_IN_PROGRESS_COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed); Self { connection_id, diff --git a/mirrord/agent/src/steal/connections/filtered.rs b/mirrord/agent/src/steal/connections/filtered.rs index b1507295e3b..ecc9f0064ad 100644 --- a/mirrord/agent/src/steal/connections/filtered.rs +++ b/mirrord/agent/src/steal/connections/filtered.rs @@ -381,7 +381,8 @@ pub struct FilteredStealTask { impl Drop for FilteredStealTask { fn drop(&mut self) { if self.metrics_updated.not() { - STEAL_FILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_FILTERED_CONNECTION_SUBSCRIPTION + .fetch_sub(1, std::sync::atomic::Ordering::Relaxed); } } } @@ -459,7 +460,7 @@ where } }; - STEAL_FILTERED_CONNECTION_SUBSCRIPTION.inc(); + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.fetch_add(1, std::sync::atomic::Ordering::Relaxed); Self { connection_id, @@ -658,7 +659,7 @@ where self.subscribed.insert(client_id, false); self.blocked_requests.retain(|key, _| key.0 != client_id); - STEAL_FILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); }, }, @@ -668,7 +669,7 @@ where // No more requests from the `FilteringService`. 
// HTTP connection is closed and possibly upgraded. None => { - STEAL_FILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_FILTERED_CONNECTION_SUBSCRIPTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); break } } @@ -812,7 +813,7 @@ where ) -> Result<(), ConnectionTaskError> { let res = self.run_until_http_ends(tx.clone(), rx).await; - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); self.metrics_updated = true; let res = match res { diff --git a/mirrord/agent/src/steal/connections/unfiltered.rs b/mirrord/agent/src/steal/connections/unfiltered.rs index d5f6f81c18e..ec54691315e 100644 --- a/mirrord/agent/src/steal/connections/unfiltered.rs +++ b/mirrord/agent/src/steal/connections/unfiltered.rs @@ -24,13 +24,13 @@ pub struct UnfilteredStealTask { impl Drop for UnfilteredStealTask { fn drop(&mut self) { - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); } } impl UnfilteredStealTask { pub(crate) fn new(connection_id: ConnectionId, client_id: ClientId, stream: T) -> Self { - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.inc(); + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.fetch_add(1, std::sync::atomic::Ordering::Relaxed); Self { connection_id, @@ -59,7 +59,7 @@ impl UnfilteredStealTask { read = self.stream.read_buf(&mut buf), if !reading_closed => match read { Ok(..) 
=> { if buf.is_empty() { - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); tracing::trace!( client_id = self.client_id, @@ -84,7 +84,7 @@ impl UnfilteredStealTask { Err(e) if e.kind() == ErrorKind::WouldBlock => {} Err(e) => { - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); tx.send(ConnectionMessageOut::Closed { client_id: self.client_id, @@ -108,7 +108,7 @@ impl UnfilteredStealTask { ConnectionMessageIn::Raw { data, .. } => { let res = if data.is_empty() { - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); tracing::trace!( client_id = self.client_id, @@ -122,7 +122,7 @@ impl UnfilteredStealTask { }; if let Err(e) = res { - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); tx.send(ConnectionMessageOut::Closed { client_id: self.client_id, @@ -142,7 +142,7 @@ impl UnfilteredStealTask { }, ConnectionMessageIn::Unsubscribed { .. 
} => { - STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.dec(); + STEAL_UNFILTERED_CONNECTION_SUBSCRIPTION.fetch_sub(1, std::sync::atomic::Ordering::Relaxed); return Ok(()); } diff --git a/mirrord/agent/src/steal/subscriptions.rs b/mirrord/agent/src/steal/subscriptions.rs index dfe05ea3fbe..901ecd725ef 100644 --- a/mirrord/agent/src/steal/subscriptions.rs +++ b/mirrord/agent/src/steal/subscriptions.rs @@ -316,8 +316,8 @@ pub struct PortSubscriptions { impl Drop for PortSubscriptions { fn drop(&mut self) { - STEAL_FILTERED_PORT_SUBSCRIPTION.set(0); - STEAL_UNFILTERED_PORT_SUBSCRIPTION.set(0); + STEAL_FILTERED_PORT_SUBSCRIPTION.store(0, std::sync::atomic::Ordering::Relaxed); + STEAL_UNFILTERED_PORT_SUBSCRIPTION.store(0, std::sync::atomic::Ordering::Relaxed); } } @@ -365,9 +365,11 @@ impl PortSubscriptions { let filtered = filter.is_some(); if e.get_mut().try_extend(client_id, filter) { if filtered { - STEAL_FILTERED_PORT_SUBSCRIPTION.inc(); + STEAL_FILTERED_PORT_SUBSCRIPTION + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); } else { - STEAL_UNFILTERED_PORT_SUBSCRIPTION.inc(); + STEAL_UNFILTERED_PORT_SUBSCRIPTION + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); } Ok(false) @@ -378,9 +380,11 @@ impl PortSubscriptions { Entry::Vacant(e) => { if filter.is_some() { - STEAL_FILTERED_PORT_SUBSCRIPTION.inc(); + STEAL_FILTERED_PORT_SUBSCRIPTION + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); } else { - STEAL_UNFILTERED_PORT_SUBSCRIPTION.inc(); + STEAL_UNFILTERED_PORT_SUBSCRIPTION + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); } e.insert(PortSubscription::new(client_id, filter)); @@ -419,14 +423,16 @@ impl PortSubscriptions { let remove_redirect = match e.get_mut() { PortSubscription::Unfiltered(subscribed_client) if *subscribed_client == client_id => { e.remove(); - STEAL_UNFILTERED_PORT_SUBSCRIPTION.dec(); + STEAL_UNFILTERED_PORT_SUBSCRIPTION + .fetch_sub(1, std::sync::atomic::Ordering::Relaxed); true } PortSubscription::Unfiltered(..) 
=> false, PortSubscription::Filtered(filters) => { if filters.remove(&client_id).is_some() { - STEAL_FILTERED_PORT_SUBSCRIPTION.dec(); + STEAL_FILTERED_PORT_SUBSCRIPTION + .fetch_sub(1, std::sync::atomic::Ordering::Relaxed); } if filters.is_empty() { From 8c5d296a2371f190eda8868f4e07dcc53140adee Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 21 Jan 2025 17:35:05 -0300 Subject: [PATCH 80/85] Fix repeated value. --- mirrord/agent/src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index f7486e411cf..fcd788e9f9b 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -112,7 +112,7 @@ impl Metrics { .expect("Valid at initialization!"); let steal_unfiltered_connection_subscription = register_int_gauge!( - "mirrord_agent_steal_connection_subscription_count", + "mirrord_agent_steal_unfiltered_connection_subscription_count", "amount of unfiltered connections in steal mode in mirrord-agent" ) .expect("Valid at initialization!"); From 1d3997fb71ec91bcc6340e6406aad81349712a33 Mon Sep 17 00:00:00 2001 From: meowjesty Date: Tue, 21 Jan 2025 17:37:14 -0300 Subject: [PATCH 81/85] changelog --- changelog.d/2975.added.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/2975.added.md diff --git a/changelog.d/2975.added.md b/changelog.d/2975.added.md new file mode 100644 index 00000000000..17c59da1f7b --- /dev/null +++ b/changelog.d/2975.added.md @@ -0,0 +1 @@ +Add prometheus metrics to the mirrord-agent. From 7c1ef451939cb98046f0086f0f88a110e3b6571b Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 22 Jan 2025 12:04:46 -0300 Subject: [PATCH 82/85] Dont use default registry. 
--- mirrord/agent/src/metrics.rs | 217 +++++++++++++++++++++++------------ 1 file changed, 141 insertions(+), 76 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index fcd788e9f9b..8d8dd1f4b90 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -5,7 +5,7 @@ use std::{ use axum::{extract::State, routing::get, Router}; use http::StatusCode; -use prometheus::{register_int_gauge, IntGauge}; +use prometheus::{proto::MetricFamily, IntGauge, Registry}; use tokio::net::TcpListener; use tokio_util::sync::CancellationToken; use tracing::Level; @@ -47,6 +47,7 @@ pub(crate) static UDP_OUTGOING_CONNECTION: AtomicI64 = AtomicI64::new(0); #[derive(Debug)] struct Metrics { + registry: Registry, client_count: IntGauge, dns_request_count: IntGauge, open_fd_count: IntGauge, @@ -61,81 +62,144 @@ struct Metrics { udp_outgoing_connection: IntGauge, } +macro_rules! register_unique_only { + ($registry: ident, $gauge: ident) => { + match $registry.register(Box::new($gauge.clone())) { + Ok(()) => Ok(()), + Err(prometheus::Error::AlreadyReg) => Ok(()), + Err(fail) => Err(fail), + } + }; +} + impl Metrics { fn new() -> Self { - let client_count = register_int_gauge!( - "mirrord_agent_client_count", - "amount of connected clients to this mirrord-agent" - ) - .expect("Valid at initialization!"); - - let dns_request_count = register_int_gauge!( - "mirrord_agent_dns_request_count", - "amount of in-progress dns requests in the mirrord-agent" - ) - .expect("Valid at initialization!"); - - let open_fd_count = register_int_gauge!( - "mirrord_agent_open_fd_count", - "amount of open file descriptors in mirrord-agent file manager" - ) - .expect("Valid at initialization!"); - - let mirror_port_subscription = register_int_gauge!( - "mirrord_agent_mirror_port_subscription_count", - "amount of mirror port subscriptions in mirror-agent" - ) - .expect("Valid at initialization"); - - let mirror_connection_subscription = register_int_gauge!( - 
"mirrord_agent_mirror_connection_subscription_count", - "amount of connections in mirror mode in mirrord-agent" - ) - .expect("Valid at initialization!"); - - let steal_filtered_port_subscription = register_int_gauge!( - "mirrord_agent_steal_filtered_port_subscription_count", - "amount of filtered steal port subscriptions in mirrord-agent" - ) - .expect("Valid at initialization!"); - - let steal_unfiltered_port_subscription = register_int_gauge!( - "mirrord_agent_steal_unfiltered_port_subscription_count", - "amount of unfiltered steal port subscriptions in mirrord-agent" - ) - .expect("Valid at initialization!"); - - let steal_filtered_connection_subscription = register_int_gauge!( - "mirrord_agent_steal_connection_subscription_count", - "amount of filtered connections in steal mode in mirrord-agent" - ) - .expect("Valid at initialization!"); - - let steal_unfiltered_connection_subscription = register_int_gauge!( - "mirrord_agent_steal_unfiltered_connection_subscription_count", - "amount of unfiltered connections in steal mode in mirrord-agent" - ) - .expect("Valid at initialization!"); - - let http_request_in_progress_count = register_int_gauge!( - "mirrord_agent_http_request_in_progress_count", - "amount of in-progress http requests in the mirrord-agent" - ) - .expect("Valid at initialization!"); - - let tcp_outgoing_connection = register_int_gauge!( - "mirrord_agent_tcp_outgoing_connection_count", - "amount of tcp outgoing connections in mirrord-agent" - ) - .expect("Valid at initialization!"); - - let udp_outgoing_connection = register_int_gauge!( - "mirrord_agent_udp_outgoing_connection_count", - "amount of udp outgoing connections in mirrord-agent" - ) - .expect("Valid at initialization!"); + use prometheus::Opts; + + let registry = Registry::new(); + + let client_count = { + let opts = Opts::new( + "mirrord_agent_client_count", + "amount of connected clients to this mirrord-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + 
let dns_request_count = { + let opts = Opts::new( + "mirrord_agent_dns_request_count", + "amount of in-progress dns requests in the mirrord-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let open_fd_count = { + let opts = Opts::new( + "mirrord_agent_open_fd_count", + "amount of open file descriptors in mirrord-agent file manager", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let mirror_port_subscription = { + let opts = Opts::new( + "mirrord_agent_mirror_port_subscription_count", + "amount of mirror port subscriptions in mirror-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let mirror_connection_subscription = { + let opts = Opts::new( + "mirrord_agent_mirror_connection_subscription_count", + "amount of connections in mirror mode in mirrord-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let steal_filtered_port_subscription = { + let opts = Opts::new( + "mirrord_agent_steal_filtered_port_subscription_count", + "amount of filtered steal port subscriptions in mirrord-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let steal_unfiltered_port_subscription = { + let opts = Opts::new( + "mirrord_agent_steal_unfiltered_port_subscription_count", + "amount of unfiltered steal port subscriptions in mirrord-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let steal_filtered_connection_subscription = { + let opts = Opts::new( + "mirrord_agent_steal_connection_subscription_count", + "amount of filtered connections in steal mode in mirrord-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let steal_unfiltered_connection_subscription = { + let opts = Opts::new( + "mirrord_agent_steal_unfiltered_connection_subscription_count", + "amount of unfiltered connections in steal mode in mirrord-agent", + ); + 
IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let http_request_in_progress_count = { + let opts = Opts::new( + "mirrord_agent_http_request_in_progress_count", + "amount of in-progress http requests in the mirrord-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let tcp_outgoing_connection = { + let opts = Opts::new( + "mirrord_agent_tcp_outgoing_connection_count", + "amount of tcp outgoing connections in mirrord-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + let udp_outgoing_connection = { + let opts = Opts::new( + "mirrord_agent_udp_outgoing_connection_count", + "amount of udp outgoing connections in mirrord-agent", + ); + IntGauge::with_opts(opts).expect("Valid at initialization!") + }; + + register_unique_only!(registry, client_count).expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, dns_request_count) + .expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, open_fd_count) + .expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, mirror_port_subscription) + .expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, mirror_connection_subscription) + .expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, steal_filtered_port_subscription) + .expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, steal_unfiltered_port_subscription) + .expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, steal_filtered_connection_subscription) + .expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, steal_unfiltered_connection_subscription) + .expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, http_request_in_progress_count) + .expect("Only `AlreadyReg` error is ignored!"); + register_unique_only!(registry, tcp_outgoing_connection) + .expect("Only 
`AlreadyReg` error is ignored!"); + register_unique_only!(registry, udp_outgoing_connection) + .expect("Only `AlreadyReg` error is ignored!"); Self { + registry, client_count, dns_request_count, open_fd_count, @@ -151,10 +215,11 @@ impl Metrics { } } - fn update_gauges(&self) { + fn gather_metrics(&self) -> Vec<MetricFamily> { use std::sync::atomic::Ordering; let Self { + registry, client_count, dns_request_count, open_fd_count, @@ -185,6 +250,8 @@ impl Metrics { http_request_in_progress_count.set(HTTP_REQUEST_IN_PROGRESS_COUNT.load(Ordering::Relaxed)); tcp_outgoing_connection.set(TCP_OUTGOING_CONNECTION.load(Ordering::Relaxed)); udp_outgoing_connection.set(UDP_OUTGOING_CONNECTION.load(Ordering::Relaxed)); + + registry.gather() } } @@ -192,9 +259,7 @@ impl Metrics { async fn get_metrics(State(state): State<Arc<Metrics>>) -> (StatusCode, String) { use prometheus::TextEncoder; - state.update_gauges(); - - let metric_families = prometheus::gather(); + let metric_families = state.gather_metrics(); match TextEncoder.encode_to_string(&metric_families) { Ok(response) => (StatusCode::OK, response), Err(fail) => { From aaa750fcc1b4dabe33d8d09b7516a5f739e5586b Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 22 Jan 2025 12:27:29 -0300 Subject: [PATCH 83/85] no ignoring alreadyreg --- mirrord/agent/src/metrics.rs | 86 ++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 8d8dd1f4b90..9e2fcfb545d 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -45,6 +45,13 @@ pub(crate) static TCP_OUTGOING_CONNECTION: AtomicI64 = AtomicI64::new(0); pub(crate) static UDP_OUTGOING_CONNECTION: AtomicI64 = AtomicI64::new(0); +/// The state with all the metrics [`IntGauge`]s and the prometheus [`Registry`] where we keep them. +/// +/// **Do not** modify the gauges directly!
+/// +/// Instead rely on [`Metrics::gather_metrics`], as we actually use a bunch of [`AtomicI64`]s to +/// keep track of the values, they are the ones being (de|in)cremented. These gauges are just set +/// when it's time to send them via [`get_metrics`]. #[derive(Debug)] struct Metrics { registry: Registry, @@ -62,17 +69,8 @@ struct Metrics { udp_outgoing_connection: IntGauge, } -macro_rules! register_unique_only { - ($registry: ident, $gauge: ident) => { - match $registry.register(Box::new($gauge.clone())) { - Ok(()) => Ok(()), - Err(prometheus::Error::AlreadyReg) => Ok(()), - Err(fail) => Err(fail), - } - }; -} - impl Metrics { + /// Creates a [`Registry`] to ... register our [`IntGauge`]s. fn new() -> Self { use prometheus::Opts; @@ -174,29 +172,42 @@ impl Metrics { IntGauge::with_opts(opts).expect("Valid at initialization!") }; - register_unique_only!(registry, client_count).expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, dns_request_count) - .expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, open_fd_count) - .expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, mirror_port_subscription) - .expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, mirror_connection_subscription) - .expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, steal_filtered_port_subscription) - .expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, steal_unfiltered_port_subscription) - .expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, steal_filtered_connection_subscription) - .expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, steal_unfiltered_connection_subscription) - .expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, http_request_in_progress_count) - .expect("Only `AlreadyReg` error is ignored!"); - 
register_unique_only!(registry, tcp_outgoing_connection) - .expect("Only `AlreadyReg` error is ignored!"); - register_unique_only!(registry, udp_outgoing_connection) - .expect("Only `AlreadyReg` error is ignored!"); + registry + .register(Box::new(client_count.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(dns_request_count.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(open_fd_count.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(mirror_port_subscription.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(mirror_connection_subscription.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(steal_filtered_port_subscription.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(steal_unfiltered_port_subscription.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(steal_filtered_connection_subscription.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(steal_unfiltered_connection_subscription.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(http_request_in_progress_count.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(tcp_outgoing_connection.clone())) + .expect("Register must be valid at initialization!"); + registry + .register(Box::new(udp_outgoing_connection.clone())) + .expect("Register must be valid at initialization!"); Self { registry, @@ -215,6 +226,11 @@ impl Metrics { } } + /// Calls [`IntGauge::set`] on every [`IntGauge`] of `Self`, setting it to the value of + /// the corresponding [`AtomicI64`] global (the uppercase named version of the gauge). 
+ /// + /// Returns the list of [`MetricFamily`] registered in our [`Metrics::registry`], ready to be + /// encoded and sent to prometheus. fn gather_metrics(&self) -> Vec<MetricFamily> { use std::sync::atomic::Ordering; @@ -255,6 +271,10 @@ impl Metrics { } } +/// `GET /metrics` +/// +/// Prepares all the metrics with [`Metrics::gather_metrics`], and responds to the prometheus +/// request. #[tracing::instrument(level = Level::TRACE, ret)] async fn get_metrics(State(state): State<Arc<Metrics>>) -> (StatusCode, String) { use prometheus::TextEncoder; From caaca9a2e7063ada54e67de96842f6a84932b1ff Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 22 Jan 2025 12:30:59 -0300 Subject: [PATCH 84/85] lil docs --- mirrord/agent/src/metrics.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mirrord/agent/src/metrics.rs b/mirrord/agent/src/metrics.rs index 9e2fcfb545d..59fefbc2305 100644 --- a/mirrord/agent/src/metrics.rs +++ b/mirrord/agent/src/metrics.rs @@ -289,6 +289,11 @@ async fn get_metrics(State(state): State<Arc<Metrics>>) -> (StatusCode, String) } } +/// Starts the mirrord-agent prometheus metrics service. +/// +/// You can get the metrics from `GET address/metrics`. +/// +/// - `address`: comes from a mirrord-agent config. #[tracing::instrument(level = Level::TRACE, skip_all, ret ,err)] pub(crate) async fn start_metrics( address: SocketAddr, From 29c58a01aab3992771d41b08d5fececc9f0f0c3a Mon Sep 17 00:00:00 2001 From: meowjesty Date: Wed, 22 Jan 2025 14:56:27 -0300 Subject: [PATCH 85/85] Remove example comments from readme. --- mirrord/agent/README.md | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/mirrord/agent/README.md b/mirrord/agent/README.md index 5cd0f1542e7..d7456ead64c 100644 --- a/mirrord/agent/README.md +++ b/mirrord/agent/README.md @@ -105,55 +105,16 @@ metadata: namespace: monitoring data: prometheus.yml: | - # A scrape configuration for running Prometheus on a Kubernetes cluster.
- # This uses separate scrape configs for cluster components (i.e. API server, node) - # and services to allow each to use different authentication configs. - # - # Kubernetes labels will be added as Prometheus labels on metrics via the - # `labelmap` relabeling action. - # - # If you are using Kubernetes 1.7.2 or earlier, please take note of the comments - # for the kubernetes-cadvisor job; you will need to edit or remove this job. - - # Keep at most 100 sets of details of targets dropped by relabeling. - # This information is used to display in the UI for troubleshooting. global: keep_dropped_targets: 100 - # Scrape config for API servers. - # - # Kubernetes exposes API servers as endpoints to the default/kubernetes - # service so this uses `endpoints` role and uses relabelling to only keep - # the endpoints associated with the default/kubernetes service using the - # default named port `https`. This works for single API server deployments as - # well as HA API server deployments. scrape_configs: - # Example scrape config for pods - # - # The relabeling allows the actual pod scrape to be configured - # for all the declared ports (or port-free target if none is declared) - # or only some ports. - job_name: "kubernetes-pods" kubernetes_sd_configs: - role: pod relabel_configs: - # Example relabel to scrape only pods that have - # "example.io/should_be_scraped = true" annotation. - # - source_labels: [__meta_kubernetes_pod_annotation_example_io_should_be_scraped] - # action: keep - # regex: true - # - # Example relabel to customize metric path based on pod - # "example.io/metric_path = " annotation. - # - source_labels: [__meta_kubernetes_pod_annotation_example_io_metric_path] - # action: replace - # target_label: __metrics_path__ - # regex: (.+) - # - # Example relabel to scrape only single, desired port for the pod - # based on pod "example.io/scrape_port = " annotation. 
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] action: replace regex: ([^:]+)(?::\d+)?;(\d+)