diff --git a/agent-bridle-tool-shell/src/lib.rs b/agent-bridle-tool-shell/src/lib.rs index 050dfb1..65a825c 100644 --- a/agent-bridle-tool-shell/src/lib.rs +++ b/agent-bridle-tool-shell/src/lib.rs @@ -11,12 +11,14 @@ //! *advisory*: the result's `sandbox_kind` reports what actually enforced it //! (I9), today [`agent_bridle_core::SandboxKind::None`]. //! -//! **Increment 1** of agent-bridle#34: a single command with quoted arguments. -//! Pipelines, redirections, `&&`/`||`/`;` and globbing are added incrementally; -//! until then they are refused as *unsupported* (distinct from the *dynamic* -//! constructs refused by design). `brush-bridle-core` remains the deferred, -//! reversible full-bash alternative engine behind the same registry seam -//! (ADR 0005 D4 — tracked on agent-bridle#20). +//! **Increments 1–2** of agent-bridle#34: a **pipeline** of simple commands +//! (`a | b | c`) with quoted arguments. Redirections, `&&`/`||`/`;` and globbing +//! are added incrementally; until then they are refused as *unsupported* +//! (distinct from the *dynamic* constructs refused by design). The process +//! spawning is behind a `Spawner` seam (mocked in unit tests; real path in +//! `tests/real_spawn.rs`). `brush-bridle-core` remains the deferred, reversible +//! full-bash alternative engine behind the same registry seam (ADR 0005 D4 — +//! tracked on agent-bridle#20). #![forbid(unsafe_code)] #![warn(missing_docs)] diff --git a/agent-bridle-tool-shell/src/parse.rs b/agent-bridle-tool-shell/src/parse.rs index 76d6383..7426d04 100644 --- a/agent-bridle-tool-shell/src/parse.rs +++ b/agent-bridle-tool-shell/src/parse.rs @@ -2,18 +2,19 @@ //! //! `agent-bridle` is the **exec funnel**: rather than hand a string to a shell //! interpreter, the engine parses it itself and runs only what it can confine. -//! This is **increment 1** — a single command with quoted arguments. Pipelines, -//! redirections, `&&`/`||`/`;`, globbing and variable expansion are added in -//! later increments (tracked on agent-bridle#34); until then each is refused as -//! [`Refusal::Unsupported`], kept distinct from the [`Refusal::Dynamic`] -//! constructs refused **by design** — command/arithmetic substitution, -//! backticks, subshells: the undecidable interiors ADR 0001 says may never be -//! statically cleared. +//! This covers **increments 1–2** — a pipeline of simple commands +//! (`a | b | c`) with quoted arguments. Redirections, `&&`/`||`/`;`, globbing +//! and variable expansion are added in later increments (tracked on +//! agent-bridle#34); until then each is refused as [`Refusal::Unsupported`], +//! kept distinct from the [`Refusal::Dynamic`] constructs refused **by design** +//! — command/arithmetic substitution, backticks, subshells: the undecidable +//! interiors ADR 0001 says may never be statically cleared. //! //! Quoting is honored, so a metacharacter **inside quotes is a literal //! argument** — only *unquoted* operators and constructs are recognized. That is -//! the safety property the unit tests pin: `echo "a|b"` is the two-element argv -//! `["echo", "a|b"]`, while `echo a|b` is refused. +//! the safety property the unit tests pin: `echo "a|b"` is the single-stage argv +//! `["echo", "a|b"]`, while `echo a | b` is the two-stage pipeline +//! `[["echo", "a"], ["b"]]`. use std::fmt; @@ -26,11 +27,11 @@ pub enum Refusal { /// shell, use the embedder's unbridled/`--yolo` allowance (ADR 0003 / 0005 D5). Dynamic(&'static str), /// A construct the safe-subset engine will support but **does not yet** in - /// this increment (pipelines, redirections, sequencing, globbing, variable - /// expansion). Tracked on agent-bridle#34. + /// this increment (redirections, sequencing, globbing, variable expansion). + /// Tracked on agent-bridle#34. Unsupported(&'static str), /// The input could not be parsed (unterminated quote, trailing backslash, - /// empty command). + /// empty command or pipeline stage). Malformed(String), } @@ -62,14 +63,21 @@ impl fmt::Display for Refusal { } } -/// Parse a `cmd` string into a **single command's argv**, or a [`Refusal`]. +/// A parsed pipeline: an ordered list of command stages, each its own argv. +/// A single command (no `|`) is a one-element pipeline. +pub type Pipeline = Vec>; + +/// Parse a `cmd` string into a [`Pipeline`] (one or more `|`-separated command +/// stages), or a [`Refusal`]. /// /// Single quotes are fully literal; double quotes are literal except `$` and a /// backtick still trigger substitution detection (as in a real shell). An -/// unquoted backslash escapes the next character. Any *unquoted* operator -/// (`|`, `&&`, `;`, `<`, `>`) is [`Refusal::Unsupported`] in this increment, and -/// any substitution (`$(...)`, backticks, `( … )`) is [`Refusal::Dynamic`]. -pub fn classify(input: &str) -> Result, Refusal> { +/// unquoted backslash escapes the next character. An unquoted single `|` +/// separates pipeline stages; every other operator (`&&`, `||`, `;`, `<`, `>`) +/// is [`Refusal::Unsupported`] in this increment, and any substitution +/// (`$(...)`, backticks, `( … )`) is [`Refusal::Dynamic`]. +pub fn classify(input: &str) -> Result { + let mut pipeline: Pipeline = Vec::new(); let mut words: Vec = Vec::new(); let mut cur = String::new(); let mut has_word = false; @@ -126,14 +134,23 @@ pub fn classify(input: &str) -> Result, Refusal> { has_word = false; } } - // ── operators: supported later, refused (cleanly) for now ─────── + // ── pipeline stage separator (a single, unquoted `|`) ──────────── '|' => { - return Err(Refusal::Unsupported(if chars.peek() == Some(&'|') { - "logical OR `||`" - } else { - "pipeline `|`" - })) + if chars.peek() == Some(&'|') { + return Err(Refusal::Unsupported("logical OR `||`")); + } + if has_word { + words.push(std::mem::take(&mut cur)); + has_word = false; + } + if words.is_empty() { + return Err(Refusal::Malformed( + "empty pipeline stage (nothing before `|`)".into(), + )); + } + pipeline.push(std::mem::take(&mut words)); } + // ── operators: supported later, refused (cleanly) for now ─────── '&' => { return Err(Refusal::Unsupported(if chars.peek() == Some(&'&') { "logical AND `&&`" @@ -158,13 +175,21 @@ pub fn classify(input: &str) -> Result, Refusal> { } } + // Finalize the trailing stage. if has_word { words.push(cur); } - if words.is_empty() { + if !words.is_empty() { + pipeline.push(words); + } else if pipeline.is_empty() { return Err(Refusal::Malformed("empty command".into())); + } else { + // A `|` with nothing after it. + return Err(Refusal::Malformed( + "empty pipeline stage (nothing after `|`)".into(), + )); } - Ok(words) + Ok(pipeline) } /// Classify a `$`: `$(` is command/arithmetic substitution (dynamic, refused by @@ -181,66 +206,100 @@ fn dollar_refusal(next: Option) -> Refusal { mod tests { use super::*; + /// One command's argv. fn argv(parts: &[&str]) -> Vec { parts.iter().map(|s| (*s).to_string()).collect() } - // ── the safe argv cases ───────────────────────────────────────────────── + /// A single-stage pipeline (the common case). + fn one(parts: &[&str]) -> Pipeline { + vec![argv(parts)] + } + + // ── single commands (increment 1, now one-stage pipelines) ────────────── #[test] fn simple_command_splits_on_whitespace() { assert_eq!( classify("echo hi there").unwrap(), - argv(&["echo", "hi", "there"]) + one(&["echo", "hi", "there"]) ); } #[test] fn collapses_runs_of_whitespace() { - assert_eq!(classify(" echo\t hi \n").unwrap(), argv(&["echo", "hi"])); + assert_eq!(classify(" echo\t hi \n").unwrap(), one(&["echo", "hi"])); } #[test] fn single_quotes_are_literal() { - assert_eq!(classify("echo 'a b'").unwrap(), argv(&["echo", "a b"])); + assert_eq!(classify("echo 'a b'").unwrap(), one(&["echo", "a b"])); } #[test] fn double_quotes_group_words() { - assert_eq!(classify("echo \"a b\"").unwrap(), argv(&["echo", "a b"])); + assert_eq!(classify("echo \"a b\"").unwrap(), one(&["echo", "a b"])); } #[test] fn empty_quotes_produce_an_empty_arg() { - assert_eq!(classify("echo ''").unwrap(), argv(&["echo", ""])); + assert_eq!(classify("echo ''").unwrap(), one(&["echo", ""])); } #[test] fn backslash_escapes_a_space() { - assert_eq!(classify("echo a\\ b").unwrap(), argv(&["echo", "a b"])); + assert_eq!(classify("echo a\\ b").unwrap(), one(&["echo", "a b"])); } #[test] fn escaped_dollar_is_a_literal_dollar() { - // The escape hatch for a literal `$`: refusal of bare `$` is escapable. - assert_eq!(classify("echo \\$5").unwrap(), argv(&["echo", "$5"])); - assert_eq!(classify("echo \"\\$5\"").unwrap(), argv(&["echo", "$5"])); + assert_eq!(classify("echo \\$5").unwrap(), one(&["echo", "$5"])); + assert_eq!(classify("echo \"\\$5\"").unwrap(), one(&["echo", "$5"])); } - // ── the security property: quoted metacharacters are LITERAL ──────────── + // ── pipelines (increment 2) ───────────────────────────────────────────── + + #[test] + fn pipeline_splits_into_stages() { + assert_eq!( + classify("grep foo | head -n 3").unwrap(), + vec![argv(&["grep", "foo"]), argv(&["head", "-n", "3"])] + ); + } + + #[test] + fn multi_stage_pipeline() { + assert_eq!( + classify("a | b | c").unwrap(), + vec![argv(&["a"]), argv(&["b"]), argv(&["c"])] + ); + } #[test] - fn quoted_pipe_is_a_literal_argument_not_an_operator() { - // Load-bearing: a metacharacter inside quotes must NOT be treated as an - // operator. `echo "a|b"` is a single literal arg, not a refused pipeline. - assert_eq!(classify("echo \"a|b\"").unwrap(), argv(&["echo", "a|b"])); + fn pipe_without_surrounding_spaces() { assert_eq!( - classify("echo 'a && b'").unwrap(), - argv(&["echo", "a && b"]) + classify("grep foo|wc -l").unwrap(), + vec![argv(&["grep", "foo"]), argv(&["wc", "-l"])] ); + } + + #[test] + fn empty_pipeline_stage_is_malformed() { + assert!(matches!(classify("| x"), Err(Refusal::Malformed(_)))); + assert!(matches!(classify("x |"), Err(Refusal::Malformed(_)))); + assert!(matches!(classify("a | | b"), Err(Refusal::Malformed(_)))); + } + + // ── the security property: quoted metacharacters are LITERAL ──────────── + + #[test] + fn quoted_pipe_is_a_literal_argument_not_a_separator() { + // Load-bearing: a `|` inside quotes is one literal arg, NOT a stage split. + assert_eq!(classify("echo \"a|b\"").unwrap(), one(&["echo", "a|b"])); + assert_eq!(classify("echo 'a && b'").unwrap(), one(&["echo", "a && b"])); assert_eq!( classify("grep '$(x)' f").unwrap(), - argv(&["grep", "$(x)", "f"]) + one(&["grep", "$(x)", "f"]) ); } @@ -260,6 +319,11 @@ mod tests { classify("echo \"$(id)\""), Err(Refusal::Dynamic(_)) )); + // Even downstream of a (now-supported) pipe, a dynamic stage is refused. + assert!(matches!( + classify("echo hi | $(evil)"), + Err(Refusal::Dynamic(_)) + )); } #[test] @@ -267,17 +331,7 @@ mod tests { assert!(matches!(classify("(echo hi)"), Err(Refusal::Dynamic(_)))); } - // ── operators refused as UNSUPPORTED (this increment) ─────────────────── - - #[test] - fn pipeline_is_unsupported_and_the_second_command_never_runs() { - // The whole string is refused before any argv is produced, so `rm` - // downstream of the pipe is never reachable. - assert!(matches!( - classify("echo a | rm -rf x"), - Err(Refusal::Unsupported(_)) - )); - } + // ── operators still refused as UNSUPPORTED ────────────────────────────── #[test] fn logical_and_sequencing_redirection_globbing_are_unsupported() { @@ -322,7 +376,7 @@ mod tests { .unwrap_err() .to_string() .contains("refused by design")); - assert!(classify("a | b") + assert!(classify("a && b") .unwrap_err() .to_string() .contains("not yet supported")); diff --git a/agent-bridle-tool-shell/src/shell_tool.rs b/agent-bridle-tool-shell/src/shell_tool.rs index 946eebd..857b733 100644 --- a/agent-bridle-tool-shell/src/shell_tool.rs +++ b/agent-bridle-tool-shell/src/shell_tool.rs @@ -3,19 +3,22 @@ //! Per ADR 0005, the object-capability *boundary* is L3 (kernel) and this engine //! is the L2 *convenience*: `agent-bridle` is the exec funnel — it parses the //! request itself (see [`crate::parse`]), checks the `exec`/`fs` leash, spawns -//! the program directly, and **refuses the dynamic constructs by design**. Until -//! an L3 backstop is active (deferred — agent-bridle#35), a run is honestly -//! *advisory*: the result's `sandbox_kind` reports what actually enforced it -//! (I9), which today is [`SandboxKind::None`]. +//! the program(s) directly, and **refuses the dynamic constructs by design**. +//! Until an L3 backstop is active (deferred — agent-bridle#35), a run is honestly +//! *advisory*: the result's `sandbox_kind` reports what actually enforced it (I9), +//! today [`SandboxKind::None`]. //! -//! This is **increment 1** of agent-bridle#34: a single command with quoted -//! arguments. Pipelines, redirections, `&&`/`||`/`;` and globbing land in later -//! increments; today they are refused as `Unsupported` (distinct from the -//! `Dynamic` constructs refused by design). `brush-bridle-core` remains the -//! deferred, reversible full-bash alternative engine (ADR 0005 D4, #20). - +//! **Increments 1–2** of agent-bridle#34: a **pipeline** of simple commands +//! (`a | b | c`) with quoted arguments. Redirections, `&&`/`||`/`;` and globbing +//! land in later increments; today they are refused as `Unsupported`. The actual +//! process spawning is behind a [`Spawner`] seam so the parse + leash logic is +//! unit-tested against a mock (no real subprocesses); the real `std::process` +//! path is covered by the `tests/real_spawn.rs` integration test. + +use std::io::Read; use std::path::Path; -use std::process::Stdio; +use std::process::{Child, ChildStdout, Command, Stdio}; +use std::sync::Arc; use std::time::Duration; use agent_bridle_core::{ @@ -23,32 +26,83 @@ use agent_bridle_core::{ }; use async_trait::async_trait; -use crate::parse::{classify, Refusal}; +use crate::parse::{classify, Pipeline, Refusal}; /// Maximum permitted (and default-clamped) wall-clock timeout. const MAX_TIMEOUT_SECS: u64 = 300; /// Default timeout when the caller does not specify one. const DEFAULT_TIMEOUT_SECS: u64 = 60; /// Cap on captured stdout/stderr returned in the envelope (bytes), so a chatty -/// command cannot return unbounded output. Streaming/byte-exact caps are a -/// follow-up; the timeout bounds runaway producers in the meantime. +/// command cannot return unbounded output. Streaming caps are a follow-up; the +/// timeout bounds runaway producers in the meantime. const MAX_OUTPUT_BYTES: usize = 1 << 20; // 1 MiB +/// What a finished pipeline produced (the last stage's exit code; concatenated +/// output). The unit of the [`Spawner`] seam. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct Captured { + pub exit_code: i32, + pub stdout: String, + pub stderr: String, +} + +/// The pipeline-execution seam. +/// +/// The real implementation ([`OsSpawner`]) spawns processes; tests inject a mock +/// so the parse + leash logic is verified without real subprocesses (the +/// workspace norm: no real process/fs in unit tests). A `Spawner` only ever +/// receives a pipeline that already passed the `exec` leash — admission happens +/// in [`ShellTool::invoke`] *before* the spawner is called. +pub(crate) trait Spawner: Send + Sync { + /// Run a leash-approved pipeline to completion, capturing its output. + fn run(&self, stages: &[Vec], cwd: Option<&str>) -> ToolResult; +} + +/// The real spawner: a `std::process` pipeline wired with OS pipes. +struct OsSpawner; + +impl Spawner for OsSpawner { + fn run(&self, stages: &[Vec], cwd: Option<&str>) -> ToolResult { + run_pipeline(stages, cwd) + } +} + /// The confined shell tool. /// /// Registers under `"shell"`. Accepts either argv form (`program` + `args`) or a /// free-form `cmd` string parsed by the safe-subset engine. Leash refusals /// (out-of-scope `exec`, a refused construct) are returned as a **structured -/// denied envelope** (`denied: true`), not a hard error — so an agent gets an -/// actionable signal it can correct. -#[derive(Debug, Default, Clone, Copy)] -pub struct ShellTool; +/// denied envelope** (`denied: true`), not a hard error. +#[derive(Clone)] +pub struct ShellTool { + spawner: Arc, +} + +impl std::fmt::Debug for ShellTool { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("ShellTool") + } +} impl ShellTool { - /// Construct the tool. + /// Construct the tool with the real OS spawner. #[must_use] pub fn new() -> Self { - Self + Self { + spawner: Arc::new(OsSpawner), + } + } + + /// Construct with an injected spawner (tests only). + #[cfg(test)] + fn with_spawner(spawner: Arc) -> Self { + Self { spawner } + } +} + +impl Default for ShellTool { + fn default() -> Self { + Self::new() } } @@ -75,11 +129,10 @@ impl Tool for ShellTool { "cmd": { "type": "string", "description": "Free-form command line run by the confined safe-subset engine: \ - a single command with quoted arguments. Dynamic constructs \ - ($(...), backticks, subshells) are refused by design; pipelines, \ - redirections, &&/||/; and globbing are added incrementally and are \ - refused (with a clear `denied` reason) until then. Mutually exclusive \ - with `program`." + a pipeline of simple commands (a | b | c) with quoted arguments. Dynamic \ + constructs ($(...), backticks, subshells) are refused by design; \ + redirections, &&/||/; and globbing are added incrementally and refused \ + (with a clear `denied` reason) until then. Mutually exclusive with `program`." }, "cwd": { "type": "string", @@ -102,21 +155,24 @@ impl Tool for ShellTool { cx: &ToolContext, ) -> ToolResult { let parsed = ShellArgs::parse(&args)?; - // Honest reporting (ADR 0005 D1 / I9): this engine is L2 convenience, so - // the kind is whatever is actually in force — None until L3 (#35). + // Honest reporting (ADR 0005 D1 / I9): L2 convenience, so the kind is + // whatever is actually in force — None until L3 (#35). let sandbox_kind = cx.sandbox_kind(); - // Resolve to a single command's argv, or surface a structured refusal. - let argv = match parsed.argv() { - Ok(argv) => argv, + // Resolve to a pipeline, or surface a structured refusal. + let pipeline = match parsed.pipeline() { + Ok(p) => p, Err(refusal) => return Ok(refused_envelope(sandbox_kind, &refusal)), }; - // Leash: the exec axis. Out-of-scope program → structured denial. - if let Err(e) = cx.check_exec(&argv[0]) { - return Ok(deny(sandbox_kind, DenialKind::Exec, &argv[0], &e)); + // Atomic admission (ADR 0001): EVERY stage's program must be in `exec` + // scope *before any stage spawns*, so a single out-of-scope stage denies + // the whole pipeline with no partial side effects. + for stage in &pipeline { + if let Err(e) = cx.check_exec(&stage[0]) { + return Ok(deny(sandbox_kind, DenialKind::Exec, &stage[0], &e)); + } } - // Leash: a provided cwd must be within fs_read scope. if let Some(cwd) = &parsed.cwd { if let Err(e) = cx.check_path_read(Path::new(cwd)) { @@ -126,9 +182,10 @@ impl Tool for ShellTool { // Run on a blocking thread, bounded by the timeout. On timeout the // blocking task is detached and a timeout envelope is returned. + let spawner = Arc::clone(&self.spawner); let cwd = parsed.cwd.clone(); let timeout = parsed.timeout; - let run = tokio::task::spawn_blocking(move || run_command(&argv, cwd.as_deref())); + let run = tokio::task::spawn_blocking(move || spawner.run(&pipeline, cwd.as_deref())); match tokio::time::timeout(timeout, run).await { Ok(joined) => { let captured = joined @@ -235,43 +292,107 @@ impl ShellArgs { }) } - /// Resolve to a single command's argv (argv form is already structured; - /// free-form is parsed by the safe-subset engine). - fn argv(&self) -> Result, Refusal> { + /// Resolve to a pipeline (argv form is a one-stage pipeline; free-form is + /// parsed by the safe-subset engine). + fn pipeline(&self) -> Result { if let Some(program) = &self.program { let mut argv = Vec::with_capacity(1 + self.args.len()); argv.push(program.clone()); argv.extend(self.args.iter().cloned()); - Ok(argv) + Ok(vec![argv]) } else { classify(self.cmd.as_deref().unwrap_or("")) } } } -/// What a finished command produced. -struct Captured { - exit_code: i32, - stdout: String, - stderr: String, -} +/// Spawn a pipeline of commands wired with OS pipes, capturing the last stage's +/// stdout and every stage's stderr. The pipeline's exit code is the last stage's +/// (bash semantics without `pipefail`). +/// +/// Deadlock-free: every stage's stderr and the last stage's stdout are drained +/// by their own threads, so no pipe can fill while we `wait()` the children. +fn run_pipeline(stages: &[Vec], cwd: Option<&str>) -> ToolResult { + debug_assert!(!stages.is_empty(), "the parser guarantees ≥1 stage"); + + let mut children: Vec = Vec::with_capacity(stages.len()); + let mut prev_stdout: Option = None; + + for (i, argv) in stages.iter().enumerate() { + let mut cmd = Command::new(&argv[0]); + cmd.args(&argv[1..]); + if let Some(dir) = cwd { + cmd.current_dir(dir); + } + cmd.stdin(match prev_stdout.take() { + Some(out) => Stdio::from(out), + None => Stdio::null(), + }); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + let mut child = match cmd.spawn() { + Ok(c) => c, + Err(e) => { + // A bad stage must not orphan the stages already started. + for mut started in children { + let _ = started.kill(); + let _ = started.wait(); + } + return Err(ToolError::Exec(e)); + } + }; -/// Spawn `argv` directly (no shell), capture stdout/stderr, and return the exit -/// code. `argv[0]` is the program; `argv[1..]` the arguments. stdin is closed. -fn run_command(argv: &[String], cwd: Option<&str>) -> ToolResult { - let mut cmd = std::process::Command::new(&argv[0]); - cmd.args(&argv[1..]) - .stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - if let Some(dir) = cwd { - cmd.current_dir(dir); + // Wire this stage's stdout into the next stage's stdin — except the last + // stage, whose stdout we capture below. + if i + 1 < stages.len() { + prev_stdout = child.stdout.take(); + } + children.push(child); } - let out = cmd.output().map_err(ToolError::Exec)?; + + // Drain every stderr and the last stdout concurrently so no pipe deadlocks. + let mut stderr_readers = Vec::with_capacity(children.len()); + for child in &mut children { + let mut err = child.stderr.take().expect("stderr is piped"); + stderr_readers.push(std::thread::spawn(move || { + let mut buf = Vec::new(); + let _ = err.read_to_end(&mut buf); + buf + })); + } + let mut last_stdout = children + .last_mut() + .expect("≥1 stage") + .stdout + .take() + .expect("last stage stdout is piped"); + let stdout_reader = std::thread::spawn(move || { + let mut buf = Vec::new(); + let _ = last_stdout.read_to_end(&mut buf); + buf + }); + + // Wait all stages; the pipeline's exit code is the last stage's. + let last = children.len() - 1; + let mut exit_code = -1; + for (i, child) in children.iter_mut().enumerate() { + let status = child.wait().map_err(ToolError::Exec)?; + if i == last { + exit_code = status.code().unwrap_or(-1); + } + } + + let stdout = stdout_reader.join().unwrap_or_default(); + let mut stderr = Vec::new(); + for reader in stderr_readers { + stderr.extend(reader.join().unwrap_or_default()); + } + Ok(Captured { - exit_code: out.status.code().unwrap_or(-1), - stdout: capped_utf8(&out.stdout), - stderr: capped_utf8(&out.stderr), + exit_code, + stdout: capped_utf8(&stdout), + stderr: capped_utf8(&stderr), }) } @@ -287,15 +408,39 @@ fn capped_utf8(bytes: &[u8]) -> String { mod tests { use super::*; use agent_bridle_core::{Caveats, Gate, Scope}; + use std::sync::Mutex; + + /// A spawner that records every pipeline it is asked to run and returns a + /// canned result — no real processes. `block_ms` lets a test exercise the + /// timeout path deterministically without a real `sleep`. + #[derive(Default)] + struct MockSpawner { + calls: Mutex>, + stdout: String, + exit_code: i32, + block_ms: u64, + } + + impl Spawner for MockSpawner { + fn run(&self, stages: &[Vec], _cwd: Option<&str>) -> ToolResult { + self.calls.lock().unwrap().push(stages.to_vec()); + if self.block_ms > 0 { + std::thread::sleep(Duration::from_millis(self.block_ms)); + } + Ok(Captured { + exit_code: self.exit_code, + stdout: self.stdout.clone(), + stderr: String::new(), + }) + } + } - /// Mint a `ToolContext` the only legitimate way — through the gate. fn ctx(granted: Caveats) -> ToolContext { Gate::new(0) - .authorize(&ShellTool, &granted) + .authorize(&ShellTool::new(), &granted) .expect("authorize") } - /// A grant that allows exactly the given exec basenames. fn exec_only(names: &[&str]) -> Caveats { Caveats { exec: Scope::only(names.iter().map(|s| (*s).to_string())), @@ -303,6 +448,15 @@ mod tests { } } + /// Pipelines the mock recorded (what *would* have been spawned). + fn recorded(mock: &Arc) -> Vec { + mock.calls.lock().unwrap().clone() + } + + fn argv(parts: &[&str]) -> Vec { + parts.iter().map(|s| (*s).to_string()).collect() + } + #[test] fn name_is_shell() { assert_eq!(ShellTool::new().name(), "shell"); @@ -318,104 +472,117 @@ mod tests { } #[tokio::test] - async fn argv_mode_runs_a_permitted_command() { - let cx = ctx(exec_only(&["echo"])); - let out = ShellTool::new() - .invoke(serde_json::json!({"program": "echo", "args": ["hi"]}), &cx) + async fn argv_mode_resolves_to_one_stage_and_runs() { + let mock = Arc::new(MockSpawner { + stdout: "hi\n".into(), + ..Default::default() + }); + let out = ShellTool::with_spawner(mock.clone()) + .invoke( + serde_json::json!({"program": "echo", "args": ["hi"]}), + &ctx(exec_only(&["echo"])), + ) .await .expect("invoke"); assert_eq!(out["exit_code"], 0); assert_eq!(out["stdout"], "hi\n"); assert_eq!(out["sandbox_kind"], "none"); // honest: advisory until L3 - assert!(out.get("denied").is_none()); - } - - #[tokio::test] - async fn cmd_mode_runs_a_simple_command() { - let cx = ctx(exec_only(&["echo"])); - let out = ShellTool::new() - .invoke(serde_json::json!({"cmd": "echo hi"}), &cx) - .await - .expect("invoke"); - assert_eq!(out["stdout"], "hi\n"); + assert_eq!(recorded(&mock), vec![vec![argv(&["echo", "hi"])]]); } #[tokio::test] - async fn cmd_mode_honors_quoting() { - let cx = ctx(exec_only(&["echo"])); - let out = ShellTool::new() - .invoke(serde_json::json!({"cmd": "echo \"a b\""}), &cx) + async fn pipeline_passes_every_stage_to_the_spawner() { + let mock = Arc::new(MockSpawner::default()); + ShellTool::with_spawner(mock.clone()) + .invoke( + serde_json::json!({"cmd": "grep foo | wc -l"}), + &ctx(exec_only(&["grep", "wc"])), + ) .await .expect("invoke"); - assert_eq!(out["stdout"], "a b\n"); + assert_eq!( + recorded(&mock), + vec![vec![argv(&["grep", "foo"]), argv(&["wc", "-l"])]] + ); } - /// The load-bearing security test for the engine: a quoted metacharacter is a - /// literal argument — `echo "a|b"` prints `a|b`, it does not pipe. #[tokio::test] - async fn quoted_pipe_is_a_literal_argument() { - let cx = ctx(exec_only(&["echo"])); - let out = ShellTool::new() - .invoke(serde_json::json!({"cmd": "echo \"a|b\""}), &cx) + async fn quoted_pipe_stays_a_single_stage() { + let mock = Arc::new(MockSpawner::default()); + ShellTool::with_spawner(mock.clone()) + .invoke( + serde_json::json!({"cmd": "echo \"a|b\""}), + &ctx(exec_only(&["echo"])), + ) .await .expect("invoke"); - assert_eq!(out["stdout"], "a|b\n"); - assert!(out.get("denied").is_none()); + assert_eq!(recorded(&mock), vec![vec![argv(&["echo", "a|b"])]]); } + /// THE security assertion: an out-of-scope program is denied and the spawner + /// is NEVER called — nothing would have run. #[tokio::test] - async fn out_of_scope_exec_is_denied_structurally() { - let cx = ctx(exec_only(&["echo"])); // rm not granted - let out = ShellTool::new() + async fn out_of_scope_exec_denied_before_spawn() { + let mock = Arc::new(MockSpawner::default()); + let out = ShellTool::with_spawner(mock.clone()) .invoke( - serde_json::json!({"program": "rm", "args": ["-rf", "x"]}), - &cx, + serde_json::json!({"program": "rm", "args": ["-rf", "/tmp/x"]}), + &ctx(exec_only(&["echo"])), ) .await .expect("invoke"); assert_eq!(out["denied"], true); assert_eq!(out["denials"][0]["kind"], "exec"); assert_eq!(out["denials"][0]["target"], "rm"); + assert!(recorded(&mock).is_empty(), "spawner must not be called"); } - /// A dynamic construct is refused by design and is NEVER executed. + /// Atomic admission: if ANY pipeline stage is out of scope, the whole + /// pipeline is denied and nothing spawns — no partial side effects. #[tokio::test] - async fn command_substitution_is_refused_and_not_run() { - let cx = ctx(Caveats::top()); // even with full exec, the form is refused - let out = ShellTool::new() - .invoke(serde_json::json!({"cmd": "echo $(whoami)"}), &cx) + async fn pipeline_atomic_admission_denies_whole_if_any_stage_out_of_scope() { + let mock = Arc::new(MockSpawner::default()); + let out = ShellTool::with_spawner(mock.clone()) + .invoke( + serde_json::json!({"cmd": "grep foo | rm -rf x"}), + &ctx(exec_only(&["grep"])), // rm not granted + ) .await .expect("invoke"); assert_eq!(out["denied"], true); + assert_eq!(out["denials"][0]["target"], "rm"); assert!( - out.get("exit_code").is_none(), - "nothing must have run: {out}" + recorded(&mock).is_empty(), + "an out-of-scope stage must abort the whole pipeline before any spawn" ); - assert!(out["denials"][0]["reason"] - .as_str() - .unwrap() - .contains("refused by design")); } - /// A pipeline is refused as unsupported — the downstream command never runs. + /// A dynamic construct is refused by design and never reaches the spawner. #[tokio::test] - async fn pipeline_is_refused_so_downstream_never_runs() { - let cx = ctx(Caveats::top()); - let out = ShellTool::new() - .invoke(serde_json::json!({"cmd": "echo a | rm -rf x"}), &cx) + async fn dynamic_construct_refused_before_spawn() { + let mock = Arc::new(MockSpawner::default()); + let out = ShellTool::with_spawner(mock.clone()) + .invoke( + serde_json::json!({"cmd": "echo $(whoami)"}), + &ctx(Caveats::top()), + ) .await .expect("invoke"); assert_eq!(out["denied"], true); assert!(out.get("exit_code").is_none()); + assert!(out["denials"][0]["reason"] + .as_str() + .unwrap() + .contains("refused by design")); + assert!(recorded(&mock).is_empty()); } #[tokio::test] async fn both_program_and_cmd_is_a_hard_error() { - let cx = ctx(Caveats::top()); let res = ShellTool::new() .invoke( serde_json::json!({"program": "echo", "cmd": "echo hi"}), - &cx, + &ctx(Caveats::top()), ) .await; assert!(res.is_err()); @@ -424,11 +591,15 @@ mod tests { #[tokio::test] async fn timeout_is_reported() { - let cx = ctx(exec_only(&["sleep"])); - let out = ShellTool::new() + // The mock blocks longer than the 1s timeout — no real process involved. + let mock = Arc::new(MockSpawner { + block_ms: 1500, + ..Default::default() + }); + let out = ShellTool::with_spawner(mock) .invoke( - serde_json::json!({"program": "sleep", "args": ["5"], "timeout_secs": 1}), - &cx, + serde_json::json!({"program": "anything", "timeout_secs": 1}), + &ctx(exec_only(&["anything"])), ) .await .expect("invoke"); diff --git a/agent-bridle-tool-shell/tests/real_spawn.rs b/agent-bridle-tool-shell/tests/real_spawn.rs new file mode 100644 index 0000000..0667f72 --- /dev/null +++ b/agent-bridle-tool-shell/tests/real_spawn.rs @@ -0,0 +1,113 @@ +//! Real-spawn integration tests for the engine's `std::process` path. +//! +//! These exercise the *real* `OsSpawner` with actual processes, and are kept out +//! of the unit tests (which mock the spawner) per the workspace norm: no real +//! subprocesses/fs in unit tests. They use only universally-present tools +//! (`echo`, `cat`, `true`, `false`). +#![cfg(feature = "shell")] + +use agent_bridle_core::{Caveats, Gate, Scope, Tool, ToolContext}; +use agent_bridle_tool_shell::ShellTool; + +/// Mint a context the only legitimate way — through the gate. +fn ctx(granted: Caveats) -> ToolContext { + Gate::new(0) + .authorize(&ShellTool::new(), &granted) + .expect("authorize") +} + +fn exec_only(names: &[&str]) -> Caveats { + Caveats { + exec: Scope::only(names.iter().map(|s| (*s).to_string())), + ..Caveats::top() + } +} + +#[tokio::test] +async fn real_echo_runs_and_captures_stdout() { + let out = ShellTool::new() + .invoke( + serde_json::json!({"program": "echo", "args": ["hello"]}), + &ctx(exec_only(&["echo"])), + ) + .await + .expect("invoke"); + assert_eq!(out["exit_code"], 0); + assert_eq!(out["stdout"], "hello\n"); + assert!(out.get("denied").is_none()); +} + +#[tokio::test] +async fn real_pipeline_passes_data_between_stages() { + // echo's stdout becomes cat's stdin; cat echoes it back. + let out = ShellTool::new() + .invoke( + serde_json::json!({"cmd": "echo hello | cat"}), + &ctx(exec_only(&["echo", "cat"])), + ) + .await + .expect("invoke"); + assert_eq!(out["exit_code"], 0); + assert_eq!(out["stdout"], "hello\n"); +} + +#[tokio::test] +async fn real_pipeline_exit_code_is_the_last_stage() { + // `true | false` → last stage (false) exits 1. + let out = ShellTool::new() + .invoke( + serde_json::json!({"cmd": "true | false"}), + &ctx(exec_only(&["true", "false"])), + ) + .await + .expect("invoke"); + assert_eq!( + out["exit_code"], 1, + "pipeline exit is the last stage's: {out}" + ); + + // `false | true` → last stage (true) exits 0, even though stage 1 failed. + let out = ShellTool::new() + .invoke( + serde_json::json!({"cmd": "false | true"}), + &ctx(exec_only(&["true", "false"])), + ) + .await + .expect("invoke"); + assert_eq!(out["exit_code"], 0, "no pipefail: {out}"); +} + +#[tokio::test] +async fn real_stderr_and_nonzero_exit_are_captured() { + // `cat` of a nonexistent path writes to stderr and exits non-zero. + let out = ShellTool::new() + .invoke( + serde_json::json!({"program": "cat", "args": ["/nonexistent/agent-bridle/path"]}), + &ctx(exec_only(&["cat"])), + ) + .await + .expect("invoke"); + assert_ne!( + out["exit_code"], 0, + "cat of a missing file must fail: {out}" + ); + assert!( + !out["stderr"].as_str().unwrap_or("").is_empty(), + "stderr must be captured: {out}" + ); +} + +#[tokio::test] +async fn real_out_of_scope_program_is_denied_and_never_spawns() { + // `rm` is not granted: the leash denies it before any real process starts. + let out = ShellTool::new() + .invoke( + serde_json::json!({"program": "rm", "args": ["-rf", "/tmp/agent-bridle-should-not-exist"]}), + &ctx(exec_only(&["echo"])), + ) + .await + .expect("invoke"); + assert_eq!(out["denied"], true); + assert_eq!(out["denials"][0]["target"], "rm"); + assert!(out.get("exit_code").is_none(), "nothing ran: {out}"); +}