diff --git a/agent-bridle-tool-shell/src/lib.rs b/agent-bridle-tool-shell/src/lib.rs index 212c346..f982fb1 100644 --- a/agent-bridle-tool-shell/src/lib.rs +++ b/agent-bridle-tool-shell/src/lib.rs @@ -11,14 +11,16 @@ //! *advisory*: the result's `sandbox_kind` reports what actually enforced it //! (I9), today [`agent_bridle_core::SandboxKind::None`]. //! -//! **Increments 1–4** of agent-bridle#34: a sequence of pipelines joined by +//! **Increments 1–5** of agent-bridle#34: a sequence of pipelines joined by //! `&&`/`||`/`;` (short-circuit semantics), each pipeline simple commands with -//! quoted arguments and **file redirections** (`> out`, `>> out`, `< in`) whose -//! targets are leash-checked (`fs_write`/`fs_read`). Globbing and fd-number -//! redirections are added incrementally; until then they are refused as -//! *unsupported* (distinct from the *dynamic* constructs refused by design). The -//! process spawning is behind a `Spawner` seam (mocked in unit tests; real path -//! in `tests/real_spawn.rs`). `brush-bridle-core` remains the deferred, reversible +//! quoted arguments, **file redirections** (`> out`, `>> out`, `< in`) and +//! **filename globbing** (`*`/`?`/`[…]`) — all filesystem touches bridle performs +//! (redirect opens, glob directory listings) are leash-checked +//! (`fs_write`/`fs_read`). Variable expansion (`$VAR`) and fd-number redirections +//! are added incrementally; until then they are refused as *unsupported* (distinct +//! from the *dynamic* constructs refused by design). The process spawning is +//! behind a `Spawner` seam (mocked in unit tests; real path in +//! `tests/real_spawn.rs`). `brush-bridle-core` remains the deferred, reversible //! full-bash alternative engine behind the same registry seam (ADR 0005 D4 — //! tracked on agent-bridle#20). diff --git a/agent-bridle-tool-shell/src/parse.rs b/agent-bridle-tool-shell/src/parse.rs index adf9915..274819e 100644 --- a/agent-bridle-tool-shell/src/parse.rs +++ b/agent-bridle-tool-shell/src/parse.rs @@ -2,20 +2,23 @@ //! //! `agent-bridle` is the **exec funnel**: rather than hand a string to a shell //! interpreter, the engine parses it itself and runs only what it can confine. -//! This covers **increments 1–4** — a sequence of pipelines joined by `&&`, -//! `||` and `;`, where each pipeline is simple commands (`a | b | c`) with quoted -//! arguments and file redirections (`> out`, `>> out`, `< in`). Globbing and -//! variable/parameter expansion are added in a later increment (tracked on -//! agent-bridle#34); until then each is refused as [`Refusal::Unsupported`], kept -//! distinct from the [`Refusal::Dynamic`] constructs refused **by design** — -//! command/arithmetic substitution, backticks, subshells: the undecidable -//! interiors ADR 0001 says may never be statically cleared. fd-number -//! redirections (`2>`, `2>&1`) are refused as `Unsupported` for now (a focused -//! follow-up) rather than risk silently mishandling bash's fd-number rules. +//! This covers **increments 1–5** — a sequence of pipelines joined by `&&`, +//! `||` and `;`, each pipeline simple commands (`a | b | c`) with quoted +//! arguments, file redirections (`> out`, `>> out`, `< in`), and **filename +//! globbing** (`*`, `?`, `[…]`). The parser only *marks* an argument as a glob +//! ([`Arg::Glob`]); expansion (a filesystem read) is leash-checked and performed +//! by the executor. Variable/parameter expansion is added in a later increment +//! (tracked on agent-bridle#34); until then `$` is refused as +//! [`Refusal::Unsupported`], kept distinct from the [`Refusal::Dynamic`] +//! constructs refused **by design** — command/arithmetic substitution, +//! backticks, subshells: the undecidable interiors ADR 0001 says may never be +//! statically cleared. fd-number redirections (`2>`, `2>&1`) are refused as +//! `Unsupported` for now (a focused follow-up). //! //! Quoting is honored, so a metacharacter **inside quotes is a literal //! argument** — only *unquoted* operators and constructs are recognized: -//! `echo "a&&b"` is one argv, while `a && b` is two pipelines joined by `&&`. +//! `echo "a*b"` is one literal arg, while `echo a*` is a glob and `a && b` is two +//! pipelines joined by `&&`. use std::fmt; use std::iter::Peekable; @@ -31,8 +34,8 @@ pub enum Refusal { /// shell, use the embedder's unbridled/`--yolo` allowance (ADR 0003 / 0005 D5). Dynamic(&'static str), /// A construct the safe-subset engine will support but **does not yet** in - /// this increment (globbing, variable expansion, fd-number redirections). - /// Tracked on agent-bridle#34. + /// this increment (variable expansion, fd-number redirections). Tracked on + /// agent-bridle#34. Unsupported(&'static str), /// The input could not be parsed (unterminated quote, trailing backslash, /// empty command/stage, missing redirection target, dangling separator). @@ -67,6 +70,16 @@ impl fmt::Display for Refusal { } } +/// One argument word: a literal, or a glob pattern to be expanded by the +/// executor (a filesystem read, leash-checked first). +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Arg { + /// A literal word (quoting already resolved). + Lit(String), + /// A word containing unquoted `*`/`?`/`[…]` — a glob pattern. + Glob(String), +} + /// A file redirection on one command stage. `agent-bridle` performs the open, so /// each is leash-checked (`fs_write` for stdout, `fs_read` for stdin) before the /// stage runs. @@ -78,11 +91,11 @@ pub enum Redirect { Stdin { path: String }, } -/// One command stage: its argv plus any redirections. +/// One command stage: its argv (literals + globs) plus any redirections. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Command { - /// The program (argv[0]) and arguments. - pub argv: Vec, + /// The program (argv[0]) and arguments, each a literal or a glob. + pub argv: Vec, /// File redirections, in source order (last stdout redirect wins for fd 1). pub redirects: Vec, } @@ -139,7 +152,7 @@ pub fn classify(input: &str) -> Result { let mut script: Script = Vec::new(); let mut pending_sep = Sep::Seq; // separator for the NEXT finalized pipeline let mut stages: Vec = Vec::new(); // current pipeline's stages - let mut argv: Vec = Vec::new(); // current stage's argv + let mut argv: Vec = Vec::new(); // current stage's args let mut redirects: Vec = Vec::new(); // current stage's redirects loop { @@ -159,7 +172,6 @@ pub fn classify(input: &str) -> Result { Sep::Or, )?; } else { - // Pipe: finalize the current stage into the current pipeline. if argv.is_empty() { return Err(Refusal::Malformed( "empty pipeline stage (nothing before `|`)".into(), @@ -204,7 +216,6 @@ pub fn classify(input: &str) -> Result { chars.next(); return Err(dollar_refusal(chars.peek().copied())); } - '*' | '?' | '[' => return Err(Refusal::Unsupported("filename globbing")), '>' => { chars.next(); let append = chars.peek() == Some(&'>'); @@ -228,7 +239,14 @@ pub fn classify(input: &str) -> Result { let path = read_redirect_target(&mut chars)?; redirects.push(Redirect::Stdin { path }); } - _ => argv.push(read_word(&mut chars)?), + _ => { + let (word, is_glob) = read_word(&mut chars)?; + argv.push(if is_glob { + Arg::Glob(word) + } else { + Arg::Lit(word) + }); + } } } @@ -254,12 +272,11 @@ pub fn classify(input: &str) -> Result { } /// Finalize the current stage + pipeline and push it to the script under -/// `pending_sep`, then arm `pending_sep` for the next pipeline. An empty pipeline -/// around a separator is malformed. +/// `pending_sep`, then arm `pending_sep` for the next pipeline. fn push_pipeline( script: &mut Script, stages: &mut Vec, - argv: &mut Vec, + argv: &mut Vec, redirects: &mut Vec, pending_sep: &mut Sep, next_sep: Sep, @@ -280,11 +297,10 @@ fn push_pipeline( } /// Finalize the current stage into the current pipeline and return the pipeline. -/// `Ok(None)` means the pipeline was entirely empty (no stages/argv/redirects). -/// A `|` with no following stage, or a redirect with no command, is malformed. +/// `Ok(None)` means the pipeline was entirely empty. fn finalize_pipeline( stages: &mut Vec, - argv: &mut Vec, + argv: &mut Vec, redirects: &mut Vec, ) -> Result, Refusal> { if argv.is_empty() { @@ -312,18 +328,26 @@ fn skip_whitespace(chars: &mut Peekable) { } } -/// Read the next word (no leading-whitespace skip — the caller does that). Stops -/// at unquoted whitespace or an operator/special char (which the caller then -/// handles). Single quotes are literal; double quotes are literal except `$` and -/// a backtick still trigger substitution detection; an unquoted backslash -/// escapes the next char. A bare digit-word immediately followed by `>`/`<` is a -/// refused fd-number redirection (rather than a silently-mishandled arg). -fn read_word(chars: &mut Peekable) -> Result { +/// Read the next word (no leading-whitespace skip — the caller does that). +/// Returns the word and whether it contained an *unquoted* glob metacharacter +/// (`*`/`?`/`[`). Stops at unquoted whitespace or an operator char. Single quotes +/// are literal; double quotes are literal except `$`/backtick still trigger +/// substitution detection; an unquoted backslash escapes the next char. A bare +/// digit-word immediately followed by `>`/`<` is a refused fd-number redirection. +fn read_word(chars: &mut Peekable) -> Result<(String, bool), Refusal> { let mut cur = String::new(); + let mut is_glob = false; while let Some(&c) = chars.peek() { match c { ' ' | '\t' | '\n' | '\r' => break, - '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '*' | '?' | '[' => break, + // Operators / substitution starts end the word; the caller handles them. + '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' => break, + // Unquoted glob metacharacters: part of the word, and mark it a glob. + '*' | '?' | '[' => { + cur.push(c); + chars.next(); + is_glob = true; + } '\'' => { chars.next(); loop { @@ -370,15 +394,17 @@ fn read_word(chars: &mut Peekable) -> Result { } // `2>` / `0<` etc.: a bare digit-word touching a redirect operator. if !cur.is_empty() + && !is_glob && cur.bytes().all(|b| b.is_ascii_digit()) && matches!(chars.peek(), Some('>') | Some('<')) { return Err(Refusal::Unsupported("fd-number redirection (e.g. `2>`)")); } - Ok(cur) + Ok((cur, is_glob)) } -/// Read a redirection target (the word after `>`/`>>`/`<`). +/// Read a redirection target (the word after `>`/`>>`/`<`). The target is taken +/// literally (redirect targets are not globbed in this engine). fn read_redirect_target(chars: &mut Peekable) -> Result { skip_whitespace(chars); match chars.peek().copied() { @@ -387,7 +413,7 @@ fn read_redirect_target(chars: &mut Peekable) -> Result } _ => {} } - let target = read_word(chars)?; + let (target, _is_glob) = read_word(chars)?; if target.is_empty() { return Err(Refusal::Malformed("empty redirection target".into())); } @@ -408,142 +434,115 @@ fn dollar_refusal(next: Option) -> Refusal { mod tests { use super::*; - fn argv(parts: &[&str]) -> Vec { - parts.iter().map(|s| (*s).to_string()).collect() + fn lit(s: &str) -> Arg { + Arg::Lit(s.to_string()) } - fn stage(parts: &[&str]) -> Command { + fn glob(s: &str) -> Arg { + Arg::Glob(s.to_string()) + } + fn stage(args: Vec) -> Command { Command { - argv: argv(parts), + argv: args, redirects: vec![], } } - /// A single-pipeline, single-stage script with no redirects. + fn lits(parts: &[&str]) -> Command { + stage(parts.iter().map(|s| lit(s)).collect()) + } + /// A single-pipeline, single-stage, all-literal script. fn one(parts: &[&str]) -> Script { vec![ScriptItem { sep: Sep::Seq, - pipeline: vec![stage(parts)], + pipeline: vec![lits(parts)], }] } - fn item(sep: Sep, stages: Vec) -> ScriptItem { - ScriptItem { - sep, - pipeline: stages, - } - } - // ── words / quoting / pipelines / redirects (increments 1–3) ──────────── + // ── words / quoting / pipelines / redirects / sequencing (1–4) ─────────── #[test] - fn simple_command_and_quoting() { + fn simple_and_quoting_and_sequencing() { assert_eq!( classify("echo hi there").unwrap(), one(&["echo", "hi", "there"]) ); assert_eq!(classify("echo 'a b'").unwrap(), one(&["echo", "a b"])); - assert_eq!(classify("echo \"a|b\"").unwrap(), one(&["echo", "a|b"])); - assert_eq!(classify("echo \"a&&b\"").unwrap(), one(&["echo", "a&&b"])); - } - - #[test] - fn pipeline_and_redirect_still_parse() { assert_eq!( - classify("grep foo | wc -l").unwrap(), - vec![item( - Sep::Seq, - vec![stage(&["grep", "foo"]), stage(&["wc", "-l"])] - )] + classify("a && b").unwrap(), + vec![ + ScriptItem { + sep: Sep::Seq, + pipeline: vec![lits(&["a"])] + }, + ScriptItem { + sep: Sep::And, + pipeline: vec![lits(&["b"])] + }, + ] ); - let p = classify("echo hi > out").unwrap(); - assert_eq!(p[0].pipeline[0].stdout_redirect(), Some(("out", false))); } - // ── sequencing (increment 4) ──────────────────────────────────────────── + // ── globbing (increment 5) ────────────────────────────────────────────── #[test] - fn and_or_seq_separators() { - assert_eq!( - classify("a && b").unwrap(), - vec![ - item(Sep::Seq, vec![stage(&["a"])]), - item(Sep::And, vec![stage(&["b"])]) - ] - ); + fn glob_words_are_marked() { assert_eq!( - classify("a || b").unwrap(), - vec![ - item(Sep::Seq, vec![stage(&["a"])]), - item(Sep::Or, vec![stage(&["b"])]) - ] + classify("ls *.rs").unwrap(), + vec![ScriptItem { + sep: Sep::Seq, + pipeline: vec![stage(vec![lit("ls"), glob("*.rs")])] + }] ); assert_eq!( - classify("a ; b").unwrap(), - vec![ - item(Sep::Seq, vec![stage(&["a"])]), - item(Sep::Seq, vec![stage(&["b"])]) - ] + classify("cat foo?").unwrap(), + vec![ScriptItem { + sep: Sep::Seq, + pipeline: vec![stage(vec![lit("cat"), glob("foo?")])] + }] ); - } - - #[test] - fn mixed_separators_and_pipelines() { - // `a | b && c ; d` → [ {Seq, a|b}, {And, c}, {Seq, d} ] assert_eq!( - classify("a | b && c ; d").unwrap(), - vec![ - item(Sep::Seq, vec![stage(&["a"]), stage(&["b"])]), - item(Sep::And, vec![stage(&["c"])]), - item(Sep::Seq, vec![stage(&["d"])]), - ] + classify("ls [abc].txt").unwrap(), + vec![ScriptItem { + sep: Sep::Seq, + pipeline: vec![stage(vec![lit("ls"), glob("[abc].txt")])] + }] ); - } - - #[test] - fn trailing_semicolon_is_ok_but_dangling_andor_is_not() { + // Glob in a sub-path keeps the prefix in the pattern. assert_eq!( - classify("a ; b ;").unwrap(), - vec![ - item(Sep::Seq, vec![stage(&["a"])]), - item(Sep::Seq, vec![stage(&["b"])]) - ] + classify("cat src/*.rs").unwrap(), + vec![ScriptItem { + sep: Sep::Seq, + pipeline: vec![stage(vec![lit("cat"), glob("src/*.rs")])] + }] ); - assert!(matches!(classify("a &&"), Err(Refusal::Malformed(_)))); - assert!(matches!(classify("a ||"), Err(Refusal::Malformed(_)))); } + /// A quoted metacharacter is a literal arg, NOT a glob. #[test] - fn leading_or_doubled_separators_are_malformed() { - assert!(matches!(classify("; a"), Err(Refusal::Malformed(_)))); - assert!(matches!(classify("&& a"), Err(Refusal::Malformed(_)))); - assert!(matches!(classify("a ; ; b"), Err(Refusal::Malformed(_)))); - assert!(matches!(classify("a && | b"), Err(Refusal::Malformed(_)))); + fn quoted_glob_chars_are_literal() { + assert_eq!(classify("echo \"*.rs\"").unwrap(), one(&["echo", "*.rs"])); + assert_eq!(classify("echo '[abc]'").unwrap(), one(&["echo", "[abc]"])); + assert_eq!(classify("echo a\\*b").unwrap(), one(&["echo", "a*b"])); // escaped * } - // ── refusals ──────────────────────────────────────────────────────────── + // ── refusals (now $ and fd; globbing is no longer refused) ─────────────── #[test] - fn dynamic_and_unsupported_and_fd_still_refused() { + fn dynamic_unsupported_and_fd_still_refused() { assert!(matches!(classify("echo $(id)"), Err(Refusal::Dynamic(_)))); assert!(matches!(classify("echo `id`"), Err(Refusal::Dynamic(_)))); assert!(matches!(classify("(echo hi)"), Err(Refusal::Dynamic(_)))); - assert!(matches!(classify("ls *.rs"), Err(Refusal::Unsupported(_)))); assert!(matches!( classify("echo $HOME"), Err(Refusal::Unsupported(_)) )); assert!(matches!(classify("echo 2>f"), Err(Refusal::Unsupported(_)))); - assert!(matches!(classify("echo x &"), Err(Refusal::Unsupported(_)))); // background - // A dynamic stage anywhere in the sequence is refused. - assert!(matches!( - classify("echo ok && $(evil)"), - Err(Refusal::Dynamic(_)) - )); + assert!(matches!(classify("echo x &"), Err(Refusal::Unsupported(_)))); } #[test] fn empty_and_unterminated_are_malformed() { - assert!(matches!(classify(" "), Err(Refusal::Malformed(_)))); assert!(matches!(classify(""), Err(Refusal::Malformed(_)))); assert!(matches!(classify("echo 'oops"), Err(Refusal::Malformed(_)))); - assert!(matches!(classify("x |"), Err(Refusal::Malformed(_)))); + assert!(matches!(classify("a &&"), Err(Refusal::Malformed(_)))); } } diff --git a/agent-bridle-tool-shell/src/shell_tool.rs b/agent-bridle-tool-shell/src/shell_tool.rs index bb2fddd..3231a87 100644 --- a/agent-bridle-tool-shell/src/shell_tool.rs +++ b/agent-bridle-tool-shell/src/shell_tool.rs @@ -8,17 +8,17 @@ //! *advisory*: the result's `sandbox_kind` reports what actually enforced it (I9), //! today [`SandboxKind::None`]. //! -//! **Increments 1–4** of agent-bridle#34: a sequence of pipelines joined by -//! `&&`/`||`/`;`, each pipeline simple commands with quoted arguments and file -//! redirections (`> out`, `>> out`, `< in`). Globbing and variable expansion land -//! in a later increment. Because `agent-bridle` performs each redirect's file -//! open itself, those opens are leash-checked (`fs_write`/`fs_read`) *before any -//! stage spawns* — a real enforcement point, unlike a spawned program's own -//! opens (L3's job). Process spawning is behind a [`Spawner`] seam (mocked in -//! unit tests; real path in `tests/real_spawn.rs`). +//! **Increments 1–5** of agent-bridle#34: a sequence of pipelines joined by +//! `&&`/`||`/`;`, each pipeline simple commands with quoted arguments, file +//! redirections (`> out`, `>> out`, `< in`), and **filename globbing** +//! (`*`/`?`/`[…]`). Because `agent-bridle` performs each redirect's open and each +//! glob's directory listing itself, those filesystem touches are leash-checked +//! (`fs_write`/`fs_read`) *before any stage spawns* — a real enforcement point, +//! unlike a spawned program's own opens (L3's job). Process spawning is behind a +//! [`Spawner`] seam (mocked in unit tests; real path in `tests/real_spawn.rs`). use std::io::Read; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::process::{Child, ChildStdout, Stdio}; use std::sync::Arc; use std::time::Duration; @@ -28,7 +28,7 @@ use agent_bridle_core::{ }; use async_trait::async_trait; -use crate::parse::{classify, Command, Redirect, Refusal, Script, ScriptItem, Sep}; +use crate::parse::{classify, Arg, Command, Redirect, Refusal, Script, ScriptItem, Sep}; /// Maximum permitted (and default-clamped) wall-clock timeout. const MAX_TIMEOUT_SECS: u64 = 300; @@ -50,17 +50,19 @@ pub(crate) struct Captured { /// The pipeline-execution seam. /// -/// The real implementation ([`OsSpawner`]) spawns processes; tests inject a mock -/// so the parse + leash + sequencing logic is verified without real subprocesses -/// (the workspace norm: no real process/fs in unit tests). A `Spawner` only ever -/// receives a pipeline that already passed the `exec` **and** redirect-`fs` leash -/// — admission happens in [`ShellTool::invoke`] *before* the spawner runs. +/// The real implementation ([`OsSpawner`]) spawns processes (and expands globs +/// against the real filesystem); tests inject a mock so the parse + leash + +/// sequencing logic is verified without real subprocesses (the workspace norm: +/// no real process/fs in unit tests). A `Spawner` only ever receives a pipeline +/// that already passed the `exec` **and** `fs` (redirect + glob-dir) leash — +/// admission happens in [`ShellTool::invoke`] *before* the spawner runs. pub(crate) trait Spawner: Send + Sync { /// Run one leash-approved pipeline to completion, capturing its output. fn run(&self, stages: &[Command], cwd: Option<&str>) -> ToolResult; } -/// The real spawner: a `std::process` pipeline wired with OS pipes + redirects. +/// The real spawner: a `std::process` pipeline wired with OS pipes + redirects, +/// expanding globs against the real filesystem. struct OsSpawner; impl Spawner for OsSpawner { @@ -126,16 +128,18 @@ impl Tool for ShellTool { "args": { "type": "array", "items": { "type": "string" }, - "description": "Argv form: arguments passed to `program` (argv[1..])." + "description": "Argv form: arguments passed to `program` (argv[1..]). Taken \ + literally — no globbing/quoting interpretation." }, "cmd": { "type": "string", "description": "Free-form command line run by the confined safe-subset engine: \ - pipelines (a | b) joined by &&/||/;, with quoted arguments and file \ - redirections (> out, >> out, < in; targets gated by fs_write/fs_read). \ + pipelines (a | b) joined by &&/||/;, with quoted arguments, file \ + redirections (> out, >> out, < in; targets gated by fs_write/fs_read), and \ + filename globbing (*, ?, [..]; gated by fs_read on the listed directory). \ Dynamic constructs ($(...), backticks, subshells) are refused by design; \ - globbing and fd-number redirections (2>, 2>&1) are added incrementally and \ - refused (with a clear `denied` reason) until then. Mutually exclusive with `program`." + variable expansion ($VAR) and fd-number redirections (2>, 2>&1) are refused \ + for now. Mutually exclusive with `program`." }, "cwd": { "type": "string", @@ -168,15 +172,41 @@ impl Tool for ShellTool { Err(refusal) => return Ok(refused_envelope(sandbox_kind, &refusal)), }; - // Atomic admission (ADR 0001): every program (`exec`) and every redirect - // target (`fs_write`/`fs_read`, which bridle itself opens) across the - // WHOLE script must pass *before any stage spawns* — one out-of-scope - // element denies the whole script with no partial side effects (even for - // commands a `&&`/`||` would have short-circuited away). + // Atomic admission (ADR 0001): across the WHOLE script, every program + // (`exec`), every redirect target (`fs_write`/`fs_read`), and every glob's + // listed directory (`fs_read`) — all filesystem touches bridle performs — + // must pass *before any stage spawns*. One out-of-scope element denies the + // whole script with no partial side effects. for item in &script { for stage in &item.pipeline { - if let Err(e) = cx.check_exec(&stage.argv[0]) { - return Ok(deny(sandbox_kind, DenialKind::Exec, &stage.argv[0], &e)); + match stage.argv.first() { + Some(Arg::Lit(program)) => { + if let Err(e) = cx.check_exec(program) { + return Ok(deny(sandbox_kind, DenialKind::Exec, program, &e)); + } + } + Some(Arg::Glob(pattern)) => { + return Ok(deny( + sandbox_kind, + DenialKind::Exec, + pattern, + &ToolError::denied("a glob pattern is not allowed as a program name"), + )); + } + None => {} // the parser guarantees a non-empty stage + } + for arg in &stage.argv { + if let Arg::Glob(pattern) = arg { + let dir = resolve_glob_dir(parsed.cwd.as_deref(), split_glob(pattern).0); + if let Err(e) = cx.check_path_read(&dir) { + return Ok(deny( + sandbox_kind, + DenialKind::Open, + &dir.to_string_lossy(), + &e, + )); + } + } } for redirect in &stage.redirects { let (path, checked) = match redirect { @@ -344,13 +374,14 @@ impl ShellArgs { }) } - /// Resolve to a script (argv form is a one-pipeline, one-stage script with no - /// redirects; free-form is parsed by the safe-subset engine). + /// Resolve to a script. Argv form is a one-pipeline, one-stage script whose + /// args are all **literal** (no globbing/parsing); free-form is parsed by the + /// safe-subset engine. fn script(&self) -> Result { if let Some(program) = &self.program { let mut argv = Vec::with_capacity(1 + self.args.len()); - argv.push(program.clone()); - argv.extend(self.args.iter().cloned()); + argv.push(Arg::Lit(program.clone())); + argv.extend(self.args.iter().cloned().map(Arg::Lit)); Ok(vec![ScriptItem { sep: Sep::Seq, pipeline: vec![Command { @@ -364,6 +395,113 @@ impl ShellArgs { } } +// ── glob expansion ────────────────────────────────────────────────────────── + +/// Split a glob pattern into (directory prefix incl. trailing `/`, basename +/// pattern). `*.rs` → `("", "*.rs")`; `src/*.rs` → `("src/", "*.rs")`. +fn split_glob(pattern: &str) -> (&str, &str) { + match pattern.rfind('/') { + Some(i) => (&pattern[..=i], &pattern[i + 1..]), + None => ("", pattern), + } +} + +/// The directory a glob lists, resolved against the command's `cwd`. +fn resolve_glob_dir(cwd: Option<&str>, dir_part: &str) -> PathBuf { + let base = || cwd.map_or_else(|| PathBuf::from("."), PathBuf::from); + if dir_part.is_empty() { + base() + } else if Path::new(dir_part).is_absolute() { + PathBuf::from(dir_part) + } else { + base().join(dir_part) + } +} + +/// Expand a glob pattern against a directory listing (the `list_dir` seam lets +/// unit tests avoid the real filesystem). Only the last path segment is globbed; +/// hidden entries are skipped unless the pattern's basename starts with `.`. +/// No match → the literal pattern (bash `nullglob` off). +fn expand_glob( + pattern: &str, + cwd: Option<&str>, + list_dir: &dyn Fn(&Path) -> Vec, +) -> Vec { + let (dir_part, base) = split_glob(pattern); + let dir = resolve_glob_dir(cwd, dir_part); + let base_hidden = base.starts_with('.'); + let mut matches: Vec = list_dir(&dir) + .into_iter() + .filter(|name| (base_hidden || !name.starts_with('.')) && fnmatch(base, name)) + .map(|name| format!("{dir_part}{name}")) + .collect(); + matches.sort(); + if matches.is_empty() { + vec![pattern.to_string()] + } else { + matches + } +} + +/// Glob match: `*` (any run), `?` (one char), `[…]` (class with ranges and +/// `!`/`^` negation). `*`/`?`/`[` do not cross `/` (single-segment matching). +fn fnmatch(pattern: &str, name: &str) -> bool { + let p: Vec = pattern.chars().collect(); + let n: Vec = name.chars().collect(); + fnmatch_inner(&p, &n) +} + +fn fnmatch_inner(p: &[char], n: &[char]) -> bool { + match p.first() { + None => n.is_empty(), + Some('*') => fnmatch_inner(&p[1..], n) || (!n.is_empty() && fnmatch_inner(p, &n[1..])), + Some('?') => !n.is_empty() && fnmatch_inner(&p[1..], &n[1..]), + Some('[') => { + if n.is_empty() { + return false; + } + match match_class(&p[1..], n[0]) { + Some((matched, rest)) => matched && fnmatch_inner(rest, &n[1..]), + // Malformed class (no closing `]`): treat `[` as a literal. + None => n[0] == '[' && fnmatch_inner(&p[1..], &n[1..]), + } + } + Some(&c) => !n.is_empty() && c == n[0] && fnmatch_inner(&p[1..], &n[1..]), + } +} + +/// Match a `[...]` class against `c`. `p` begins just after `[`. Returns +/// `(matched, remaining pattern after ])`, or `None` if there is no closing `]`. +fn match_class(p: &[char], c: char) -> Option<(bool, &[char])> { + let mut i = 0; + let negate = matches!(p.first(), Some('!' | '^')); + if negate { + i = 1; + } + let mut matched = false; + let mut first = true; + while i < p.len() { + if p[i] == ']' && !first { + return Some((matched ^ negate, &p[i + 1..])); + } + first = false; + if i + 2 < p.len() && p[i + 1] == '-' && p[i + 2] != ']' { + if c >= p[i] && c <= p[i + 2] { + matched = true; + } + i += 3; + } else { + if c == p[i] { + matched = true; + } + i += 1; + } + } + None +} + +// ── process execution ─────────────────────────────────────────────────────── + /// Open a file for an `fs_write` redirect target (`>` truncates, `>>` appends). fn open_for_write(path: &str, append: bool) -> std::io::Result { std::fs::OpenOptions::new() @@ -383,6 +521,27 @@ fn kill_all(children: &mut [Child]) { } } +/// Lower a stage's [`Arg`] list into a concrete argv, expanding globs against the +/// real filesystem. +fn expand_stage_argv(stage: &Command, cwd: Option<&str>) -> Vec { + let list_dir = |dir: &Path| -> Vec { + std::fs::read_dir(dir) + .map(|rd| { + rd.filter_map(|e| e.ok().and_then(|e| e.file_name().into_string().ok())) + .collect() + }) + .unwrap_or_default() + }; + let mut argv = Vec::with_capacity(stage.argv.len()); + for arg in &stage.argv { + match arg { + Arg::Lit(s) => argv.push(s.clone()), + Arg::Glob(pattern) => argv.extend(expand_glob(pattern, cwd, &list_dir)), + } + } + argv +} + /// Spawn a pipeline of commands wired with OS pipes and file redirections, /// capturing the last stage's stdout (unless it is redirected to a file) and /// every stage's stderr. The pipeline's exit code is the last stage's (bash @@ -398,8 +557,9 @@ fn run_pipeline(stages: &[Command], cwd: Option<&str>) -> ToolResult { let mut prev_stdout: Option = None; for (i, stage) in stages.iter().enumerate() { - let mut cmd = std::process::Command::new(&stage.argv[0]); - cmd.args(&stage.argv[1..]); + let argv = expand_stage_argv(stage, cwd); + let mut cmd = std::process::Command::new(&argv[0]); + cmd.args(&argv[1..]); if let Some(dir) = cwd { cmd.current_dir(dir); } @@ -414,7 +574,7 @@ fn run_pipeline(stages: &[Command], cwd: Option<&str>) -> ToolResult { } }; cmd.stdin(Stdio::from(file)); - prev_stdout = None; // discard the incoming pipe, if any + prev_stdout = None; } else { cmd.stdin(match prev_stdout.take() { Some(out) => Stdio::from(out), @@ -446,8 +606,6 @@ fn run_pipeline(stages: &[Command], cwd: Option<&str>) -> ToolResult { } }; - // Wire this stage's stdout into the next stage's stdin only if it is - // piped (no redirect) and there is a next stage. if !stdout_to_file && i + 1 < n { prev_stdout = child.stdout.take(); } @@ -531,13 +689,11 @@ mod tests { use std::sync::Mutex; /// A spawner that records every pipeline it runs and returns a canned exit - /// code per program (argv0), default 0 — no real processes. `block_ms` lets - /// a test exercise the timeout path without a real `sleep`. + /// code per program (argv0), default 0 — no real processes. #[derive(Default)] struct MockSpawner { calls: Mutex>>, exit_by_program: HashMap, - stdout_by_program: HashMap, block_ms: u64, } @@ -549,20 +705,27 @@ mod tests { } } + /// A stage's program word (argv[0]) for test assertions. + fn prog(stage: &Command) -> &str { + match stage.argv.first() { + Some(Arg::Lit(s) | Arg::Glob(s)) => s, + None => "", + } + } + impl Spawner for MockSpawner { fn run(&self, stages: &[Command], _cwd: Option<&str>) -> ToolResult { self.calls.lock().unwrap().push(stages.to_vec()); if self.block_ms > 0 { std::thread::sleep(Duration::from_millis(self.block_ms)); } - let prog = &stages[0].argv[0]; Ok(Captured { - exit_code: self.exit_by_program.get(prog).copied().unwrap_or(0), - stdout: self - .stdout_by_program - .get(prog) - .cloned() - .unwrap_or_default(), + exit_code: self + .exit_by_program + .get(prog(&stages[0])) + .copied() + .unwrap_or(0), + stdout: String::new(), stderr: String::new(), }) } @@ -581,41 +744,23 @@ mod tests { } } - /// The programs (argv0 of each pipeline's first stage) the mock actually ran, - /// in order — the cheapest way to assert short-circuit behavior. + fn calls(mock: &Arc) -> Vec> { + mock.calls.lock().unwrap().clone() + } + fn ran_programs(mock: &Arc) -> Vec { - mock.calls - .lock() - .unwrap() + calls(mock) .iter() - .map(|pipeline| pipeline[0].argv[0].clone()) + .map(|pipeline| prog(&pipeline[0]).to_string()) .collect() } - #[test] - fn name_is_shell() { - assert_eq!(ShellTool::new().name(), "shell"); - } - - #[tokio::test] - async fn and_runs_second_only_on_success() { - // `true && echo` → both run. - let mock = Arc::new(MockSpawner::default()); - ShellTool::with_spawner(mock.clone()) - .invoke( - serde_json::json!({"cmd": "true && echo hi"}), - &ctx(exec_only(&["true", "echo"])), - ) - .await - .expect("invoke"); - assert_eq!(ran_programs(&mock), vec!["true", "echo"]); - } + // ── sequencing / leash (carried from earlier increments) ──────────────── #[tokio::test] async fn and_short_circuits_on_failure() { - // `false && echo` → `echo` is skipped because `false` exits non-zero. let mock = Arc::new(MockSpawner::with_exit("false", 1)); - let out = ShellTool::with_spawner(mock.clone()) + ShellTool::with_spawner(mock.clone()) .invoke( serde_json::json!({"cmd": "false && echo hi"}), &ctx(exec_only(&["false", "echo"])), @@ -623,98 +768,71 @@ mod tests { .await .expect("invoke"); assert_eq!(ran_programs(&mock), vec!["false"], "echo must be skipped"); - assert_eq!(out["exit_code"], 1, "script exit is the last that ran"); } #[tokio::test] - async fn or_runs_second_only_on_failure() { - // `false || echo` → `echo` runs (fallback). - let mock = Arc::new(MockSpawner::with_exit("false", 1)); - ShellTool::with_spawner(mock.clone()) + async fn out_of_scope_anywhere_denies_the_whole_script() { + let mock = Arc::new(MockSpawner::default()); + let out = ShellTool::with_spawner(mock.clone()) .invoke( - serde_json::json!({"cmd": "false || echo hi"}), - &ctx(exec_only(&["false", "echo"])), + serde_json::json!({"cmd": "echo ok ; rm -rf x"}), + &ctx(exec_only(&["echo"])), ) .await .expect("invoke"); - assert_eq!(ran_programs(&mock), vec!["false", "echo"]); + assert_eq!(out["denied"], true); + assert!(ran_programs(&mock).is_empty()); } + // ── globbing (increment 5) ────────────────────────────────────────────── + + /// A glob arg reaches the spawner as an (unexpanded) `Glob` — expansion is + /// the spawner's job; the leash (fs_read on the listed dir) passed in invoke. #[tokio::test] - async fn or_short_circuits_on_success() { - // `true || echo` → `echo` is skipped because `true` succeeded. + async fn glob_arg_reaches_spawner_after_leash() { let mock = Arc::new(MockSpawner::default()); ShellTool::with_spawner(mock.clone()) .invoke( - serde_json::json!({"cmd": "true || echo hi"}), - &ctx(exec_only(&["true", "echo"])), - ) - .await - .expect("invoke"); - assert_eq!(ran_programs(&mock), vec!["true"]); - } - - #[tokio::test] - async fn semicolon_runs_unconditionally() { - // `false ; echo` → both run; exit is the last (`echo` → 0). - let mock = Arc::new(MockSpawner::with_exit("false", 1)); - let out = ShellTool::with_spawner(mock.clone()) - .invoke( - serde_json::json!({"cmd": "false ; echo hi"}), - &ctx(exec_only(&["false", "echo"])), + serde_json::json!({"cmd": "ls *.rs"}), // fs_read is All by default + &ctx(exec_only(&["ls"])), ) .await .expect("invoke"); - assert_eq!(ran_programs(&mock), vec!["false", "echo"]); - assert_eq!(out["exit_code"], 0); + let c = calls(&mock); + assert_eq!( + c[0][0].argv, + vec![Arg::Lit("ls".into()), Arg::Glob("*.rs".into())] + ); } - /// Atomic admission across the WHOLE script: an out-of-scope command anywhere - /// (even one a `;` would reach, or a `&&` would skip) denies everything and - /// nothing spawns. + /// A glob in the program position is refused (we never exec a pattern). #[tokio::test] - async fn out_of_scope_anywhere_denies_the_whole_script() { + async fn glob_as_program_name_denied() { let mock = Arc::new(MockSpawner::default()); let out = ShellTool::with_spawner(mock.clone()) - .invoke( - serde_json::json!({"cmd": "echo ok ; rm -rf x"}), - &ctx(exec_only(&["echo"])), // rm not granted - ) + .invoke(serde_json::json!({"cmd": "*.sh foo"}), &ctx(Caveats::top())) .await .expect("invoke"); assert_eq!(out["denied"], true); - assert_eq!(out["denials"][0]["target"], "rm"); - assert!(ran_programs(&mock).is_empty(), "nothing must run"); - } - - #[tokio::test] - async fn combined_output_concatenates_in_order() { - let mut mock = MockSpawner::default(); - mock.stdout_by_program.insert("echo".into(), "x".into()); - mock.stdout_by_program.insert("printf".into(), "y".into()); - let mock = Arc::new(mock); - let out = ShellTool::with_spawner(mock.clone()) - .invoke( - serde_json::json!({"cmd": "echo a ; printf b"}), - &ctx(exec_only(&["echo", "printf"])), - ) - .await - .expect("invoke"); - assert_eq!(out["stdout"], "xy"); + assert!(ran_programs(&mock).is_empty()); } - /// A dynamic construct anywhere in the sequence is refused before any spawn. + /// The directory a glob lists is an `fs_read`; out of scope ⇒ denied, no spawn. #[tokio::test] - async fn dynamic_in_sequence_refused_before_spawn() { + async fn glob_dir_out_of_fs_read_scope_denied() { let mock = Arc::new(MockSpawner::default()); + let granted = Caveats { + exec: Scope::only(["echo".to_string()]), + // fs_read restricted to the temp dir; the cwd glob lists elsewhere. + fs_read: Scope::only([std::env::temp_dir().to_string_lossy().into_owned()]), + ..Caveats::top() + }; let out = ShellTool::with_spawner(mock.clone()) - .invoke( - serde_json::json!({"cmd": "echo ok && $(evil)"}), - &ctx(Caveats::top()), - ) + .invoke(serde_json::json!({"cmd": "echo *"}), &ctx(granted)) .await .expect("invoke"); assert_eq!(out["denied"], true); + assert_eq!(out["denials"][0]["kind"], "open"); assert!(ran_programs(&mock).is_empty()); } @@ -727,12 +845,10 @@ mod tests { ) .await; assert!(res.is_err()); - assert!(res.unwrap_err().to_string().contains("exactly one")); } #[tokio::test] async fn timeout_is_reported() { - // The mock blocks longer than the 1s timeout — no real process involved. let mock = Arc::new(MockSpawner { block_ms: 1500, ..Default::default() @@ -746,4 +862,50 @@ mod tests { .expect("invoke"); assert_eq!(out["timed_out"], true); } + + // ── pure glob matching / expansion (no real fs) ───────────────────────── + + #[test] + fn fnmatch_basics() { + assert!(fnmatch("*.rs", "a.rs")); + assert!(!fnmatch("*.rs", "a.txt")); + assert!(fnmatch("a?c", "abc")); + assert!(!fnmatch("a?c", "ac")); + assert!(fnmatch("*", "")); + assert!(fnmatch("a*", "a")); + assert!(fnmatch("[abc]x", "bx")); + assert!(!fnmatch("[abc]x", "dx")); + assert!(fnmatch("[!abc]x", "dx")); + assert!(fnmatch("[a-c]", "b")); + assert!(!fnmatch("[a-c]", "d")); + assert!(fnmatch("foo*bar", "fooXYbar")); + } + + #[test] + fn expand_glob_with_a_fake_lister() { + let entries = || { + vec![ + "a.rs".to_string(), + "b.rs".to_string(), + "c.txt".to_string(), + ".hidden.rs".to_string(), + ] + }; + let lister = |_dir: &Path| entries(); + + // *.rs matches the two .rs files (sorted), hidden excluded. + assert_eq!(expand_glob("*.rs", None, &lister), vec!["a.rs", "b.rs"]); + // * excludes the dotfile. + assert_eq!( + expand_glob("*", None, &lister), + vec!["a.rs", "b.rs", "c.txt"] + ); + // No match → the literal pattern (nullglob off). + assert_eq!(expand_glob("zzz*", None, &lister), vec!["zzz*"]); + // Sub-path keeps the directory prefix on each match. + assert_eq!( + expand_glob("src/*.rs", None, &lister), + vec!["src/a.rs", "src/b.rs"] + ); + } } diff --git a/agent-bridle-tool-shell/tests/real_spawn.rs b/agent-bridle-tool-shell/tests/real_spawn.rs index 495d1c6..98116de 100644 --- a/agent-bridle-tool-shell/tests/real_spawn.rs +++ b/agent-bridle-tool-shell/tests/real_spawn.rs @@ -245,3 +245,40 @@ async fn real_or_fallback_and_semicolon_sequence() { .expect("invoke"); assert_eq!(out["stdout"], "a\nb\n"); } + +#[tokio::test] +async fn real_glob_expands_against_the_filesystem() { + // A unique temp dir with a.rs="A", b.rs="B", c.txt="C". + let dir = unique_temp("glob"); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write(dir.join("a.rs"), "A").unwrap(); + std::fs::write(dir.join("b.rs"), "B").unwrap(); + std::fs::write(dir.join("c.txt"), "C").unwrap(); + let d = dir.to_string_lossy().into_owned(); + + // `cat *.rs` (cwd = the temp dir) → expands to `cat a.rs b.rs` (sorted) → "AB". + let out = ShellTool::new() + .invoke( + serde_json::json!({"cmd": "cat *.rs", "cwd": d}), + &ctx(exec_only(&["cat"])), + ) + .await + .expect("invoke"); + assert_eq!(out["exit_code"], 0); + assert_eq!(out["stdout"], "AB", "glob expanded + sorted: {out}"); + + // No match → the literal pattern; `cat zzz*` fails (no such file). + let out = ShellTool::new() + .invoke( + serde_json::json!({"cmd": "cat zzz*", "cwd": d}), + &ctx(exec_only(&["cat"])), + ) + .await + .expect("invoke"); + assert_ne!( + out["exit_code"], 0, + "unmatched glob → literal, cat fails: {out}" + ); + + let _ = std::fs::remove_dir_all(&dir); +}