Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 105 additions & 63 deletions agent-bridle-tool-shell/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
//! This covers the safe-subset engine — a sequence of pipelines joined by `&&`,
//! `||` and `;`, each pipeline simple commands (`a | b | c`) with quoted
//! arguments, file redirections (`> out`, `>> out`, `< in`, `2> err`, `2>&1`),
//! filename globbing (`*`, `?`, `[…]`), and **whole-word variable expansion**
//! (`$VAR` / `${VAR}`). The parser only *marks* an argument ([`Arg::Glob`] /
//! [`Arg::Var`]); the filesystem read (globbing) and the env read + allowlist
//! check (variables) are the executor's job.
//! filename globbing (`*`, `?`, `[…]`), and **variable expansion** (`$VAR` /
//! `${VAR}`, including mixed words like `$HOME/config` and references inside
//! double quotes). The parser only *marks* an argument ([`Arg::Glob`] /
//! [`Arg::Var`] segments); the filesystem read (globbing) and the env read +
//! allowlist check (variables) are the executor's job.
//!
//! Refused **by design** (security, [`Refusal::Dynamic`]): command/arithmetic
//! substitution `$(…)`, backticks, subshells — the undecidable interiors ADR
//! 0001 says may never be statically cleared. Refused for now
//! ([`Refusal::Unsupported`]): `$VAR` mixed into a larger word (use a standalone
//! `$VAR`) or inside double quotes, and fd redirections other than
//! ([`Refusal::Unsupported`]): a word that is *both* a glob and variable-bearing
//! (`$DIR/*.rs`), a `$VAR` in a redirect target, and fd redirections other than
//! `1>`/`2>`/`0<`/`2>&1` (e.g. `3>`, `1>&2`).
//!
//! Quoting is honored, so a metacharacter **inside quotes is a literal
Expand Down Expand Up @@ -72,17 +73,27 @@ impl fmt::Display for Refusal {
}
}

/// One argument word: a literal, a glob pattern, or a variable reference. The
/// executor lowers globs (a filesystem read) and variables (an env read +
/// A piece of a variable-bearing word: a literal run or a `$NAME` reference.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Seg {
/// A literal run of characters.
Lit(String),
/// A `$NAME` / `${NAME}` reference (the variable name only).
Var(String),
}

/// One argument word: a literal, a glob pattern, or a word that contains one or
/// more `$VAR` references (possibly mixed with literals, e.g. `$HOME/config`).
/// The executor lowers globs (a filesystem read) and variables (an env read +
/// allowlist check) into literals — both leash-/policy-checked first.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Arg {
/// A literal word (quoting already resolved).
Lit(String),
/// A word containing unquoted `*`/`?`/`[…]` — a glob pattern.
Glob(String),
/// A standalone `$NAME` / `${NAME}` variable reference (the name only).
Var(String),
/// A word with `$VAR` reference(s) (segments concatenated at expansion time).
Var(Vec<Seg>),
}

/// A redirection on one command stage. `agent-bridle` performs the open, so each
Expand Down Expand Up @@ -369,25 +380,26 @@ fn skip_whitespace(chars: &mut Peekable<Chars>) {

/// Read the next word as an [`Arg`]. Stops at unquoted whitespace or an operator.
/// Single quotes are literal; double quotes are literal except `$`/backtick still
/// trigger substitution detection; an unquoted backslash escapes the next char.
/// An unquoted `*`/`?`/`[` marks the word a glob; an unquoted `$` at the start of
/// a word begins a standalone variable reference (see [`read_variable`]).
/// expand; an unquoted backslash escapes the next char. An unquoted `*`/`?`/`[`
/// marks the word a glob; a `$NAME`/`${NAME}` (unquoted or inside double quotes)
/// contributes a [`Seg::Var`] so a word can mix literals and variables
/// (`$HOME/config`). A word that is BOTH a glob and variable-bearing is refused.
fn read_word(chars: &mut Peekable<Chars>) -> Result<Arg, Refusal> {
let mut cur = String::new();
let mut cur = String::new(); // current literal run
let mut segs: Vec<Seg> = Vec::new(); // pieces flushed when a `$VAR` appears
let mut is_glob = false;

while let Some(&c) = chars.peek() {
match c {
' ' | '\t' | '\n' | '\r' => break,
'|' | '&' | ';' | '<' | '>' | '(' | ')' | '`' => break,
'$' => {
// A variable is only allowed as a *standalone* word.
if !cur.is_empty() || is_glob {
return Err(Refusal::Unsupported(
"mixed variable expansion (use a standalone $VAR)",
));
}
chars.next(); // consume '$'
return read_variable(chars);
let name = read_var_name(chars)?;
if !cur.is_empty() {
segs.push(Seg::Lit(std::mem::take(&mut cur)));
}
segs.push(Seg::Var(name));
}
'*' | '?' | '[' => {
cur.push(c);
Expand Down Expand Up @@ -416,12 +428,13 @@ fn read_word(chars: &mut Peekable<Chars>) -> Result<Arg, Refusal> {
}
_ => cur.push('\\'),
},
// `$VAR` inside double quotes is not expanded here — use a
// bare `$VAR`. (Avoids the segment model; a documented gap.)
// `$VAR` inside double quotes expands (a literal `$` is `\$`).
Some('$') => {
return Err(Refusal::Unsupported(
"variable expansion inside double quotes (use a bare $VAR)",
))
let name = read_var_name(chars)?;
if !cur.is_empty() {
segs.push(Seg::Lit(std::mem::take(&mut cur)));
}
segs.push(Seg::Var(name));
}
Some('`') => {
return Err(Refusal::Dynamic("command substitution (backticks)"))
Expand All @@ -444,18 +457,30 @@ fn read_word(chars: &mut Peekable<Chars>) -> Result<Arg, Refusal> {
}
}
}
// A bare digit-word touching a redirect op (`2>`, `0<`, …) is handled by the
// caller as an fd redirect — it is returned here as a plain literal.
Ok(if is_glob {
Arg::Glob(cur)
} else {
Arg::Lit(cur)
})

if segs.is_empty() {
// Pure literal/glob word. (A bare digit-word touching a redirect op is
// handled by the caller as an fd redirect.)
return Ok(if is_glob {
Arg::Glob(cur)
} else {
Arg::Lit(cur)
});
}
// A variable-bearing word.
if is_glob {
return Err(Refusal::Unsupported("glob and variable in one word"));
}
if !cur.is_empty() {
segs.push(Seg::Lit(cur));
}
Ok(Arg::Var(segs))
}

/// Read a variable reference after a consumed `$`: `$NAME` or `${NAME}`. The
/// variable must be a **whole word** (nothing else adjacent). `$(` is dynamic.
fn read_variable(chars: &mut Peekable<Chars>) -> Result<Arg, Refusal> {
/// Read a variable name after a consumed `$`: `NAME` (bare) or `{NAME}`. `$(` is
/// command/arithmetic substitution (dynamic, refused); a `$` not followed by a
/// name/brace is a refused bare `$` (escape as `\$` for a literal dollar).
fn read_var_name(chars: &mut Peekable<Chars>) -> Result<String, Refusal> {
let name = match chars.peek() {
Some('(') => return Err(Refusal::Dynamic("command/arithmetic substitution `$(`")),
Some('{') => {
Expand Down Expand Up @@ -491,17 +516,7 @@ fn read_variable(chars: &mut Peekable<Chars>) -> Result<Arg, Refusal> {
if !is_valid_var_name(&name) {
return Err(Refusal::Unsupported("unsupported variable name"));
}
// Whole-word only: the variable must be the entire word.
match chars.peek() {
None => {}
Some(&(' ' | '\t' | '\n' | '\r' | '|' | '&' | ';' | '<' | '>' | '(' | ')')) => {}
_ => {
return Err(Refusal::Unsupported(
"mixed variable expansion (use a standalone $VAR)",
))
}
}
Ok(Arg::Var(name))
Ok(name)
}

/// A valid environment variable name: `[A-Za-z_][A-Za-z0-9_]*`.
Expand Down Expand Up @@ -610,8 +625,15 @@ mod tests {
fn glob(s: &str) -> Arg {
Arg::Glob(s.to_string())
}
fn var(s: &str) -> Arg {
Arg::Var(s.to_string())
/// A single-variable word, e.g. `$HOME`.
fn var(name: &str) -> Arg {
Arg::Var(vec![Seg::Var(name.to_string())])
}
fn slit(s: &str) -> Seg {
Seg::Lit(s.to_string())
}
fn svar(s: &str) -> Seg {
Seg::Var(s.to_string())
}
fn stage(args: Vec<Arg>) -> Command {
Command {
Expand Down Expand Up @@ -675,23 +697,43 @@ mod tests {
}

#[test]
fn mixed_or_quoted_or_invalid_variables_are_refused() {
assert!(matches!(
classify("echo $HOME/x"),
Err(Refusal::Unsupported(_))
)); // mixed
assert!(matches!(
classify("echo foo$HOME"),
Err(Refusal::Unsupported(_))
)); // mixed
assert!(matches!(
classify("echo \"$HOME\""),
Err(Refusal::Unsupported(_))
)); // quoted
fn mixed_and_quoted_variables_are_segments() {
// `$HOME/x` → variable + literal segments.
assert_eq!(
classify("echo $HOME/x").unwrap(),
one_stage(vec![lit("echo"), Arg::Var(vec![svar("HOME"), slit("/x")])])
);
// `foo$HOME` → literal + variable.
assert_eq!(
classify("echo foo$HOME").unwrap(),
one_stage(vec![lit("echo"), Arg::Var(vec![slit("foo"), svar("HOME")])])
);
// Inside double quotes the variable still expands.
assert_eq!(
classify("echo \"$HOME/x\"").unwrap(),
one_stage(vec![lit("echo"), Arg::Var(vec![svar("HOME"), slit("/x")])])
);
// Multiple segments: literal, var, literal.
assert_eq!(
classify("echo pre${X}.log").unwrap(),
one_stage(vec![
lit("echo"),
Arg::Var(vec![slit("pre"), svar("X"), slit(".log")]),
])
);
}

#[test]
fn invalid_or_dynamic_variable_forms_are_refused() {
assert!(matches!(classify("echo $1"), Err(Refusal::Unsupported(_)))); // positional/invalid
assert!(matches!(classify("echo $"), Err(Refusal::Unsupported(_)))); // bare $
assert!(matches!(classify("echo $(id)"), Err(Refusal::Dynamic(_)))); // substitution
// Escaped `$` is a literal dollar, not a variable.
// A word that is BOTH a glob and variable-bearing is refused (deferred).
assert!(matches!(
classify("cat $DIR/*.rs"),
Err(Refusal::Unsupported(_))
));
// Escaped `$` is a literal dollar, not a variable.
assert_eq!(
classify("echo \\$HOME").unwrap(),
one_stage(vec![lit("echo"), lit("$HOME")])
Expand Down
74 changes: 50 additions & 24 deletions agent-bridle-tool-shell/src/shell_tool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ use agent_bridle_core::{
};
use async_trait::async_trait;

use crate::parse::{classify, Arg, Command, Redirect, Refusal, Script, ScriptItem, Sep, StderrTo};
use crate::parse::{
classify, Arg, Command, Redirect, Refusal, Script, ScriptItem, Seg, Sep, StderrTo,
};

/// Maximum permitted (and default-clamped) wall-clock timeout.
const MAX_TIMEOUT_SECS: u64 = 300;
Expand Down Expand Up @@ -141,11 +143,12 @@ impl Tool for ShellTool {
pipelines (a | b) joined by &&/||/;, with quoted arguments, redirections \
(> out, >> out, < in, 2> err, 2>&1; file targets gated by fs_write/fs_read), \
filename globbing (*, ?, [..]; gated by fs_read on the listed directory), \
and whole-word $VAR/${VAR} expansion for an allowlisted, secret-free set \
(HOME, PWD, USER, TMPDIR, ...). Dynamic constructs ($(...), backticks, \
subshells) are refused by design; a $VAR mixed into a word, quoted, or \
outside the allowlist, and fd redirections other than 1>/2>/0</2>&1, \
are refused. Mutually exclusive with `program`."
and $VAR/${VAR} expansion (incl. mixed words like $HOME/config and \
inside double quotes) for an allowlisted, secret-free set (HOME, PWD, \
USER, TMPDIR, ...). Dynamic constructs ($(...), backticks, subshells) are \
refused by design; a $VAR outside the allowlist, a $VAR in a redirect \
target or combined with a glob, and fd redirections other than \
1>/2>/0</2>&1, are refused. Mutually exclusive with `program`."
},
"cwd": {
"type": "string",
Expand Down Expand Up @@ -199,11 +202,11 @@ impl Tool for ShellTool {
&ToolError::denied("a glob pattern is not allowed as a program name"),
));
}
Some(Arg::Var(name)) => {
Some(Arg::Var(_segs)) => {
return Ok(deny(
sandbox_kind,
DenialKind::Exec,
name,
"$VAR",
&ToolError::denied("a variable is not allowed as a program name"),
));
}
Expand All @@ -224,18 +227,25 @@ impl Tool for ShellTool {
));
}
}
// A variable must be on the env allowlist (no secret leak).
Arg::Var(name) if !is_allowed_var(name) => {
return Ok(deny(
sandbox_kind,
DenialKind::Exec,
&format!("${name}"),
&ToolError::denied(format!(
"variable ${name} is not in the confined shell's allowlist"
)),
));
// Every variable referenced must be on the env allowlist
// (no secret leak), checked by name before any spawn.
Arg::Var(segs) => {
for seg in segs {
if let Seg::Var(name) = seg {
if !is_allowed_var(name) {
return Ok(deny(
sandbox_kind,
DenialKind::Exec,
&format!("${name}"),
&ToolError::denied(format!(
"variable ${name} is not in the confined shell's allowlist"
)),
));
}
}
}
}
Arg::Var(_) | Arg::Lit(_) => {}
Arg::Lit(_) => {}
}
}
for redirect in &stage.redirects {
Expand Down Expand Up @@ -587,7 +597,19 @@ fn expand_stage_argv(stage: &Command, cwd: Option<&str>) -> Vec<String> {
match arg {
Arg::Lit(s) => argv.push(s.clone()),
Arg::Glob(pattern) => argv.extend(expand_glob(pattern, cwd, &list_dir)),
Arg::Var(name) => argv.push(std::env::var(name).unwrap_or_default()),
// Concatenate the segments: literals as-is, variables (already
// allowlisted in `invoke`) read from the env as a single literal —
// no re-split / no re-glob of the value.
Arg::Var(segs) => {
let mut word = String::new();
for seg in segs {
match seg {
Seg::Lit(s) => word.push_str(s),
Seg::Var(name) => word.push_str(&std::env::var(name).unwrap_or_default()),
}
}
argv.push(word);
}
}
}
argv
Expand Down Expand Up @@ -784,11 +806,12 @@ mod tests {
}
}

/// A stage's program word (argv[0]) for test assertions.
/// A stage's program word (argv[0]) for test assertions. (A variable in the
/// program position is denied in `invoke`, so it never reaches the spawner.)
fn prog(stage: &Command) -> &str {
match stage.argv.first() {
Some(Arg::Lit(s) | Arg::Glob(s) | Arg::Var(s)) => s,
None => "",
Some(Arg::Lit(s) | Arg::Glob(s)) => s,
Some(Arg::Var(_)) | None => "",
}
}

Expand Down Expand Up @@ -931,7 +954,10 @@ mod tests {
let c = calls(&mock);
assert_eq!(
c[0][0].argv,
vec![Arg::Lit("echo".into()), Arg::Var("HOME".into())]
vec![
Arg::Lit("echo".into()),
Arg::Var(vec![Seg::Var("HOME".into())]),
]
);
}

Expand Down
Loading
Loading