diff --git a/.changeset/fix-glob-matching.md b/.changeset/fix-glob-matching.md new file mode 100644 index 000000000..b414ae618 --- /dev/null +++ b/.changeset/fix-glob-matching.md @@ -0,0 +1,4 @@ +--- +"@anthropic-ai/agent-browser-cli": patch +--- +fix: implement glob pattern matching for wait --url and route matching diff --git a/cli/Cargo.lock b/cli/Cargo.lock index e53cabc55..bc974c12f 100644 --- a/cli/Cargo.lock +++ b/cli/Cargo.lock @@ -55,6 +55,7 @@ dependencies = [ "getrandom 0.2.17", "image", "libc", + "regex", "reqwest", "serde", "serde_json", @@ -70,6 +71,15 @@ dependencies = [ "zip", ] +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "aligned" version = "0.4.3" @@ -1597,6 +1607,35 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "reqwest" version = "0.12.28" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 61f132362..17801a3aa 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -30,6 +30,7 @@ socket2 = "0.6" similar = "2" zip = { version = "8.2.0", default-features = false, features = 
["deflate"] } time = { version = "0.3", features = ["formatting"] } +regex = "1" [target.'cfg(unix)'.dependencies] libc = "0.2" diff --git a/cli/src/native/actions.rs b/cli/src/native/actions.rs index f85460b2c..29df7bf45 100644 --- a/cli/src/native/actions.rs +++ b/cli/src/native/actions.rs @@ -1,3 +1,4 @@ +use regex::Regex; use serde_json::{json, Value}; use std::collections::{HashMap, HashSet}; use std::env; @@ -104,6 +105,38 @@ pub struct RouteResponse { pub headers: Option>, } +fn glob_to_regex(pattern: &str) -> String { + let mut regex = String::from("^"); + let mut chars = pattern.chars().peekable(); + + while let Some(ch) = chars.next() { + match ch { + '*' => { + if chars.peek() == Some(&'*') { + chars.next(); + regex.push_str(".*"); + } else { + regex.push_str("[^/]*"); + } + } + '?' | '\\' | '.' | '+' | '(' | ')' | '|' | '^' | '$' | '{' | '}' | '[' | ']' => { + regex.push('\\'); + regex.push(ch); + } + _ => regex.push(ch), + } + } + + regex.push('$'); + regex +} + +fn glob_matches(pattern: &str, text: &str) -> bool { + Regex::new(&glob_to_regex(pattern)) + .map(|regex| regex.is_match(text)) + .unwrap_or(false) +} + #[derive(Clone, serde::Serialize)] pub struct TrackedRequest { pub url: String, @@ -2879,9 +2912,10 @@ async fn wait_for_url( pattern: &str, timeout_ms: u64, ) -> Result<(), String> { + let regex = glob_to_regex(pattern); let check_fn = format!( - "location.href.includes({})", - serde_json::to_string(pattern).unwrap_or_default() + "new RegExp({}).test(location.href)", + serde_json::to_string(®ex).unwrap_or_default() ); poll_until_true(client, session_id, &check_fn, timeout_ms).await } @@ -5550,7 +5584,7 @@ async fn handle_responsebody(cmd: &Value, state: &DaemonState) -> Result = route.url_pattern.split('*').collect(); - if parts.len() == 2 { - paused.url.starts_with(parts[0]) && paused.url.ends_with(parts[1]) - } else { - paused.url.contains(&route.url_pattern) - } - } else { - paused.url.contains(&route.url_pattern) - }; + let matches = 
#[test]
fn test_glob_to_regex_exact_match() {
    // A wildcard-free pattern only gains the anchors and an escaped dot.
    let translated = super::glob_to_regex("https://example.com");
    assert_eq!(translated, "^https://example\\.com$");
}

#[test]
fn test_glob_to_regex_escapes_regex_metacharacters() {
    // Each regex metacharacter in the glob must come out backslash-escaped.
    let glob = "https://example.com/a+b(c)[d]{e}|f^g$";
    let expected = "^https://example\\.com/a\\+b\\(c\\)\\[d\\]\\{e\\}\\|f\\^g\\$$";
    assert_eq!(super::glob_to_regex(glob), expected);
}

#[test]
fn test_glob_matches_exact_match() {
    // The match is anchored: a longer URL with the same prefix is rejected.
    let glob = "https://example.com/path";
    assert!(super::glob_matches(glob, "https://example.com/path"));
    assert!(!super::glob_matches(glob, "https://example.com/path/extra"));
}

#[test]
fn test_glob_matches_single_star_does_not_cross_slash() {
    // `*` stays within a single path segment.
    let glob = "https://example.com/*.js";
    assert!(super::glob_matches(glob, "https://example.com/app.js"));
    assert!(!super::glob_matches(glob, "https://example.com/assets/app.js"));
}

#[test]
fn test_glob_matches_double_star_crosses_slash() {
    // `**` may span directory separators.
    let glob = "https://example.com/**/*.js";
    let url = "https://example.com/assets/app.js";
    assert!(super::glob_matches(glob, url));
}

#[test]
fn test_glob_matches_question_mark_is_literal() {
    // `?` is treated as the literal query-string separator, never as a
    // single-character wildcard, so swapping it for another character must fail.
    let glob = "https://example.com/page?id=1";
    assert!(super::glob_matches(glob, "https://example.com/page?id=1"));
    assert!(!super::glob_matches(glob, "https://example.com/pageXid=1"));
}