diff --git a/README.md b/README.md index 3e543856e..0f219677b 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ agent-browser find role button click --name "Submit" ```bash agent-browser open # Navigate to URL (aliases: goto, navigate) agent-browser click # Click element (--new-tab to open in new tab) +agent-browser clickjs # Click element via JavaScript (bypasses coordinate issues) agent-browser dblclick # Double-click element agent-browser focus # Focus element agent-browser type # Type into element diff --git a/cli/src/commands.rs b/cli/src/commands.rs index cf95c1fd3..b16d844c9 100644 --- a/cli/src/commands.rs +++ b/cli/src/commands.rs @@ -151,6 +151,13 @@ pub fn parse_command(args: &[String], flags: &Flags) -> Result { + let sel = rest.first().ok_or_else(|| ParseError::MissingArguments { + context: "clickjs".to_string(), + usage: "clickjs ", + })?; + Ok(json!({ "id": id, "action": "clickjs", "selector": sel })) + } "dblclick" => { let sel = rest.first().ok_or_else(|| ParseError::MissingArguments { context: "dblclick".to_string(), diff --git a/cli/src/native/actions.rs b/cli/src/native/actions.rs index e50db6a33..566db921f 100644 --- a/cli/src/native/actions.rs +++ b/cli/src/native/actions.rs @@ -1047,6 +1047,7 @@ pub async fn execute_command(cmd: &Value, state: &mut DaemonState) -> Value { "snapshot" => handle_snapshot(cmd, state).await, "screenshot" => handle_screenshot(cmd, state).await, "click" => handle_click(cmd, state).await, + "clickjs" => handle_clickjs(cmd, state).await, "dblclick" => handle_dblclick(cmd, state).await, "fill" => handle_fill(cmd, state).await, "type" => handle_type(cmd, state).await, @@ -2159,6 +2160,35 @@ async fn handle_click(cmd: &Value, state: &mut DaemonState) -> Result Result { + let selector = cmd + .get("selector") + .and_then(|v| v.as_str()) + .ok_or("Missing 'selector' parameter")?; + + if let Some(ref wb) = state.webdriver_backend { + if state.browser.is_none() { + wb.click(selector).await?; + return Ok(json!({ 
"clicked": selector, "method": "javascript" })); + } + } + + let mgr = state.browser.as_ref().ok_or("Browser not launched")?; + let session_id = mgr.active_session_id()?.to_string(); + + interaction::clickjs( + &mgr.client, + &session_id, + &state.ref_map, + selector, + &state.iframe_sessions, + ) + .await?; + Ok(json!({ "clicked": selector, "method": "javascript" })) +} + async fn handle_dblclick(cmd: &Value, state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); diff --git a/cli/src/native/interaction.rs b/cli/src/native/interaction.rs index c18d226c5..0e973e5b5 100644 --- a/cli/src/native/interaction.rs +++ b/cli/src/native/interaction.rs @@ -26,6 +26,56 @@ pub async fn click( dispatch_click(client, &effective_session_id, x, y, button, click_count).await } +/// Clicks an element using JavaScript `element.click()` instead of CDP coordinate-based +/// mouse events. This bypasses coordinate resolution issues (overlapping elements, +/// viewport offsets, fixed-position elements) that can cause `click` to target the +/// wrong element or miss entirely. +/// +/// Note: CDP mouse events *do* trigger React synthetic events (React 17+ attaches at +/// the root). The benefit here is avoiding coordinate resolution problems, not event +/// system compatibility. 
+pub async fn clickjs( + client: &CdpClient, + session_id: &str, + ref_map: &RefMap, + selector_or_ref: &str, + iframe_sessions: &HashMap, +) -> Result<(), String> { + let (object_id, effective_session_id) = resolve_element_object_id( + client, + session_id, + ref_map, + selector_or_ref, + iframe_sessions, + ) + .await?; + + // Call element.click() via CDP Runtime.callFunctionOn to bypass coordinate resolution + let params = CallFunctionOnParams { + object_id: Some(object_id), + function_declaration: "function() { + // Scroll element into view first + this.scrollIntoView({ behavior: 'instant', block: 'center' }); + // Trigger the native click + this.click(); + }" + .to_string(), + arguments: None, + return_by_value: Some(true), + await_promise: None, + }; + + client + .send_command_typed::<_, Value>( + "Runtime.callFunctionOn", + &params, + Some(&effective_session_id), + ) + .await?; + + Ok(()) +} + pub async fn dblclick( client: &CdpClient, session_id: &str, diff --git a/cli/src/output.rs b/cli/src/output.rs index 5d70ff2a4..e521a9a15 100644 --- a/cli/src/output.rs +++ b/cli/src/output.rs @@ -1036,6 +1036,35 @@ Examples: agent-browser click "button.primary" agent-browser click "//button[@type='submit']" agent-browser click @e3 --new-tab +"## + } + "clickjs" => { + r##" +agent-browser clickjs - Click an element using JavaScript + +Usage: agent-browser clickjs + +Clicks on the specified element using JavaScript element.click() instead of +coordinate-based CDP mouse events. This bypasses coordinate resolution issues +(overlapping elements, viewport offsets, fixed-position elements) that can +cause the standard click to target the wrong element or miss entirely. + +When to Use: + - The standard click reports success but nothing happens + - Elements with complex positioning (fixed, absolute, overlapping layers) + - Floating action buttons or elements behind overlays + +Note: CDP mouse events do trigger React synthetic events (React 17+ attaches +at the root). 
The benefit here is avoiding coordinate resolution problems. + +Global Options: + --json Output as JSON + --session Use specific session + +Examples: + agent-browser clickjs "button" + agent-browser clickjs @e1 + agent-browser clickjs "[data-testid='add-button']" "## } "dblclick" => {