Skip to content
10 changes: 9 additions & 1 deletion cli/src/native/actions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1395,7 +1395,13 @@ async fn connect_auto_with_fresh_tab() -> Result<BrowserManager, String> {
}

async fn auto_launch(state: &mut DaemonState) -> Result<(), String> {
let options = launch_options_from_env();
let mut options = launch_options_from_env();

// Use the stream server's viewport dimensions for --window-size so the
// content area matches the desired viewport from the start.
if let Some(ref server) = state.stream_server {
options.viewport_size = Some(server.viewport().await);
}
let engine = env::var("AGENT_BROWSER_ENGINE").ok();

// Store proxy credentials for Fetch.authRequired handling
Expand Down Expand Up @@ -1495,6 +1501,7 @@ fn launch_options_from_env() -> LaunchOptions {
.unwrap_or(false),
color_scheme: env::var("AGENT_BROWSER_COLOR_SCHEME").ok(),
download_path: env::var("AGENT_BROWSER_DOWNLOAD_PATH").ok(),
viewport_size: None,
}
}

Expand Down Expand Up @@ -1712,6 +1719,7 @@ async fn handle_launch(cmd: &Value, state: &mut DaemonState) -> Result<Value, St
.get("downloadPath")
.and_then(|v| v.as_str())
.map(String::from),
viewport_size: None,
};

// Store proxy credentials for Fetch.authRequired handling
Expand Down
33 changes: 33 additions & 0 deletions cli/src/native/browser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,39 @@ impl BrowserManager {
Some(session_id),
)
.await?;

// Screencast captures the actual content area, not the emulated CSS
// viewport, so resize the content area to match.
if let Ok(target_id) = self.active_target_id() {
if let Ok(window_info) = self
.client
.send_command(
"Browser.getWindowForTarget",
Some(json!({ "targetId": target_id })),
None,
)
.await
{
if let Some(window_id) = window_info.get("windowId").and_then(|v| v.as_i64()) {
if let Err(e) = self
.client
.send_command(
"Browser.setContentsSize",
Some(json!({
"windowId": window_id,
"width": width,
"height": height,
})),
None,
)
.await
{
eprintln!("Browser.setContentsSize failed (experimental CDP): {e}");
}
}
}
}

Ok(())
}

Expand Down
7 changes: 6 additions & 1 deletion cli/src/native/cdp/chrome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ pub struct LaunchOptions {
pub ignore_https_errors: bool,
pub color_scheme: Option<String>,
pub download_path: Option<String>,
/// Initial viewport dimensions used for `--window-size` so the content
/// area matches the desired viewport from the start.
pub viewport_size: Option<(u32, u32)>,
}

impl Default for LaunchOptions {
Expand All @@ -109,6 +112,7 @@ impl Default for LaunchOptions {
ignore_https_errors: false,
color_scheme: None,
download_path: None,
viewport_size: None,
}
}
}
Expand Down Expand Up @@ -196,7 +200,8 @@ fn build_chrome_args(options: &LaunchOptions) -> Result<ChromeArgs, String> {
.any(|a| a.starts_with("--start-maximized") || a.starts_with("--window-size="));

if !has_window_size && options.headless && !has_extensions {
args.push("--window-size=1280,720".to_string());
let (w, h) = options.viewport_size.unwrap_or((1280, 720));
args.push(format!("--window-size={},{}", w, h));
}

args.extend(options.args.iter().cloned());
Expand Down
142 changes: 142 additions & 0 deletions cli/src/native/e2e_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3755,3 +3755,145 @@ async fn e2e_externally_opened_tab_detected() {
let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await;
assert_success(&resp);
}

// ---------------------------------------------------------------------------
// Stream: custom viewport is reflected in screencast frame metadata
// ---------------------------------------------------------------------------

#[tokio::test]
#[ignore]
async fn e2e_stream_frame_metadata_respects_custom_viewport() {
let guard = EnvGuard::new(&["AGENT_BROWSER_SOCKET_DIR", "AGENT_BROWSER_SESSION"]);
let socket_dir = std::env::temp_dir().join(format!(
"agent-browser-e2e-stream-viewport-{}-{}",
std::process::id(),
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("system clock should be after unix epoch")
.as_nanos()
));
std::fs::create_dir_all(&socket_dir).expect("socket dir should be created");
guard.set(
"AGENT_BROWSER_SOCKET_DIR",
socket_dir.to_str().expect("socket dir should be utf-8"),
);
guard.set("AGENT_BROWSER_SESSION", "e2e-stream-viewport");

let mut state = DaemonState::new();

// Enable stream on an ephemeral port
let resp = execute_command(
&json!({ "id": "1", "action": "stream_enable", "port": 0 }),
&mut state,
)
.await;
assert_success(&resp);
let port = get_data(&resp)["port"]
.as_u64()
.expect("stream enable should report the bound port");

// Set a custom viewport before launching the browser
let resp = execute_command(
&json!({ "id": "2", "action": "viewport", "width": 800, "height": 600 }),
&mut state,
)
.await;
assert_success(&resp);

// Connect a WebSocket client
let (mut ws, _) = tokio_tungstenite::connect_async(format!("ws://127.0.0.1:{port}"))
.await
.expect("websocket client should connect to runtime stream");

// Navigate to trigger browser launch and screencast
let resp = execute_command(
&json!({ "id": "3", "action": "navigate", "url": "data:text/html,<h1>Viewport Test</h1>" }),
&mut state,
)
.await;
assert_success(&resp);

// Wait for a frame message and verify both metadata and actual image dimensions
let mut found_frame = false;
let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_secs(15);
while tokio::time::Instant::now() < deadline {
let msg = tokio::time::timeout(tokio::time::Duration::from_secs(3), ws.next()).await;
let Some(Ok(message)) = msg.ok().flatten() else {
continue;
};
if !message.is_text() {
continue;
}
let parsed: Value =
serde_json::from_str(message.to_text().expect("text message should be readable"))
.expect("stream payload should be valid JSON");
if parsed.get("type") == Some(&json!("frame")) {
let meta = &parsed["metadata"];
assert_eq!(
meta["deviceWidth"], 800,
"frame metadata deviceWidth should match custom viewport, got: {}",
meta
);
assert_eq!(
meta["deviceHeight"], 600,
"frame metadata deviceHeight should match custom viewport, got: {}",
meta
);

// Verify the actual JPEG image dimensions match the custom viewport.
let data_str = parsed
.get("data")
.and_then(|v| v.as_str())
.expect("frame message should include base64-encoded 'data' field");
{
use base64::Engine;
let bytes = base64::engine::general_purpose::STANDARD
.decode(data_str)
.expect("frame data should be valid base64");
let (img_w, img_h) = jpeg_dimensions(&bytes)
.expect("frame data should be a valid JPEG with SOF marker");
assert_eq!(
img_w, 800,
"JPEG image width should match custom viewport, got: {}",
img_w
);
assert_eq!(
img_h, 600,
"JPEG image height should match custom viewport, got: {}",
img_h
);
}

found_frame = true;
break;
}
}
assert!(
found_frame,
"should have received at least one frame message with correct viewport metadata"
);

// Cleanup
let resp = execute_command(
&json!({ "id": "4", "action": "stream_disable" }),
&mut state,
)
.await;
assert_success(&resp);

let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await;
assert_success(&resp);
let _ = std::fs::remove_dir_all(&socket_dir);
}

/// Extract `(width, height)` from the frame header (SOF segment) of a JPEG.
///
/// The stream is parsed structurally: it must begin with the SOI marker
/// (`0xFFD8`), after which marker segments are walked using their declared
/// lengths until a start-of-frame marker is found. Walking segments (rather
/// than scanning for the raw bytes `FF C0`) avoids false matches inside other
/// segments' payloads, such as an embedded EXIF thumbnail or table data.
///
/// All SOF variants are recognised (`0xC0..=0xCF` except `0xC4` DHT, `0xC8`
/// JPG and `0xCC` DAC), which includes baseline SOF0 and progressive SOF2.
///
/// Returns `None` when the data does not start with SOI, is truncated or
/// malformed, or reaches the scan data (SOS) or end of image (EOI) without a
/// frame header. Never panics, regardless of input.
fn jpeg_dimensions(data: &[u8]) -> Option<(u32, u32)> {
    const SOI: [u8; 2] = [0xFF, 0xD8];

    let mut rest = data.strip_prefix(&SOI)?;
    loop {
        // Every marker starts with 0xFF; any additional 0xFF bytes before the
        // marker code are fill bytes and are skipped.
        if *rest.first()? != 0xFF {
            return None;
        }
        let code_at = rest.iter().position(|&byte| byte != 0xFF)?;
        let marker = *rest.get(code_at)?;
        let tail = rest.get(code_at + 1..)?;

        match marker {
            // Stand-alone markers (TEM, RSTn) carry no length field.
            0x01 | 0xD0..=0xD7 => {
                rest = tail;
                continue;
            }
            // Entropy-coded data or end of image reached before any frame header.
            0xD9 | 0xDA => return None,
            _ => {}
        }

        // The big-endian segment length counts its own two bytes.
        let len = usize::from(u16::from_be_bytes([*tail.first()?, *tail.get(1)?]));
        if len < 2 {
            return None;
        }
        let segment = tail.get(..len)?;

        let is_frame_header =
            matches!(marker, 0xC0..=0xCF) && !matches!(marker, 0xC4 | 0xC8 | 0xCC);
        if is_frame_header {
            // Layout: length (2), sample precision (1), height (2), width (2).
            let height = u16::from_be_bytes([*segment.get(3)?, *segment.get(4)?]);
            let width = u16::from_be_bytes([*segment.get(5)?, *segment.get(6)?]);
            return Some((u32::from(width), u32::from(height)));
        }

        rest = tail.get(len..)?;
    }
}
19 changes: 13 additions & 6 deletions cli/src/native/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,18 @@ impl StreamServer {
*self.screencasting.lock().await
}

/// Update the stored viewport dimensions used by status messages and screencast.
/// Also notifies the screencast event loop to restart with the new dimensions.
/// Update the stored viewport dimensions and restart the active screencast (if any)
/// so frames are captured at the new size.
pub async fn set_viewport(&self, width: u32, height: u32) {
*self.viewport_width.lock().await = width;
*self.viewport_height.lock().await = height;
let mut vw = self.viewport_width.lock().await;
let mut vh = self.viewport_height.lock().await;
if *vw == width && *vh == height {
return;
}
*vw = width;
*vh = height;
drop(vw);
drop(vh);
self.client_notify.notify_one();
}

Expand Down Expand Up @@ -883,8 +890,8 @@ async fn cdp_event_loop(
"metadata": {
"offsetTop": meta.and_then(|m| m.get("offsetTop")).and_then(|v| v.as_f64()).unwrap_or(0.0),
"pageScaleFactor": meta.and_then(|m| m.get("pageScaleFactor")).and_then(|v| v.as_f64()).unwrap_or(1.0),
"deviceWidth": meta.and_then(|m| m.get("deviceWidth")).and_then(|v| v.as_u64()).unwrap_or(1280),
"deviceHeight": meta.and_then(|m| m.get("deviceHeight")).and_then(|v| v.as_u64()).unwrap_or(720),
"deviceWidth": vw,
"deviceHeight": vh,
"scrollOffsetX": meta.and_then(|m| m.get("scrollOffsetX")).and_then(|v| v.as_f64()).unwrap_or(0.0),
"scrollOffsetY": meta.and_then(|m| m.get("scrollOffsetY")).and_then(|v| v.as_f64()).unwrap_or(0.0),
"timestamp": meta.and_then(|m| m.get("timestamp")).and_then(|v| v.as_u64()).unwrap_or(0),
Expand Down
Loading