diff --git a/cli/src/native/actions.rs b/cli/src/native/actions.rs index f85460b2c..032875ad5 100644 --- a/cli/src/native/actions.rs +++ b/cli/src/native/actions.rs @@ -1395,7 +1395,13 @@ async fn connect_auto_with_fresh_tab() -> Result { } async fn auto_launch(state: &mut DaemonState) -> Result<(), String> { - let options = launch_options_from_env(); + let mut options = launch_options_from_env(); + + // Use the stream server's viewport dimensions for --window-size so the + // content area matches the desired viewport from the start. + if let Some(ref server) = state.stream_server { + options.viewport_size = Some(server.viewport().await); + } let engine = env::var("AGENT_BROWSER_ENGINE").ok(); // Store proxy credentials for Fetch.authRequired handling @@ -1495,6 +1501,7 @@ fn launch_options_from_env() -> LaunchOptions { .unwrap_or(false), color_scheme: env::var("AGENT_BROWSER_COLOR_SCHEME").ok(), download_path: env::var("AGENT_BROWSER_DOWNLOAD_PATH").ok(), + viewport_size: None, } } @@ -1712,6 +1719,7 @@ async fn handle_launch(cmd: &Value, state: &mut DaemonState) -> Result, pub download_path: Option, + /// Initial viewport dimensions used for `--window-size` so the content + /// area matches the desired viewport from the start. + pub viewport_size: Option<(u32, u32)>, } impl Default for LaunchOptions { @@ -109,6 +112,7 @@ impl Default for LaunchOptions { ignore_https_errors: false, color_scheme: None, download_path: None, + viewport_size: None, } } } @@ -196,7 +200,8 @@ fn build_chrome_args(options: &LaunchOptions) -> Result { .any(|a| a.starts_with("--start-maximized") || a.starts_with("--window-size=")); if !has_window_size && options.headless && !has_extensions { - args.push("--window-size=1280,720".to_string()); + let (w, h) = options.viewport_size.unwrap_or((1280, 720)); + args.push(format!("--window-size={},{}", w, h)); } args.extend(options.args.iter().cloned()); diff --git a/cli/src/native/e2e_tests.rs b/cli/src/native/e2e_tests.rs index 013605205..ca30f42e0 100644 --- a/cli/src/native/e2e_tests.rs +++ b/cli/src/native/e2e_tests.rs @@ -3755,3 +3755,145 @@ async fn e2e_externally_opened_tab_detected() { let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await; assert_success(&resp); } + +// --------------------------------------------------------------------------- +// Stream: custom viewport is reflected in screencast frame metadata +// --------------------------------------------------------------------------- + +#[tokio::test] +#[ignore] +async fn e2e_stream_frame_metadata_respects_custom_viewport() { + let guard = EnvGuard::new(&["AGENT_BROWSER_SOCKET_DIR", "AGENT_BROWSER_SESSION"]); + let socket_dir = std::env::temp_dir().join(format!( + "agent-browser-e2e-stream-viewport-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("system clock should be after unix epoch") + .as_nanos() + )); + std::fs::create_dir_all(&socket_dir).expect("socket dir should be created"); + guard.set( + "AGENT_BROWSER_SOCKET_DIR", + socket_dir.to_str().expect("socket dir should be utf-8"), + ); + guard.set("AGENT_BROWSER_SESSION", "e2e-stream-viewport"); + + let mut state = DaemonState::new(); + + // Enable stream on an ephemeral port + let resp = execute_command( + &json!({ "id": "1", "action": "stream_enable", "port": 0 }), + &mut state, + ) + .await; + assert_success(&resp); + let port = get_data(&resp)["port"] + .as_u64() + .expect("stream enable should report the bound port"); + + // Set a custom viewport before launching the browser + let resp = execute_command( + &json!({ "id": "2", "action": "viewport", "width": 800, "height": 600 }), + &mut state, + ) + .await; + assert_success(&resp); + + // Connect a WebSocket client + let (mut ws, _) = tokio_tungstenite::connect_async(format!("ws://127.0.0.1:{port}")) + .await + .expect("websocket client should connect to runtime stream"); + + // Navigate to trigger browser launch and screencast + let resp = execute_command( + &json!({ "id": "3", "action": "navigate", "url": "data:text/html,

Viewport Test

" }), + &mut state, + ) + .await; + assert_success(&resp); + + // Wait for a frame message and verify both metadata and actual image dimensions + let mut found_frame = false; + let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_secs(15); + while tokio::time::Instant::now() < deadline { + let msg = tokio::time::timeout(tokio::time::Duration::from_secs(3), ws.next()).await; + let Some(Ok(message)) = msg.ok().flatten() else { + continue; + }; + if !message.is_text() { + continue; + } + let parsed: Value = + serde_json::from_str(message.to_text().expect("text message should be readable")) + .expect("stream payload should be valid JSON"); + if parsed.get("type") == Some(&json!("frame")) { + let meta = &parsed["metadata"]; + assert_eq!( + meta["deviceWidth"], 800, + "frame metadata deviceWidth should match custom viewport, got: {}", + meta + ); + assert_eq!( + meta["deviceHeight"], 600, + "frame metadata deviceHeight should match custom viewport, got: {}", + meta + ); + + // Verify the actual JPEG image dimensions match the custom viewport. + let data_str = parsed + .get("data") + .and_then(|v| v.as_str()) + .expect("frame message should include base64-encoded 'data' field"); + { + use base64::Engine; + let bytes = base64::engine::general_purpose::STANDARD + .decode(data_str) + .expect("frame data should be valid base64"); + let (img_w, img_h) = jpeg_dimensions(&bytes) + .expect("frame data should be a valid JPEG with SOF marker"); + assert_eq!( + img_w, 800, + "JPEG image width should match custom viewport, got: {}", + img_w + ); + assert_eq!( + img_h, 600, + "JPEG image height should match custom viewport, got: {}", + img_h + ); + } + + found_frame = true; + break; + } + } + assert!( + found_frame, + "should have received at least one frame message with correct viewport metadata" + ); + + // Cleanup + let resp = execute_command( + &json!({ "id": "4", "action": "stream_disable" }), + &mut state, + ) + .await; + assert_success(&resp); + + let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await; + assert_success(&resp); + let _ = std::fs::remove_dir_all(&socket_dir); +} + +/// Extract width and height from a JPEG's SOF0 (0xFFC0) or SOF2 (0xFFC2) marker. +fn jpeg_dimensions(data: &[u8]) -> Option<(u32, u32)> { + for i in 0..data.len().saturating_sub(8) { + if data[i] == 0xFF && (data[i + 1] == 0xC0 || data[i + 1] == 0xC2) { + let height = u16::from_be_bytes([data[i + 5], data[i + 6]]) as u32; + let width = u16::from_be_bytes([data[i + 7], data[i + 8]]) as u32; + return Some((width, height)); + } + } + None +} diff --git a/cli/src/native/stream.rs b/cli/src/native/stream.rs index 2d4f67d47..a41a174ae 100644 --- a/cli/src/native/stream.rs +++ b/cli/src/native/stream.rs @@ -118,11 +118,18 @@ impl StreamServer { *self.screencasting.lock().await } - /// Update the stored viewport dimensions used by status messages and screencast. - /// Also notifies the screencast event loop to restart with the new dimensions. + /// Update the stored viewport dimensions and restart the active screencast (if any) + /// so frames are captured at the new size. pub async fn set_viewport(&self, width: u32, height: u32) { - *self.viewport_width.lock().await = width; - *self.viewport_height.lock().await = height; + let mut vw = self.viewport_width.lock().await; + let mut vh = self.viewport_height.lock().await; + if *vw == width && *vh == height { + return; + } + *vw = width; + *vh = height; + drop(vw); + drop(vh); self.client_notify.notify_one(); } @@ -883,8 +890,8 @@ async fn cdp_event_loop( "metadata": { "offsetTop": meta.and_then(|m| m.get("offsetTop")).and_then(|v| v.as_f64()).unwrap_or(0.0), "pageScaleFactor": meta.and_then(|m| m.get("pageScaleFactor")).and_then(|v| v.as_f64()).unwrap_or(1.0), - "deviceWidth": meta.and_then(|m| m.get("deviceWidth")).and_then(|v| v.as_u64()).unwrap_or(1280), - "deviceHeight": meta.and_then(|m| m.get("deviceHeight")).and_then(|v| v.as_u64()).unwrap_or(720), + "deviceWidth": vw, + "deviceHeight": vh, "scrollOffsetX": meta.and_then(|m| m.get("scrollOffsetX")).and_then(|v| v.as_f64()).unwrap_or(0.0), "scrollOffsetY": meta.and_then(|m| m.get("scrollOffsetY")).and_then(|v| v.as_f64()).unwrap_or(0.0), "timestamp": meta.and_then(|m| m.get("timestamp")).and_then(|v| v.as_u64()).unwrap_or(0),