Skip to content
30 changes: 30 additions & 0 deletions cli/src/native/browser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,36 @@ impl BrowserManager {
Some(session_id),
)
.await?;

// Screencast captures the actual content area, not the emulated CSS
// viewport, so resize the content area to match.
if let Ok(target_id) = self.active_target_id() {
if let Ok(window_info) = self
.client
.send_command(
"Browser.getWindowForTarget",
Some(json!({ "targetId": target_id })),
None,
)
.await
{
if let Some(window_id) = window_info.get("windowId").and_then(|v| v.as_i64()) {
let _ = self
.client
.send_command(
"Browser.setContentsSize",
Some(json!({
"windowId": window_id,
"width": width,
"height": height,
})),
None,
)
.await;
}
}
}

Ok(())
}

Expand Down
142 changes: 142 additions & 0 deletions cli/src/native/e2e_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3463,3 +3463,145 @@ async fn e2e_headers_case_insensitive_no_duplicates() {
let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await;
assert_success(&resp);
}

// ---------------------------------------------------------------------------
// Stream: custom viewport is reflected in screencast frame metadata
// ---------------------------------------------------------------------------

#[tokio::test]
#[ignore]
async fn e2e_stream_frame_metadata_respects_custom_viewport() {
let guard = EnvGuard::new(&["AGENT_BROWSER_SOCKET_DIR", "AGENT_BROWSER_SESSION"]);
let socket_dir = std::env::temp_dir().join(format!(
"agent-browser-e2e-stream-viewport-{}-{}",
std::process::id(),
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.expect("system clock should be after unix epoch")
.as_nanos()
));
std::fs::create_dir_all(&socket_dir).expect("socket dir should be created");
guard.set(
"AGENT_BROWSER_SOCKET_DIR",
socket_dir.to_str().expect("socket dir should be utf-8"),
);
guard.set("AGENT_BROWSER_SESSION", "e2e-stream-viewport");

let mut state = DaemonState::new();

// Enable stream on an ephemeral port
let resp = execute_command(
&json!({ "id": "1", "action": "stream_enable", "port": 0 }),
&mut state,
)
.await;
assert_success(&resp);
let port = get_data(&resp)["port"]
.as_u64()
.expect("stream enable should report the bound port");

// Set a custom viewport before launching the browser
let resp = execute_command(
&json!({ "id": "2", "action": "viewport", "width": 800, "height": 600 }),
&mut state,
)
.await;
assert_success(&resp);

// Connect a WebSocket client
let (mut ws, _) = tokio_tungstenite::connect_async(format!("ws://127.0.0.1:{port}"))
.await
.expect("websocket client should connect to runtime stream");

// Navigate to trigger browser launch and screencast
let resp = execute_command(
&json!({ "id": "3", "action": "navigate", "url": "data:text/html,<h1>Viewport Test</h1>" }),
&mut state,
)
.await;
assert_success(&resp);

// Wait for a frame message and verify both metadata and actual image dimensions
let mut found_frame = false;
let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_secs(15);
while tokio::time::Instant::now() < deadline {
let msg = tokio::time::timeout(tokio::time::Duration::from_secs(3), ws.next()).await;
let Some(Ok(message)) = msg.ok().flatten() else {
continue;
};
if !message.is_text() {
continue;
}
let parsed: Value =
serde_json::from_str(message.to_text().expect("text message should be readable"))
.expect("stream payload should be valid JSON");
if parsed.get("type") == Some(&json!("frame")) {
let meta = &parsed["metadata"];
assert_eq!(
meta["deviceWidth"], 800,
"frame metadata deviceWidth should match custom viewport, got: {}",
meta
);
assert_eq!(
meta["deviceHeight"], 600,
"frame metadata deviceHeight should match custom viewport, got: {}",
meta
);

// Verify the actual JPEG image dimensions match the custom viewport.
let data_str = parsed
.get("data")
.and_then(|v| v.as_str())
.expect("frame message should include base64-encoded 'data' field");
{
use base64::Engine;
let bytes = base64::engine::general_purpose::STANDARD
.decode(data_str)
.expect("frame data should be valid base64");
let (img_w, img_h) = jpeg_dimensions(&bytes)
.expect("frame data should be a valid JPEG with SOF marker");
assert_eq!(
img_w, 800,
"JPEG image width should match custom viewport, got: {}",
img_w
);
assert_eq!(
img_h, 600,
"JPEG image height should match custom viewport, got: {}",
img_h
);
}

found_frame = true;
break;
}
}
assert!(
found_frame,
"should have received at least one frame message with correct viewport metadata"
);

// Cleanup
let resp = execute_command(
&json!({ "id": "4", "action": "stream_disable" }),
&mut state,
)
.await;
assert_success(&resp);

let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await;
assert_success(&resp);
let _ = std::fs::remove_dir_all(&socket_dir);
}

/// Extract `(width, height)` in pixels from a JPEG's start-of-frame header.
///
/// The data must begin with the SOI marker (`FF D8`). Marker segments are then
/// walked one by one using their declared lengths, so byte pairs that merely
/// look like a frame marker inside another segment's payload (for example an
/// embedded thumbnail in an APPn block) are never misread as the frame header.
///
/// Any start-of-frame marker is accepted (`FF C0`..=`FF CF` except DHT `C4`,
/// JPG `C8` and DAC `CC`), which includes the baseline (SOF0) and progressive
/// (SOF2) variants; they all share the same header layout.
///
/// Returns `None` when the input is not a JPEG, is truncated before the frame
/// header is complete, or reaches scan data / end-of-image without a frame
/// header.
fn jpeg_dimensions(data: &[u8]) -> Option<(u32, u32)> {
    if !data.starts_with(&[0xFF, 0xD8]) {
        return None;
    }

    let mut pos = 2usize;
    loop {
        // Every segment starts with 0xFF followed by a marker code.
        if *data.get(pos)? != 0xFF {
            return None;
        }
        let marker = *data.get(pos + 1)?;
        match marker {
            // Fill byte: markers may be preceded by any number of 0xFF bytes.
            0xFF => {
                pos += 1;
                continue;
            }
            // Standalone markers (TEM, RSTn, SOI) carry no length field.
            0x01 | 0xD0..=0xD8 => {
                pos += 2;
                continue;
            }
            // A stuffed byte, end of image, or start of scan: the frame header
            // would have had to appear before this point.
            0x00 | 0xD9 | 0xDA => return None,
            _ => {}
        }

        // Segment layout after the marker: length (2 bytes, big-endian, counts
        // itself), then the payload. For SOF segments the payload is
        // precision (1), height (2), width (2), ...
        let segment = data.get(pos + 2..)?;
        let length = usize::from(u16::from_be_bytes([*segment.first()?, *segment.get(1)?]));
        if length < 2 {
            return None;
        }

        let is_frame_header =
            matches!(marker, 0xC0..=0xCF) && !matches!(marker, 0xC4 | 0xC8 | 0xCC);
        if is_frame_header {
            let height = u16::from_be_bytes([*segment.get(3)?, *segment.get(4)?]);
            let width = u16::from_be_bytes([*segment.get(5)?, *segment.get(6)?]);
            return Some((u32::from(width), u32::from(height)));
        }

        // Skip the marker (2 bytes) and the whole segment, length field included.
        pos += 2 + length;
    }
}
31 changes: 25 additions & 6 deletions cli/src/native/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,18 @@ impl StreamServer {
*self.screencasting.lock().await
}

/// Update the stored viewport dimensions and signal `client_notify` so an
/// active screencast (if any) can be restarted at the new size.
///
/// Does nothing -- in particular sends no notification -- when `width` and
/// `height` already equal the stored values.
pub async fn set_viewport(&self, width: u32, height: u32) {
    {
        let mut stored_width = self.viewport_width.lock().await;
        let mut stored_height = self.viewport_height.lock().await;
        // Compare BEFORE writing: storing the new values first would make this
        // check always succeed and the notification below would never fire.
        if *stored_width == width && *stored_height == height {
            return;
        }
        *stored_width = width;
        *stored_height = height;
        // Both guards are released at the end of this scope, before notifying.
    }
    self.client_notify.notify_one();
}

Expand Down Expand Up @@ -869,8 +876,8 @@ async fn cdp_event_loop(
"metadata": {
"offsetTop": meta.and_then(|m| m.get("offsetTop")).and_then(|v| v.as_f64()).unwrap_or(0.0),
"pageScaleFactor": meta.and_then(|m| m.get("pageScaleFactor")).and_then(|v| v.as_f64()).unwrap_or(1.0),
"deviceWidth": meta.and_then(|m| m.get("deviceWidth")).and_then(|v| v.as_u64()).unwrap_or(1280),
"deviceHeight": meta.and_then(|m| m.get("deviceHeight")).and_then(|v| v.as_u64()).unwrap_or(720),
"deviceWidth": vw,
"deviceHeight": vh,
"scrollOffsetX": meta.and_then(|m| m.get("scrollOffsetX")).and_then(|v| v.as_f64()).unwrap_or(0.0),
"scrollOffsetY": meta.and_then(|m| m.get("scrollOffsetY")).and_then(|v| v.as_f64()).unwrap_or(0.0),
"timestamp": meta.and_then(|m| m.get("timestamp")).and_then(|v| v.as_u64()).unwrap_or(0),
Expand Down Expand Up @@ -971,6 +978,18 @@ async fn cdp_event_loop(
client_notify.notify_one();
break;
}
// Viewport changed — restart screencast with new dimensions
let new_vw = *viewport_width.lock().await;
let new_vh = *viewport_height.lock().await;
if new_vw != vw || new_vh != vh {
let _ = client_arc
.send_command_no_params("Page.stopScreencast", session_id.as_deref())
.await;
let mut sc = screencasting.lock().await;
*sc = false;
client_notify.notify_one();
break;
}
}
}
}
Expand Down
Loading