Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,116 @@
}
}
},
"/tabs/{tabId}/mouse": {
"post": {
"tags": [
"Interaction"
],
"summary": "Send a mouse action at viewport coordinates",
"description": "Sends a low-level mouse action using viewport CSS-pixel coordinates. This is useful for human handoff UIs and visual automation flows where an external operator selects coordinates directly instead of an element ref or CSS selector.\n",
"parameters": [
{
"name": "tabId",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"required": [
"userId",
"x",
"y"
],
"properties": {
"userId": {
"type": "string"
},
"action": {
"type": "string",
"enum": [
"click",
"move",
"down",
"up"
],
"default": "click"
},
"x": {
"type": "number",
"description": "X coordinate in viewport CSS pixels."
},
"y": {
"type": "number",
"description": "Y coordinate in viewport CSS pixels."
},
"button": {
"type": "string",
"enum": [
"left",
"right",
"middle"
],
"default": "left"
},
"clickCount": {
"type": "integer",
"minimum": 1,
"default": 1
}
}
}
}
}
},
"responses": {
"200": {
"description": "Mouse action result.",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"ok": {
"type": "boolean"
},
"action": {
"type": "string"
},
"x": {
"type": "number"
},
"y": {
"type": "number"
}
}
}
}
}
},
"400": {
"description": "Invalid mouse request."
},
"404": {
"description": "Tab not found.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
}
}
}
},
"/tabs/{tabId}/viewport": {
"post": {
"tags": [
Expand Down
128 changes: 128 additions & 0 deletions server.js
Original file line number Diff line number Diff line change
Expand Up @@ -3634,6 +3634,134 @@ app.post('/tabs/:tabId/scroll', async (req, res) => {
}
});

// Mouse coordinates (for human handoff UIs; coordinates are viewport CSS pixels)
/**
* @openapi
* /tabs/{tabId}/mouse:
* post:
* tags: [Interaction]
* summary: Send a mouse action at viewport coordinates
* description: >
* Sends a low-level mouse action using viewport CSS-pixel coordinates.
* This is useful for human handoff UIs and visual automation flows where
* an external operator selects coordinates directly instead of an element
* ref or CSS selector.
* parameters:
* - name: tabId
* in: path
* required: true
* schema:
* type: string
* requestBody:
* required: true
* content:
* application/json:
* schema:
* type: object
* required: [userId, x, y]
* properties:
* userId:
* type: string
* action:
* type: string
* enum: [click, move, down, up]
* default: click
* x:
* type: number
* description: X coordinate in viewport CSS pixels.
* y:
* type: number
* description: Y coordinate in viewport CSS pixels.
* button:
* type: string
* enum: [left, right, middle]
* default: left
* clickCount:
* type: integer
* minimum: 1
* default: 1
* responses:
* 200:
* description: Mouse action result.
* content:
* application/json:
* schema:
* type: object
* properties:
* ok:
* type: boolean
* action:
* type: string
* x:
* type: number
* y:
* type: number
* 400:
* description: Invalid mouse request.
* 404:
* description: Tab not found.
* content:
* application/json:
* schema:
* $ref: '#/components/schemas/Error'
*/
app.post('/tabs/:tabId/mouse', async (req, res) => {
  // Handler for low-level mouse input at viewport CSS-pixel coordinates.
  //
  // Request body: { userId, action?, x, y, button?, clickCount? }
  //   - action defaults to 'click'; button defaults to 'left'; clickCount defaults to 1.
  // Responses:
  //   - 200 { ok: true, action, x, y } once the action has been dispatched.
  //   - 400 { error } when any field fails validation (checked before the tab lookup).
  //   - tabNotFoundResponse(...) when the session or tab cannot be resolved.
  //   - Anything thrown is counted, logged, and delegated to handleRouteError.
  try {
    // Fall back to an empty object so a missing/unparsed body yields the
    // 'userId required' 400 below instead of a destructuring TypeError.
    const { userId, action = 'click', x, y, button = 'left', clickCount = 1 } = req.body ?? {};
    if (!userId) return res.status(400).json({ error: 'userId required' });

    // Number() alone coerces null, '', booleans and [] to 0/1, which would let a
    // malformed body click at (0, 0). Only real numbers and non-blank numeric
    // strings are accepted; everything else becomes NaN and is rejected.
    const toNumber = (value) => {
      if (typeof value === 'number') return value;
      if (typeof value === 'string' && value.trim() !== '') return Number(value);
      return NaN;
    };

    const nx = toNumber(x);
    const ny = toNumber(y);
    if (!Number.isFinite(nx) || !Number.isFinite(ny)) {
      return res.status(400).json({ error: 'numeric x and y required' });
    }

    const allowedActions = new Set(['click', 'move', 'down', 'up']);
    if (!allowedActions.has(action)) {
      return res.status(400).json({ error: "action must be 'click', 'move', 'down', or 'up'" });
    }

    const allowedButtons = new Set(['left', 'right', 'middle']);
    if (!allowedButtons.has(button)) {
      return res.status(400).json({ error: "button must be 'left', 'right', or 'middle'" });
    }

    // The contract is "positive integer": reject fractional values rather than
    // silently flooring them (2.7 used to be treated as 2).
    const nClickCount = toNumber(clickCount);
    if (!Number.isInteger(nClickCount) || nClickCount < 1) {
      return res.status(400).json({ error: 'clickCount must be a positive integer' });
    }

    const session = sessions.get(normalizeUserId(userId));
    const found = session && findTab(session, req.params.tabId);
    if (!found) return tabNotFoundResponse(res, req.params.tabId || req.body?.tabId);

    const { tabState } = found;
    // Record the tool call and reset the consecutive timeout/failure counters.
    tabState.toolCalls++;
    tabState.consecutiveTimeouts = 0;
    tabState.consecutiveFailures = 0;

    await withUserLimit(userId, () => withTabLock(req.params.tabId, async () => {
      const { mouse } = tabState.page;
      if (action === 'move') {
        await mouse.move(nx, ny);
      } else if (action === 'down') {
        // Position the pointer first so the press lands on the requested point.
        await mouse.move(nx, ny);
        await mouse.down({ button });
      } else if (action === 'up') {
        // Same for release: move, then lift the button at that point.
        await mouse.move(nx, ny);
        await mouse.up({ button });
      } else {
        await mouse.click(nx, ny, { button, clickCount: nClickCount });
      }
      // Short fixed pause after the input before the lock is released.
      await tabState.page.waitForTimeout(150);
    }));

    pluginEvents.emit('tab:mouse', { userId, tabId: req.params.tabId, action, x: nx, y: ny });
    res.json({ ok: true, action, x: nx, y: ny });
  } catch (err) {
    failuresTotal.labels(classifyError(err), 'mouse').inc();
    log('error', 'mouse failed', { reqId: req.reqId, error: err.message });
    handleRouteError(err, req, res);
  }
});

// Viewport
/**
* @openapi
Expand Down
65 changes: 65 additions & 0 deletions tests/e2e/mouse.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { createClient } from '../helpers/client.js';
import { getSharedEnv } from './sharedEnv.js';

describe('Mouse endpoint', () => {
  let serverUrl;
  let testSiteUrl;

  beforeAll(() => {
    ({ serverUrl, testSiteUrl } = getSharedEnv());
  });

  // Runs `scenario` against a fresh client with the /mouse test page open,
  // and always cleans the client up afterwards (even if tab creation fails).
  const withMouseTab = async (scenario) => {
    const client = createClient(serverUrl);

    try {
      const { tabId } = await client.createTab(`${testSiteUrl}/mouse`);
      await scenario(client, tabId);
    } finally {
      await client.cleanup();
    }
  };

  test('clicks viewport coordinates', () => withMouseTab(async (client, tabId) => {
    const response = await client.mouse(tabId, { x: 160, y: 120 });
    expect(response).toMatchObject({ ok: true, action: 'click', x: 160, y: 120 });

    const page = await client.waitForSnapshotContains(tabId, 'Mouse clicked!');
    expect(page.snapshot).toContain('Mouse clicked!');
  }));

  test('sends move/down/up actions at viewport coordinates', () => withMouseTab(async (client, tabId) => {
    // Each step sends one action; `marker` is the page text expected afterwards
    // (null means the page text is not checked for that step).
    const steps = [
      { action: 'move', marker: 'Mouse moved!' },
      { action: 'down', marker: 'Mouse down!' },
      { action: 'up', marker: null },
    ];

    for (const { action, marker } of steps) {
      await expect(client.mouse(tabId, { action, x: 160, y: 120 }))
        .resolves.toMatchObject({ ok: true, action, x: 160, y: 120 });

      if (marker !== null) {
        const page = await client.waitForSnapshotContains(tabId, marker);
        expect(page.snapshot).toContain(marker);
      }
    }
  }));

  test('rejects invalid coordinates', () => withMouseTab(async (client, tabId) => {
    await expect(client.mouse(tabId, { x: 'not-a-number', y: 120 }))
      .rejects.toThrow('numeric x and y required');
  }));
});
4 changes: 4 additions & 0 deletions tests/helpers/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ class BrowserClient {
async click(tabId, options) {
return this.request('POST', `/tabs/${tabId}/click`, { userId: this.userId, ...options });
}

async mouse(tabId, options) {
return this.request('POST', `/tabs/${tabId}/mouse`, { userId: this.userId, ...options });
}

async type(tabId, options) {
const { pressEnter, clear, ...typeOptions } = options;
Expand Down
25 changes: 25 additions & 0 deletions tests/helpers/testSite.js
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,31 @@ function createTestApp() {
</body></html>
`);
});

// Page with fixed coordinate target for mouse endpoint tests.
// The body has zero margin/padding and #target is absolutely positioned at
// left 100px / top 80px with a 120x80px box, so (assuming the page is not
// scrolled) it spans viewport x 100-220 and y 80-160; (160, 120) is its centre.
// Each mouse listener on #target writes a marker string into #result. All
// listeners write to the same element, so only the most recent event's text
// is visible at any time.
app.get('/mouse', (req, res) => {
res.send(`
<!DOCTYPE html>
<html><head><title>Mouse Test</title>
<style>
body { margin: 0; padding: 0; }
#target { position: absolute; left: 100px; top: 80px; width: 120px; height: 80px; }
</style>
</head>
<body>
<button id="target">Mouse Target</button>
<div id="result"></div>
<script>
const result = document.getElementById('result');
const target = document.getElementById('target');
target.addEventListener('mousemove', () => { result.textContent = 'Mouse moved!'; });
target.addEventListener('mousedown', () => { result.textContent = 'Mouse down!'; });
target.addEventListener('mouseup', () => { result.textContent = 'Mouse up!'; });
target.addEventListener('click', () => { result.textContent = 'Mouse clicked!'; });
</script>
</body></html>
`);
});

// Echo endpoint for macro expansion testing - echoes the full request URL
app.get('/echo-url', (req, res) => {
Expand Down
Loading