From b7beb6c85ea410a904fe06d69e044635ba9ef737 Mon Sep 17 00:00:00 2001 From: Keira Date: Sun, 17 May 2026 21:51:23 +0700 Subject: [PATCH] feat: add coordinate mouse endpoint --- openapi.json | 110 ++++++++++++++++++++++++++++++++ server.js | 128 ++++++++++++++++++++++++++++++++++++++ tests/e2e/mouse.test.js | 65 +++++++++++++++++++ tests/helpers/client.js | 4 ++ tests/helpers/testSite.js | 25 ++++++++ 5 files changed, 332 insertions(+) create mode 100644 tests/e2e/mouse.test.js diff --git a/openapi.json b/openapi.json index a40b78d..86cd014 100644 --- a/openapi.json +++ b/openapi.json @@ -1128,6 +1128,116 @@ } } }, + "/tabs/{tabId}/mouse": { + "post": { + "tags": [ + "Interaction" + ], + "summary": "Send a mouse action at viewport coordinates", + "description": "Sends a low-level mouse action using viewport CSS-pixel coordinates. This is useful for human handoff UIs and visual automation flows where an external operator selects coordinates directly instead of an element ref or CSS selector.\n", + "parameters": [ + { + "name": "tabId", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "required": [ + "userId", + "x", + "y" + ], + "properties": { + "userId": { + "type": "string" + }, + "action": { + "type": "string", + "enum": [ + "click", + "move", + "down", + "up" + ], + "default": "click" + }, + "x": { + "type": "number", + "description": "X coordinate in viewport CSS pixels." + }, + "y": { + "type": "number", + "description": "Y coordinate in viewport CSS pixels." 
+ }, + "button": { + "type": "string", + "enum": [ + "left", + "right", + "middle" + ], + "default": "left" + }, + "clickCount": { + "type": "integer", + "minimum": 1, + "default": 1 + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Mouse action result.", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "ok": { + "type": "boolean" + }, + "action": { + "type": "string" + }, + "x": { + "type": "number" + }, + "y": { + "type": "number" + } + } + } + } + } + }, + "400": { + "description": "Invalid mouse request." + }, + "404": { + "description": "Tab not found.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + } + }, "/tabs/{tabId}/viewport": { "post": { "tags": [ diff --git a/server.js b/server.js index 4083899..17531e8 100644 --- a/server.js +++ b/server.js @@ -3634,6 +3634,134 @@ app.post('/tabs/:tabId/scroll', async (req, res) => { } }); +// Mouse coordinates (for human handoff UIs; coordinates are viewport CSS pixels) +/** + * @openapi + * /tabs/{tabId}/mouse: + * post: + * tags: [Interaction] + * summary: Send a mouse action at viewport coordinates + * description: > + * Sends a low-level mouse action using viewport CSS-pixel coordinates. + * This is useful for human handoff UIs and visual automation flows where + * an external operator selects coordinates directly instead of an element + * ref or CSS selector. + * parameters: + * - name: tabId + * in: path + * required: true + * schema: + * type: string + * requestBody: + * required: true + * content: + * application/json: + * schema: + * type: object + * required: [userId, x, y] + * properties: + * userId: + * type: string + * action: + * type: string + * enum: [click, move, down, up] + * default: click + * x: + * type: number + * description: X coordinate in viewport CSS pixels. + * y: + * type: number + * description: Y coordinate in viewport CSS pixels. 
+ *               button:
+ *                 type: string
+ *                 enum: [left, right, middle]
+ *                 default: left
+ *               clickCount:
+ *                 type: integer
+ *                 minimum: 1
+ *                 default: 1
+ *     responses:
+ *       200:
+ *         description: Mouse action result.
+ *         content:
+ *           application/json:
+ *             schema:
+ *               type: object
+ *               properties:
+ *                 ok:
+ *                   type: boolean
+ *                 action:
+ *                   type: string
+ *                 x:
+ *                   type: number
+ *                 y:
+ *                   type: number
+ *       400:
+ *         description: Invalid mouse request.
+ *       404:
+ *         description: Tab not found.
+ *         content:
+ *           application/json:
+ *             schema:
+ *               $ref: '#/components/schemas/Error'
+ */
+app.post('/tabs/:tabId/mouse', async (req, res) => {
+  try {
+    const { userId, action = 'click', x, y, button = 'left', clickCount = 1 } = req.body;
+    if (!userId) return res.status(400).json({ error: 'userId required' });
+
+    // Coordinates must be numbers or non-blank numeric strings. A bare Number()
+    // coerces null, '', booleans and [] to 0 or 1, so a malformed body would
+    // otherwise pass validation and act on the viewport's top-left corner.
+    const toCoord = (v) => (typeof v === 'number' || (typeof v === 'string' && v.trim() !== '') ? Number(v) : NaN);
+    const nx = toCoord(x);
+    const ny = toCoord(y);
+    if (!Number.isFinite(nx) || !Number.isFinite(ny)) {
+      return res.status(400).json({ error: 'numeric x and y required' });
+    }
+
+    if (!['click', 'move', 'down', 'up'].includes(action)) {
+      return res.status(400).json({ error: "action must be 'click', 'move', 'down', or 'up'" });
+    }
+    if (!['left', 'right', 'middle'].includes(button)) {
+      return res.status(400).json({ error: "button must be 'left', 'right', or 'middle'" });
+    }
+
+    // Fractional counts are rounded down rather than rejected.
+    const nClickCount = Math.floor(Number(clickCount));
+    if (!Number.isFinite(nClickCount) || nClickCount < 1) {
+      return res.status(400).json({ error: 'clickCount must be a positive integer' });
+    }
+
+    const session = sessions.get(normalizeUserId(userId));
+    const found = session && findTab(session, req.params.tabId);
+    if (!found) return tabNotFoundResponse(res, req.params.tabId || req.body?.tabId);
+
+    const { tabState } = found;
+    tabState.toolCalls++; tabState.consecutiveTimeouts = 0; tabState.consecutiveFailures = 0;
+
+    await withUserLimit(userId, () => withTabLock(req.params.tabId, async () => {
+      if (action === 'click') {
+        await tabState.page.mouse.click(nx, ny, { button, clickCount: nClickCount });
+      } else {
+        // move, down and up all position the pointer first; clickCount is only used by click.
+        await tabState.page.mouse.move(nx, ny);
+        if (action === 'down') await tabState.page.mouse.down({ button });
+        if (action === 'up') await tabState.page.mouse.up({ button });
+      }
+      // Fixed 150 ms pause after the action, still inside the withTabLock callback.
+      await tabState.page.waitForTimeout(150);
+    }));
+
+    pluginEvents.emit('tab:mouse', { userId, tabId: req.params.tabId, action, x: nx, y: ny });
+    res.json({ ok: true, action, x: nx, y: ny });
+  } catch (err) {
+    failuresTotal.labels(classifyError(err), 'mouse').inc();
+    log('error', 'mouse failed', { reqId: req.reqId, error: err.message });
+    handleRouteError(err, req, res);
+  }
+});
+
 // Viewport
 /**
  * @openapi
diff --git a/tests/e2e/mouse.test.js b/tests/e2e/mouse.test.js
new file mode 100644
index 0000000..029643c
--- /dev/null
+++ b/tests/e2e/mouse.test.js
@@ -0,0 +1,65 @@
+import { createClient } from '../helpers/client.js';
+import { getSharedEnv } from './sharedEnv.js';
+
+describe('Mouse endpoint', () => {
+  let serverUrl;
+  let testSiteUrl;
+
+  beforeAll(() => {
+    const env = getSharedEnv();
+    serverUrl = env.serverUrl;
+    testSiteUrl = env.testSiteUrl;
+  });
+
+  test('clicks viewport coordinates', async () => {
+    const client = createClient(serverUrl);
+
+    try {
+      const { tabId } = await client.createTab(`${testSiteUrl}/mouse`);
+
+      const result = await client.mouse(tabId, { x: 160, y: 120 });
+      expect(result).toMatchObject({ ok: true, action: 'click', x: 160, y: 120 });
+
+      const snapshot = await client.waitForSnapshotContains(tabId, 'Mouse clicked!');
+      expect(snapshot.snapshot).toContain('Mouse clicked!');
+    } finally {
+      await client.cleanup();
+    }
+  });
+
+  test('sends move/down/up actions at viewport coordinates', async () => {
+    const client = createClient(serverUrl);
+
+    try {
+      const { tabId } = await client.createTab(`${testSiteUrl}/mouse`);
+
+      await
expect(client.mouse(tabId, { action: 'move', x: 160, y: 120 })) + .resolves.toMatchObject({ ok: true, action: 'move', x: 160, y: 120 }); + let snapshot = await client.waitForSnapshotContains(tabId, 'Mouse moved!'); + expect(snapshot.snapshot).toContain('Mouse moved!'); + + await expect(client.mouse(tabId, { action: 'down', x: 160, y: 120 })) + .resolves.toMatchObject({ ok: true, action: 'down', x: 160, y: 120 }); + snapshot = await client.waitForSnapshotContains(tabId, 'Mouse down!'); + expect(snapshot.snapshot).toContain('Mouse down!'); + + await expect(client.mouse(tabId, { action: 'up', x: 160, y: 120 })) + .resolves.toMatchObject({ ok: true, action: 'up', x: 160, y: 120 }); + } finally { + await client.cleanup(); + } + }); + + test('rejects invalid coordinates', async () => { + const client = createClient(serverUrl); + + try { + const { tabId } = await client.createTab(`${testSiteUrl}/mouse`); + + await expect(client.mouse(tabId, { x: 'not-a-number', y: 120 })) + .rejects.toThrow('numeric x and y required'); + } finally { + await client.cleanup(); + } + }); +}); diff --git a/tests/helpers/client.js b/tests/helpers/client.js index 692341d..acf42b9 100644 --- a/tests/helpers/client.js +++ b/tests/helpers/client.js @@ -99,6 +99,10 @@ class BrowserClient { async click(tabId, options) { return this.request('POST', `/tabs/${tabId}/click`, { userId: this.userId, ...options }); } + + async mouse(tabId, options) { + return this.request('POST', `/tabs/${tabId}/mouse`, { userId: this.userId, ...options }); + } async type(tabId, options) { const { pressEnter, clear, ...typeOptions } = options; diff --git a/tests/helpers/testSite.js b/tests/helpers/testSite.js index 97ad14a..481d17a 100644 --- a/tests/helpers/testSite.js +++ b/tests/helpers/testSite.js @@ -174,6 +174,31 @@ function createTestApp() { `); }); + + // Page with fixed coordinate target for mouse endpoint tests + app.get('/mouse', (req, res) => { + res.send(` + + Mouse Test + + + + +
+ + + `); + }); // Echo endpoint for macro expansion testing - echoes the full request URL app.get('/echo-url', (req, res) => {