diff --git a/docs/browser-mode.md b/docs/browser-mode.md index c45c15e21..e89684f6f 100644 --- a/docs/browser-mode.md +++ b/docs/browser-mode.md @@ -49,7 +49,7 @@ You can pass the same payload inline (`--browser-inline-cookies '` (or `oracle status `) shows a marker for the background Chrome process. 4. **Usage accounting** – we estimate input tokens with the same tokenizer used for API runs and estimate output tokens via `estimateTokenCount`. `oracle status` therefore shows comparable cost/timing info even though the call ran through the browser. @@ -228,7 +228,7 @@ This mode is ideal when you have a macOS VM (or spare Mac mini) logged into Chat ## Limitations / Follow-Up Plan -- **Attachment lifecycle** – in `auto` mode we prefer inlining files into the composer (fewer moving parts). When we do upload, each `--file` path is uploaded separately (or bundled) so ChatGPT can ingest filenames/content. The automation waits for uploads to finish (send button enabled, upload chips visible) before submitting. When inline paste is rejected by ChatGPT (too large), Oracle retries automatically with uploads. +- **Attachment lifecycle** – in `auto` mode we prefer inlining files into the composer (fewer moving parts). When we do upload, each `--file` path is uploaded separately (or bundled) so ChatGPT can ingest filenames/content. The automation treats upload completion and send readiness as separate gates: it first waits for stable attachment evidence, then after the prompt is in the composer it waits for the same composer’s send button to become clickable before clicking it. If attachment evidence never stabilizes, Oracle fails the run instead of degrading into a plain-text Enter submit. When inline paste is rejected by ChatGPT (too large), Oracle retries automatically with uploads. - **Model picker drift** – we rely on heuristics to pick GPT-5.4 / GPT-5.2 variants. If OpenAI changes the DOM we need to refresh the selectors quickly. Consider snapshot tests or a small “self check” command. - **Non-mac platforms** – window hiding uses AppleScript today; Linux/Windows just ignore the flag. We should detect platforms explicitly and document the behavior. - **Streaming UX** – browser runs cannot stream tokens, so we log a warning before launching Chrome. Investigate whether we can stream clipboard deltas via mutation observers for a closer UX. diff --git a/docs/manual-tests.md b/docs/manual-tests.md index 6cf5d6d21..7eed1fdeb 100644 --- a/docs/manual-tests.md +++ b/docs/manual-tests.md @@ -158,6 +158,18 @@ Run these four smoke tests whenever we touch browser automation: Prepare `/tmp/browser-report.txt` with faux metrics, then run `pnpm run oracle -- --engine browser --model gpt-5.2 --prompt "Use the attachment to report current CPU and memory figures" --file /tmp/browser-report.txt --verbose` Verify verbose logs show attachment upload and the final answer matches the file data. + Expected attachment-send logs: + - `Attachment queued` + - `All attachments uploaded` + - `Clicked send button` + - no `Submitted prompt via Enter key` after the attachment upload stage + +5. **Attachment send race guard** + Prepare a small text file, then run + `pnpm run oracle -- --engine browser --model gpt-5.2 --prompt "Reply exactly with OK." --file /tmp/browser-report.txt --verbose` + Validate one of these outcomes: + - success path: `All attachments uploaded` followed by `Clicked send button`, then the assistant answer + - fail-fast path: an explicit attachment/browser automation error before send, with no Enter fallback Record session IDs and outcomes in the PR description (pass/fail, notable delays). This ensures reviewers can audit real runs. diff --git a/src/browser/actions/attachments.ts b/src/browser/actions/attachments.ts index 89988e839..17876f01d 100644 --- a/src/browser/actions/attachments.ts +++ b/src/browser/actions/attachments.ts @@ -9,6 +9,12 @@ import { import { delay } from "../utils.js"; import { logDomFailure } from "../domDebug.js"; import { transferAttachmentViaDataTransfer } from "./attachmentDataTransfer.js"; +import { + evaluateComposerAttachmentEvidence, + hasAttachmentCompletionEvidence, + readComposerSendReadiness, + summarizeComposerSendReadiness, +} from "./composerSendReadiness.js"; export async function uploadAttachmentFile( deps: { @@ -1334,346 +1340,44 @@ export async function waitForAttachmentCompletion( logger?: BrowserLogger, ): Promise { const deadline = Date.now() + timeoutMs; - const expectedNormalized = expectedNames.map((name) => name.toLowerCase()); - let inputMatchSince: number | null = null; - let sawInputMatch = false; - let attachmentMatchSince: number | null = null; + let evidenceStableSince: number | null = null; let lastVerboseLog = 0; - const expression = `(() => { - const sendSelectors = ${JSON.stringify(SEND_BUTTON_SELECTORS)}; - const promptSelectors = ${JSON.stringify(INPUT_SELECTORS)}; - const findPromptNode = () => { - for (const selector of promptSelectors) { - const nodes = Array.from(document.querySelectorAll(selector)); - for (const node of nodes) { - if (!(node instanceof HTMLElement)) continue; - const rect = node.getBoundingClientRect(); - if (rect.width > 0 && rect.height > 0) return node; - } - } - for (const selector of promptSelectors) { - const node = document.querySelector(selector); - if (node) return node; - } - return null; - }; - const attachmentSelectors = [ - 'input[type="file"]', - '[data-testid*="attachment"]', - '[data-testid*="upload"]', - '[aria-label*="Remove"]', - '[aria-label*="remove"]', - ]; - const locateComposerRoot = () => { - const promptNode = findPromptNode(); - if (promptNode) { - const initial = - promptNode.closest('[data-testid*="composer"]') ?? - promptNode.closest('form') ?? - promptNode.parentElement ?? - document.body; - let current = initial; - let fallback = initial; - while (current && current !== document.body) { - const hasSend = sendSelectors.some((selector) => current.querySelector(selector)); - if (hasSend) { - fallback = current; - const hasAttachment = attachmentSelectors.some((selector) => current.querySelector(selector)); - if (hasAttachment) { - return current; - } - } - current = current.parentElement; - } - return fallback ?? initial; - } - return document.querySelector('form') ?? document.body; - }; - const composerRoot = locateComposerRoot(); - const composerScope = (() => { - if (!composerRoot) return document; - const parent = composerRoot.parentElement; - const parentHasSend = parent && sendSelectors.some((selector) => parent.querySelector(selector)); - return parentHasSend ? parent : composerRoot; - })(); - let button = null; - for (const selector of sendSelectors) { - button = document.querySelector(selector); - if (button) break; - } - const disabled = button - ? button.hasAttribute('disabled') || - button.getAttribute('aria-disabled') === 'true' || - button.getAttribute('data-disabled') === 'true' || - window.getComputedStyle(button).pointerEvents === 'none' - : null; - const uploadingSelectors = ${JSON.stringify(UPLOAD_STATUS_SELECTORS)}; - const uploading = uploadingSelectors.some((selector) => { - return Array.from(document.querySelectorAll(selector)).some((node) => { - const ariaBusy = node.getAttribute?.('aria-busy'); - const dataState = node.getAttribute?.('data-state'); - if (ariaBusy === 'true' || dataState === 'loading' || dataState === 'uploading' || dataState === 'pending') { - return true; - } - // Avoid false positives from user prompts ("upload:") or generic UI copy; only treat explicit progress strings as uploading. - const text = node.textContent?.toLowerCase?.() ?? ''; - return /\buploading\b/.test(text) || /\bprocessing\b/.test(text); - }); - }); - const attachmentChipSelectors = [ - '[data-testid*="chip"]', - '[data-testid*="attachment"]', - '[data-testid*="upload"]', - '[data-testid*="file"]', - '[aria-label*="Remove"]', - 'button[aria-label*="Remove"]', - ]; - const attachedNames = []; - for (const selector of attachmentChipSelectors) { - for (const node of Array.from(composerScope.querySelectorAll(selector))) { - if (!node) continue; - const text = node.textContent ?? ''; - const aria = node.getAttribute?.('aria-label') ?? ''; - const title = node.getAttribute?.('title') ?? ''; - const parentText = node.parentElement?.parentElement?.innerText ?? ''; - for (const value of [text, aria, title, parentText]) { - const normalized = value?.toLowerCase?.(); - if (normalized) attachedNames.push(normalized); - } - } - } - const cardTexts = Array.from(composerScope.querySelectorAll('[aria-label*="Remove"]')).map((btn) => - btn?.parentElement?.parentElement?.innerText?.toLowerCase?.() ?? '', - ); - attachedNames.push(...cardTexts.filter(Boolean)); - - const inputNames = []; - const inputScope = composerScope ? Array.from(composerScope.querySelectorAll('input[type="file"]')) : []; - const inputNodes = []; - const inputSeen = new Set(); - for (const el of [...inputScope, ...Array.from(document.querySelectorAll('input[type="file"]'))]) { - if (!inputSeen.has(el)) { - inputSeen.add(el); - inputNodes.push(el); - } - } - for (const input of inputNodes) { - if (!(input instanceof HTMLInputElement) || !input.files?.length) continue; - for (const file of Array.from(input.files)) { - if (file?.name) inputNames.push(file.name.toLowerCase()); - } - } - const countRegex = /(?:^|\\b)(\\d+)\\s+(?:files?|attachments?)\\b/; - const fileCountSelectors = [ - 'button', - '[role="button"]', - '[data-testid*="file"]', - '[data-testid*="upload"]', - '[data-testid*="attachment"]', - '[data-testid*="chip"]', - '[aria-label*="file"]', - '[title*="file"]', - '[aria-label*="attachment"]', - '[title*="attachment"]', - ].join(','); - const collectFileCount = (nodes) => { - let count = 0; - for (const node of nodes) { - if (!(node instanceof HTMLElement)) continue; - if (node.matches('textarea,input,[contenteditable="true"]')) continue; - const dataTestId = node.getAttribute?.('data-testid') ?? ''; - const aria = node.getAttribute?.('aria-label') ?? ''; - const title = node.getAttribute?.('title') ?? ''; - const tooltip = - node.getAttribute?.('data-tooltip') ?? node.getAttribute?.('data-tooltip-content') ?? ''; - const text = node.textContent ?? ''; - const parent = node.parentElement; - const parentText = parent?.textContent ?? ''; - const parentAria = parent?.getAttribute?.('aria-label') ?? ''; - const parentTitle = parent?.getAttribute?.('title') ?? ''; - const parentTooltip = - parent?.getAttribute?.('data-tooltip') ?? parent?.getAttribute?.('data-tooltip-content') ?? ''; - const parentTestId = parent?.getAttribute?.('data-testid') ?? ''; - const candidates = [ - text, - aria, - title, - tooltip, - dataTestId, - parentText, - parentAria, - parentTitle, - parentTooltip, - parentTestId, - ]; - let hasFileHint = false; - for (const raw of candidates) { - if (!raw) continue; - const lowered = String(raw).toLowerCase(); - if (lowered.includes('file') || lowered.includes('attachment')) { - hasFileHint = true; - break; - } - } - if (!hasFileHint) continue; - for (const raw of candidates) { - if (!raw) continue; - const match = String(raw).toLowerCase().match(countRegex); - if (match) { - const parsed = Number(match[1]); - if (Number.isFinite(parsed)) { - count = Math.max(count, parsed); - } - } - } - } - return count; - }; - const localFileCountNodes = composerScope - ? Array.from(composerScope.querySelectorAll(fileCountSelectors)) - : []; - let fileCount = collectFileCount(localFileCountNodes); - if (!fileCount) { - fileCount = collectFileCount(Array.from(document.querySelectorAll(fileCountSelectors))); - } - const filesAttached = attachedNames.length > 0 || fileCount > 0; - return { - state: button ? (disabled ? 'disabled' : 'ready') : 'missing', - uploading, - filesAttached, - attachedNames, - inputNames, - fileCount, - }; - })()`; while (Date.now() < deadline) { - const response = await Runtime.evaluate({ expression, returnByValue: true }); - const { result } = response; - const value = result?.value as - | { - state?: string; - uploading?: boolean; - filesAttached?: boolean; - attachedNames?: string[]; - inputNames?: string[]; - fileCount?: number; - } - | undefined; - if (!value && logger?.verbose) { - const exception = ( - response as { exceptionDetails?: { text?: string; exception?: { description?: string } } } - )?.exceptionDetails; - if (exception) { - const details = [exception.text, exception.exception?.description] - .filter((part) => Boolean(part)) - .join(" - "); - logger(`Attachment wait eval failed: ${details || "unknown error"}`); - } - } + const value = await readComposerSendReadiness(Runtime); if (value) { if (logger?.verbose) { const now = Date.now(); if (now - lastVerboseLog > 3000) { lastVerboseLog = now; logger( - `Attachment wait state: ${JSON.stringify({ - state: value.state, - uploading: value.uploading, - filesAttached: value.filesAttached, - attachedNames: (value.attachedNames ?? []).slice(0, 3), - inputNames: (value.inputNames ?? []).slice(0, 3), - fileCount: value.fileCount ?? 0, - })}`, + `Attachment wait state: ${JSON.stringify(summarizeComposerSendReadiness(value, expectedNames))}`, ); } } - const attachedNames = (value.attachedNames ?? []) - .map((name) => name.toLowerCase().replace(/\s+/g, " ").trim()) - .filter(Boolean); - const inputNames = (value.inputNames ?? []) - .map((name) => name.toLowerCase().replace(/\s+/g, " ").trim()) - .filter(Boolean); - const fileCount = typeof value.fileCount === "number" ? value.fileCount : 0; - const fileCountSatisfied = - expectedNormalized.length > 0 && fileCount >= expectedNormalized.length; - const matchesExpected = (expected: string): boolean => { - const baseName = expected.split("/").pop()?.split("\\").pop() ?? expected; - const normalizedExpected = baseName.toLowerCase().replace(/\s+/g, " ").trim(); - const expectedNoExt = normalizedExpected.replace(/\.[a-z0-9]{1,10}$/i, ""); - return attachedNames.some((raw) => { - if (raw.includes(normalizedExpected)) return true; - if (expectedNoExt.length >= 6 && raw.includes(expectedNoExt)) return true; - if (raw.includes("…") || raw.includes("...")) { - const marker = raw.includes("…") ? "…" : "..."; - const [prefixRaw, suffixRaw] = raw.split(marker); - const prefix = prefixRaw.trim(); - const suffix = suffixRaw.trim(); - const target = expectedNoExt.length >= 6 ? expectedNoExt : normalizedExpected; - const matchesPrefix = !prefix || target.includes(prefix); - const matchesSuffix = !suffix || target.includes(suffix); - return matchesPrefix && matchesSuffix; - } - return false; - }); - }; - const missing = expectedNormalized.filter((expected) => !matchesExpected(expected)); - if (missing.length === 0 || fileCountSatisfied) { - const stableThresholdMs = value.uploading ? 3000 : 1500; - if (attachmentMatchSince === null) { - attachmentMatchSince = Date.now(); - } - const stable = Date.now() - attachmentMatchSince > stableThresholdMs; - if (stable && value.state === "ready") { + const evidence = evaluateComposerAttachmentEvidence(value, expectedNames); + if (hasAttachmentCompletionEvidence(value, expectedNames)) { + if (expectedNames.length === 0 && !value.uploading) { return; } - // Don't treat disabled button as complete - wait for it to become 'ready'. - // The spinner detection is unreliable, so a disabled button likely means upload is in progress. - if (value.state === "missing" && (value.filesAttached || fileCountSatisfied)) { - return; + if (evidenceStableSince === null) { + evidenceStableSince = Date.now(); } - // If files are attached but button isn't ready yet, give it more time but don't fail immediately. - if (value.filesAttached || fileCountSatisfied) { - await delay(500); - continue; + const stableThresholdMs = value.uploading ? 3000 : 1500; + if (Date.now() - evidenceStableSince > stableThresholdMs) { + return; } } else { - attachmentMatchSince = null; - } - - // Fallback: if the file input has the expected names, allow progress once that condition is stable. - // Some ChatGPT surfaces only render the filename after sending the message. - const inputMissing = expectedNormalized.filter((expected) => { - const baseName = expected.split("/").pop()?.split("\\").pop() ?? expected; - const normalizedExpected = baseName.toLowerCase().replace(/\s+/g, " ").trim(); - const expectedNoExt = normalizedExpected.replace(/\.[a-z0-9]{1,10}$/i, ""); - return !inputNames.some( - (raw) => - raw.includes(normalizedExpected) || - (expectedNoExt.length >= 6 && raw.includes(expectedNoExt)), - ); - }); - // Don't include 'disabled' - a disabled button likely means upload is still in progress. - const inputStateOk = value.state === "ready" || value.state === "missing"; - const inputSeenNow = inputMissing.length === 0 || fileCountSatisfied; - const inputEvidenceOk = - Boolean(value.filesAttached) || Boolean(value.uploading) || fileCountSatisfied; - const stableThresholdMs = value.uploading ? 3000 : 1500; - if (inputSeenNow && inputStateOk && inputEvidenceOk) { - if (inputMatchSince === null) { - inputMatchSince = Date.now(); + evidenceStableSince = null; + if (logger?.verbose && expectedNames.length > 0) { + logger( + `Attachment evidence missing: ${JSON.stringify({ + attachedMatch: evidence.attachedMatch, + inputMatch: evidence.inputMatch, + fileCountSatisfied: evidence.fileCountSatisfied, + attachmentUiSatisfied: evidence.attachmentUiSatisfied, + })}`, + ); } - sawInputMatch = true; - } - if ( - inputMatchSince !== null && - inputStateOk && - inputEvidenceOk && - Date.now() - inputMatchSince > stableThresholdMs - ) { - return; - } - if (!inputSeenNow && !sawInputMatch) { - inputMatchSince = null; } } await delay(250); diff --git a/src/browser/actions/composerSendReadiness.ts b/src/browser/actions/composerSendReadiness.ts new file mode 100644 index 000000000..9bc4b4b3e --- /dev/null +++ b/src/browser/actions/composerSendReadiness.ts @@ -0,0 +1,408 @@ +import type { ChromeClient } from "../types.js"; +import { INPUT_SELECTORS, SEND_BUTTON_SELECTORS, UPLOAD_STATUS_SELECTORS } from "../constants.js"; +import { buildClickDispatcher } from "./domEvents.js"; + +export type ComposerSendButtonState = "ready" | "disabled" | "missing"; + +export interface ComposerSendReadinessState { + state: ComposerSendButtonState; + uploading: boolean; + filesAttached: boolean; + attachedNames: string[]; + inputNames: string[]; + fileCount: number; + attachmentUiCount: number; +} + +export interface ComposerAttachmentEvidence { + expectedNormalized: string[]; + attachedNames: string[]; + inputNames: string[]; + attachedMatch: boolean; + inputMatch: boolean; + fileCountSatisfied: boolean; + attachmentUiSatisfied: boolean; +} + +function normalizeToken(value: string): string { + return value.toLowerCase().replace(/\s+/g, " ").trim(); +} + +function normalizeExpectedName(value: string): string { + const baseName = value.split("/").pop()?.split("\\").pop() ?? value; + return normalizeToken(baseName); +} + +function matchesExpected(raw: string, expected: string): boolean { + if (raw.includes(expected)) { + return true; + } + const expectedNoExt = expected.replace(/\.[a-z0-9]{1,10}$/i, ""); + if (expectedNoExt.length >= 6 && raw.includes(expectedNoExt)) { + return true; + } + if (raw.includes("…") || raw.includes("...")) { + const marker = raw.includes("…") ? "…" : "..."; + const [prefixRaw, suffixRaw] = raw.split(marker); + const prefix = prefixRaw.trim(); + const suffix = suffixRaw.trim(); + const target = expectedNoExt.length >= 6 ? expectedNoExt : expected; + const matchesPrefix = !prefix || target.includes(prefix); + const matchesSuffix = !suffix || target.includes(suffix); + return matchesPrefix && matchesSuffix; + } + return false; +} + +function buildComposerScopeHelpersExpression(): string { + return ` + const sendSelectors = ${JSON.stringify(SEND_BUTTON_SELECTORS)}; + const promptSelectors = ${JSON.stringify(INPUT_SELECTORS)}; + const attachmentSelectors = [ + 'input[type="file"]', + '[data-testid*="attachment"]', + '[data-testid*="upload"]', + '[aria-label*="Remove"]', + '[aria-label*="remove"]', + ]; + const attachmentChipSelectors = [ + '[data-testid*="chip"]', + '[data-testid*="attachment"]', + '[data-testid*="upload"]', + '[data-testid*="file"]', + '[aria-label*="Remove"]', + 'button[aria-label*="Remove"]', + ]; + const fileCountSelectors = [ + 'button', + '[role="button"]', + '[data-testid*="file"]', + '[data-testid*="upload"]', + '[data-testid*="attachment"]', + '[data-testid*="chip"]', + '[aria-label*="file"]', + '[title*="file"]', + '[aria-label*="attachment"]', + '[title*="attachment"]', + ].join(','); + const countRegex = /(?:^|\\b)(\\d+)\\s+(?:files?|attachments?)\\b/; + const isVisible = (node) => { + if (!(node instanceof HTMLElement)) return false; + const rect = node.getBoundingClientRect(); + if (rect.width <= 0 || rect.height <= 0) return false; + const style = window.getComputedStyle(node); + return style.display !== 'none' && style.visibility !== 'hidden'; + }; + const findPromptNode = () => { + for (const selector of promptSelectors) { + const nodes = Array.from(document.querySelectorAll(selector)); + for (const node of nodes) { + if (isVisible(node)) return node; + } + } + for (const selector of promptSelectors) { + const node = document.querySelector(selector); + if (node) return node; + } + return null; + }; + const locateComposerRoot = () => { + const promptNode = findPromptNode(); + if (promptNode) { + const initial = + promptNode.closest('[data-testid*="composer"]') ?? + promptNode.closest('form') ?? + promptNode.parentElement ?? + document.body; + let current = initial; + let fallback = initial; + while (current && current !== document.body) { + const hasSend = sendSelectors.some((selector) => current.querySelector(selector)); + if (hasSend) { + fallback = current; + const hasAttachment = attachmentSelectors.some((selector) => current.querySelector(selector)); + if (hasAttachment) { + return current; + } + } + current = current.parentElement; + } + return fallback ?? initial; + } + return document.querySelector('form') ?? document.body; + }; + const composerRoot = locateComposerRoot(); + const composerScope = (() => { + if (!composerRoot) return document.body; + const parent = composerRoot.parentElement; + const parentHasSend = parent && sendSelectors.some((selector) => parent.querySelector(selector)); + return parentHasSend ? parent : composerRoot; + })(); + const findSendButton = () => { + const seen = new Set(); + const candidates = []; + const scopes = [composerScope, composerRoot, document.body]; + for (const scope of scopes) { + if (!scope || typeof scope.querySelectorAll !== 'function') continue; + for (const selector of sendSelectors) { + for (const node of Array.from(scope.querySelectorAll(selector))) { + if (!(node instanceof HTMLElement) || seen.has(node)) continue; + seen.add(node); + candidates.push(node); + } + } + if (candidates.length > 0) break; + } + return candidates.find((node) => isVisible(node)) ?? candidates[0] ?? null; + }; + const collectAttachmentNodes = () => { + const nodes = []; + const seen = new Set(); + for (const selector of attachmentChipSelectors) { + for (const node of Array.from(composerScope.querySelectorAll(selector))) { + if (!node || seen.has(node)) continue; + seen.add(node); + nodes.push(node); + } + } + return nodes; + }; + const collectFileCount = (nodes) => { + let count = 0; + for (const node of nodes) { + if (!(node instanceof HTMLElement)) continue; + if (node.matches('textarea,input,[contenteditable="true"]')) continue; + const dataTestId = node.getAttribute?.('data-testid') ?? ''; + const aria = node.getAttribute?.('aria-label') ?? ''; + const title = node.getAttribute?.('title') ?? ''; + const tooltip = + node.getAttribute?.('data-tooltip') ?? node.getAttribute?.('data-tooltip-content') ?? ''; + const text = node.textContent ?? ''; + const parent = node.parentElement; + const parentText = parent?.textContent ?? ''; + const parentAria = parent?.getAttribute?.('aria-label') ?? ''; + const parentTitle = parent?.getAttribute?.('title') ?? ''; + const parentTooltip = + parent?.getAttribute?.('data-tooltip') ?? parent?.getAttribute?.('data-tooltip-content') ?? ''; + const parentTestId = parent?.getAttribute?.('data-testid') ?? ''; + const candidates = [ + text, + aria, + title, + tooltip, + dataTestId, + parentText, + parentAria, + parentTitle, + parentTooltip, + parentTestId, + ]; + let hasFileHint = false; + for (const raw of candidates) { + if (!raw) continue; + const lowered = String(raw).toLowerCase(); + if (lowered.includes('file') || lowered.includes('attachment')) { + hasFileHint = true; + break; + } + } + if (!hasFileHint) continue; + for (const raw of candidates) { + if (!raw) continue; + const match = String(raw).toLowerCase().match(countRegex); + if (match) { + const parsed = Number(match[1]); + if (Number.isFinite(parsed)) { + count = Math.max(count, parsed); + } + } + } + } + return count; + }; + `; +} + +export function buildComposerSendReadinessExpression(): string { + return `(() => { + ${buildComposerScopeHelpersExpression()} + const button = findSendButton(); + const style = button ? window.getComputedStyle(button) : null; + const disabled = button + ? button.hasAttribute('disabled') || + button.getAttribute('aria-disabled') === 'true' || + button.getAttribute('data-disabled') === 'true' || + style.pointerEvents === 'none' || + style.display === 'none' || + style.visibility === 'hidden' + : null; + const uploadingSelectors = ${JSON.stringify(UPLOAD_STATUS_SELECTORS)}; + const uploading = uploadingSelectors.some((selector) => { + return Array.from(composerScope.querySelectorAll(selector)).some((node) => { + const ariaBusy = node.getAttribute?.('aria-busy'); + const dataState = node.getAttribute?.('data-state'); + if ( + ariaBusy === 'true' || + dataState === 'loading' || + dataState === 'uploading' || + dataState === 'pending' + ) { + return true; + } + const text = node.textContent?.toLowerCase?.() ?? ''; + return /\\buploading\\b/.test(text) || /\\bprocessing\\b/.test(text); + }); + }); + const attachmentNodes = collectAttachmentNodes(); + const attachedNames = []; + for (const node of attachmentNodes) { + const text = node.textContent ?? ''; + const aria = node.getAttribute?.('aria-label') ?? ''; + const title = node.getAttribute?.('title') ?? ''; + const parentText = node.parentElement?.parentElement?.innerText ?? ''; + for (const value of [text, aria, title, parentText]) { + const normalized = value?.toLowerCase?.(); + if (normalized) attachedNames.push(normalized); + } + } + const cardTexts = Array.from(composerScope.querySelectorAll('[aria-label*="Remove"]')).map((btn) => + btn?.parentElement?.parentElement?.innerText?.toLowerCase?.() ?? '', + ); + attachedNames.push(...cardTexts.filter(Boolean)); + const inputNames = []; + const inputScope = Array.from(composerScope.querySelectorAll('input[type="file"]')); + const inputNodes = []; + const inputSeen = new Set(); + for (const el of [...inputScope, ...Array.from(document.querySelectorAll('input[type="file"]'))]) { + if (!inputSeen.has(el)) { + inputSeen.add(el); + inputNodes.push(el); + } + } + for (const input of inputNodes) { + if (!(input instanceof HTMLInputElement) || !input.files?.length) continue; + for (const file of Array.from(input.files)) { + if (file?.name) inputNames.push(file.name.toLowerCase()); + } + } + const localFileCountNodes = Array.from(composerScope.querySelectorAll(fileCountSelectors)); + let fileCount = collectFileCount(localFileCountNodes); + if (!fileCount) { + fileCount = collectFileCount(Array.from(document.querySelectorAll(fileCountSelectors))); + } + const attachmentUiCount = attachmentNodes.length; + const filesAttached = attachedNames.length > 0 || fileCount > 0 || attachmentUiCount > 0; + return { + state: button ? (disabled ? 'disabled' : 'ready') : 'missing', + uploading, + filesAttached, + attachedNames, + inputNames, + fileCount, + attachmentUiCount, + }; + })()`; +} + +export function buildComposerSendClickExpression(): string { + return `(() => { + ${buildClickDispatcher()} + ${buildComposerScopeHelpersExpression()} + const button = findSendButton(); + if (!button) return 'missing'; + const style = window.getComputedStyle(button); + const disabled = + button.hasAttribute('disabled') || + button.getAttribute('aria-disabled') === 'true' || + button.getAttribute('data-disabled') === 'true' || + style.pointerEvents === 'none' || + style.display === 'none' || + style.visibility === 'hidden'; + if (disabled) return 'disabled'; + dispatchClickSequence(button); + return 'clicked'; + })()`; +} + +export async function readComposerSendReadiness( + Runtime: ChromeClient["Runtime"], +): Promise { + const response = await Runtime.evaluate({ + expression: buildComposerSendReadinessExpression(), + returnByValue: true, + }); + return (response.result?.value as ComposerSendReadinessState | undefined) ?? null; +} + +export function evaluateComposerAttachmentEvidence( + state: ComposerSendReadinessState, + expectedNames: string[] = [], +): ComposerAttachmentEvidence { + const expectedNormalized = expectedNames.map(normalizeExpectedName).filter(Boolean); + const attachedNames = (state.attachedNames ?? []).map(normalizeToken).filter(Boolean); + const inputNames = (state.inputNames ?? []).map(normalizeToken).filter(Boolean); + if (expectedNormalized.length === 0) { + const attached = Boolean( + state.filesAttached || state.fileCount > 0 || state.attachmentUiCount > 0, + ); + const input = inputNames.length > 0; + return { + expectedNormalized, + attachedNames, + inputNames, + attachedMatch: attached, + inputMatch: input, + fileCountSatisfied: attached, + attachmentUiSatisfied: attached, + }; + } + return { + expectedNormalized, + attachedNames, + inputNames, + attachedMatch: expectedNormalized.every((expected) => + attachedNames.some((raw) => matchesExpected(raw, expected)), + ), + inputMatch: expectedNormalized.every((expected) => + inputNames.some((raw) => matchesExpected(raw, expected)), + ), + fileCountSatisfied: state.fileCount >= expectedNormalized.length, + attachmentUiSatisfied: state.attachmentUiCount >= expectedNormalized.length, + }; +} + +export function hasAttachmentCompletionEvidence( + state: ComposerSendReadinessState, + expectedNames: string[] = [], +): boolean { + const evidence = evaluateComposerAttachmentEvidence(state, expectedNames); + return ( + evidence.attachedMatch || + evidence.inputMatch || + evidence.fileCountSatisfied || + evidence.attachmentUiSatisfied + ); +} + +export function summarizeComposerSendReadiness( + state: ComposerSendReadinessState | null, + expectedNames: string[] = [], +): Record { + if (!state) { + return { state: "unavailable" }; + } + const evidence = evaluateComposerAttachmentEvidence(state, expectedNames); + return { + state: state.state, + uploading: state.uploading, + filesAttached: state.filesAttached, + fileCount: state.fileCount, + attachmentUiCount: state.attachmentUiCount, + attachedNames: evidence.attachedNames.slice(0, 3), + inputNames: evidence.inputNames.slice(0, 3), + attachedMatch: evidence.attachedMatch, + inputMatch: evidence.inputMatch, + fileCountSatisfied: evidence.fileCountSatisfied, + attachmentUiSatisfied: evidence.attachmentUiSatisfied, + }; +} diff --git a/src/browser/actions/promptComposer.ts b/src/browser/actions/promptComposer.ts index e5eccb3c8..d3648ca8b 100644 --- a/src/browser/actions/promptComposer.ts +++ b/src/browser/actions/promptComposer.ts @@ -3,7 +3,6 @@ import { INPUT_SELECTORS, PROMPT_PRIMARY_SELECTOR, PROMPT_FALLBACK_SELECTOR, - SEND_BUTTON_SELECTORS, CONVERSATION_TURN_SELECTOR, STOP_BUTTON_SELECTOR, ASSISTANT_ROLE_SELECTOR, @@ -12,6 +11,13 @@ import { delay } from "../utils.js"; import { logDomFailure } from "../domDebug.js"; import { buildClickDispatcher } from "./domEvents.js"; import { BrowserAutomationError } from "../../oracle/errors.js"; +import { + buildComposerSendClickExpression, + buildComposerSendReadinessExpression, + hasAttachmentCompletionEvidence, + readComposerSendReadiness, + summarizeComposerSendReadiness, +} from "./composerSendReadiness.js"; const ENTER_KEY_EVENT = { key: "Enter", @@ -201,7 +207,12 @@ export async function submitPrompt( ); } - const clicked = await attemptSendButton(runtime, logger, deps?.attachmentNames); + const clicked = await attemptSendButton( + runtime, + logger, + deps?.attachmentNames, + deps.inputTimeoutMs ?? undefined, + ); if (!clicked) { await input.dispatchKeyEvent({ type: "keyDown", @@ -306,97 +317,82 @@ async function waitForDomReady( logger?.(`Page did not reach ready/composer state within ${timeoutMs}ms; continuing cautiously.`); } -function buildAttachmentReadyExpression(attachmentNames: string[]): string { - const namesLiteral = JSON.stringify(attachmentNames.map((name) => name.toLowerCase())); - return `(() => { - const names = ${namesLiteral}; - const composer = - document.querySelector('[data-testid*="composer"]') || - document.querySelector('form') || - document.body || - document; - const match = (node, name) => (node?.textContent || '').toLowerCase().includes(name); - - // Restrict to attachment affordances; never scan generic div/span nodes (prompt text can contain the file name). - const attachmentSelectors = [ - '[data-testid*="chip"]', - '[data-testid*="attachment"]', - '[data-testid*="upload"]', - '[aria-label="Remove file"]', - 'button[aria-label="Remove file"]', - ]; - - const chipsReady = names.every((name) => - Array.from(composer.querySelectorAll(attachmentSelectors.join(','))).some((node) => match(node, name)), - ); - const inputsReady = names.every((name) => - Array.from(composer.querySelectorAll('input[type="file"]')).some((el) => - Array.from((el instanceof HTMLInputElement ? el.files : []) || []).some((file) => - file?.name?.toLowerCase?.().includes(name), - ), - ), - ); - - return chipsReady || inputsReady; - })()`; -} - -export function buildAttachmentReadyExpressionForTest(attachmentNames: string[]) { - return buildAttachmentReadyExpression(attachmentNames); +export function buildComposerSendReadinessExpressionForTest() { + return buildComposerSendReadinessExpression(); } async function attemptSendButton( Runtime: ChromeClient["Runtime"], - _logger?: BrowserLogger, + logger?: BrowserLogger, attachmentNames?: string[], + inputTimeoutMs?: number, ): Promise { - const script = `(() => { - ${buildClickDispatcher()} - const selectors = ${JSON.stringify(SEND_BUTTON_SELECTORS)}; - let button = null; - for (const selector of selectors) { - button = document.querySelector(selector); - if (button) break; - } - if (!button) return 'missing'; - const ariaDisabled = button.getAttribute('aria-disabled'); - const dataDisabled = button.getAttribute('data-disabled'); - const style = window.getComputedStyle(button); - const disabled = - button.hasAttribute('disabled') || - ariaDisabled === 'true' || - dataDisabled === 'true' || - style.pointerEvents === 'none' || - style.display === 'none'; - // Learned: some send buttons render but are inert; only click when truly enabled. - if (disabled) return 'disabled'; - // Use unified pointer/mouse sequence to satisfy React handlers. - dispatchClickSequence(button); - return 'clicked'; - })()`; - - const deadline = Date.now() + 8_000; + const needAttachment = Array.isArray(attachmentNames) && attachmentNames.length > 0; + const deadline = + Date.now() + + (needAttachment ? Math.max(15_000, Math.min(inputTimeoutMs ?? 30_000, 30_000)) : 8_000); + let readinessStableSince: number | null = null; + let lastReadinessLog = 0; while (Date.now() < deadline) { - const needAttachment = Array.isArray(attachmentNames) && attachmentNames.length > 0; if (needAttachment) { - const ready = await Runtime.evaluate({ - expression: buildAttachmentReadyExpression(attachmentNames), - returnByValue: true, - }); - if (!ready?.result?.value) { + const readiness = await readComposerSendReadiness(Runtime); + if (logger?.verbose) { + const now = Date.now(); + if (now - lastReadinessLog > 3000) { + lastReadinessLog = now; + logger( + `Attachment send readiness: ${JSON.stringify( + summarizeComposerSendReadiness(readiness, attachmentNames), + )}`, + ); + } + } + const canSend = + readiness && + readiness.state === "ready" && + hasAttachmentCompletionEvidence(readiness, attachmentNames); + if (!canSend) { + readinessStableSince = null; + await delay(150); + continue; + } + if (readinessStableSince === null) { + readinessStableSince = Date.now(); + } + const stableThresholdMs = readiness.uploading ? 3000 : 750; + if (Date.now() - readinessStableSince < stableThresholdMs) { await delay(150); continue; } } - const { result } = await Runtime.evaluate({ expression: script, returnByValue: true }); + const { result } = await Runtime.evaluate({ + expression: buildComposerSendClickExpression(), + returnByValue: true, + }); if (result.value === "clicked") { return true; } - if (result.value === "missing") { + if (!needAttachment && result.value === "missing") { break; } await delay(100); } + if (needAttachment) { + const readiness = await readComposerSendReadiness(Runtime).catch(() => null); + logger?.( + `Attachment send readiness timed out: ${JSON.stringify( + summarizeComposerSendReadiness(readiness, attachmentNames), + )}`, + ); + throw new BrowserAutomationError( + "Attachments never reached a send-ready state before timeout.", + { + stage: "submit-prompt", + code: "attachment-send-not-ready", + readiness: summarizeComposerSendReadiness(readiness, attachmentNames), + }, + ); + } return false; } @@ -567,5 +563,6 @@ async function verifyPromptCommitted( // biome-ignore lint/style/useNamingConvention: test-only export used in vitest suite export const __test__ = { + attemptSendButton, verifyPromptCommitted, }; diff --git a/src/browser/index.ts b/src/browser/index.ts index b89cd19e9..95ace4325 100644 --- a/src/browser/index.ts +++ b/src/browser/index.ts @@ -504,7 +504,6 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise path.basename(a.path)); - let attachmentWaitTimedOut = false; let inputOnlyAttachments = false; if (submissionAttachments.length > 0) { if (!DOM) { @@ -534,24 +533,11 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise = { runtime: Runtime, input: Input, @@ -559,7 +545,7 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise 0) { - if (attachmentWaitTimedOut) { - logger("Attachment confirmation timed out; skipping user-turn attachment verification."); - } else if (inputOnlyAttachments) { + if (inputOnlyAttachments) { logger( "Attachment UI did not render before send; skipping user-turn attachment verification.", ); diff --git a/tests/browser/attachmentsCompletion.test.ts b/tests/browser/attachmentsCompletion.test.ts index 6df12a72b..151c18aca 100644 --- a/tests/browser/attachmentsCompletion.test.ts +++ b/tests/browser/attachmentsCompletion.test.ts @@ -84,7 +84,7 @@ describe("attachment completion fallbacks", () => { useRealTime(); }); - test("waitForAttachmentCompletion times out when send button stays disabled (upload likely in progress)", async () => { + test("waitForAttachmentCompletion resolves when attachment UI count is stable even if send button stays disabled", async () => { useFakeTime(); const runtime = { @@ -94,8 +94,35 @@ describe("attachment completion fallbacks", () => { state: "disabled", uploading: false, filesAttached: true, - attachedNames: ["oracle-attach-verify.txt"], + attachedNames: ["remove file"], inputNames: [], + fileCount: 0, + attachmentUiCount: 1, + }, + }, + }), + } as unknown as ChromeClient["Runtime"]; + + const promise = waitForAttachmentCompletion(runtime, 10_000, ["oracle-attach-verify.txt"]); + await vi.advanceTimersByTimeAsync(2_000); + await expect(promise).resolves.toBeUndefined(); + useRealTime(); + }); + + test("waitForAttachmentCompletion times out when neither attachment UI nor file input matches", async () => { + useFakeTime(); + + const runtime = { + evaluate: vi.fn().mockResolvedValue({ + result: { + value: { + state: "disabled", + uploading: false, + filesAttached: true, + attachedNames: ["remove file"], + inputNames: [], + fileCount: 0, + attachmentUiCount: 0, }, }, }), diff --git a/tests/browser/promptComposer.test.ts b/tests/browser/promptComposer.test.ts index c6f31c3a8..94c6d14b5 100644 --- a/tests/browser/promptComposer.test.ts +++ b/tests/browser/promptComposer.test.ts @@ -72,4 +72,47 @@ describe("promptComposer", () => { promptComposer.verifyPromptCommitted(runtime as never, "hello", 150), ).resolves.toBe(1); }); + + test("attachment sends time out instead of falling back when composer never becomes send-ready", async () => { + vi.useFakeTimers(); + try { + const runtime = { + evaluate: vi.fn().mockImplementation(async ({ expression }: { expression: string }) => { + if (expression.includes("dispatchClickSequence")) { + return { result: { value: "clicked" } }; + } + return { + result: { + value: { + state: "disabled", + uploading: false, + filesAttached: true, + attachedNames: ["remove file"], + inputNames: [], + fileCount: 0, + attachmentUiCount: 1, + }, + }, + }; + }), + } as unknown as { + evaluate: (args: { expression: string; returnByValue?: boolean }) => Promise; + }; + + const promise = promptComposer.attemptSendButton( + runtime as never, + (() => undefined) as never, + ["oracle-attach-verify.txt"], + 250, + ); + const assertion = expect(promise).rejects.toThrow(/send-ready state/i); + await vi.advanceTimersByTimeAsync(20_000); + await assertion; + expect(runtime.evaluate).not.toHaveBeenCalledWith( + expect.objectContaining({ expression: expect.stringContaining("dispatchClickSequence") }), + ); + } finally { + vi.useRealTimers(); + } + }); }); diff --git a/tests/browser/promptComposerExpressions.test.ts b/tests/browser/promptComposerExpressions.test.ts index 72519c258..6af8d867f 100644 --- a/tests/browser/promptComposerExpressions.test.ts +++ b/tests/browser/promptComposerExpressions.test.ts @@ -1,12 +1,13 @@ import { describe, expect, test } from "vitest"; -import { buildAttachmentReadyExpressionForTest } from "../../src/browser/actions/promptComposer.ts"; +import { buildComposerSendReadinessExpressionForTest } from "../../src/browser/actions/promptComposer.ts"; describe("prompt composer attachment expressions", () => { - test("attachment ready check does not match prompt text", () => { - const expression = buildAttachmentReadyExpressionForTest(["oracle-attach-verify.txt"]); - expect(expression).toContain("document.querySelector('[data-testid*=\"composer\"]')"); - expect(expression).toContain("composer.querySelectorAll"); + test("composer readiness check scopes attachment evidence to the composer", () => { + const expression = buildComposerSendReadinessExpressionForTest(); + expect(expression).toContain("locateComposerRoot"); + expect(expression).toContain("composerScope.querySelectorAll"); expect(expression).toContain('input[type="file"]'); + expect(expression).toContain("attachmentUiCount"); expect(expression).not.toContain("a,div,span"); expect(expression).not.toContain( 'document.querySelectorAll(\'[data-testid*="chip"],[data-testid*="attachment"],a,div,span\')',