Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
64e7170
feat: upload method
udaysvc Aug 27, 2025
38d2a3b
refactor: streamline file upload process and enhance error handling
udaysvc Aug 28, 2025
448ff2f
quick tune
udaysvc Aug 28, 2025
318aef5
chore: add environment configuration for Stagehand
udaysvc Aug 28, 2025
0ff9eb5
greptile comment fixes
udaysvc Aug 28, 2025
5bcbcb1
add changeset for upload method feature
udaysvc Aug 28, 2025
ecd7798
clean
udaysvc Aug 28, 2025
ca9ab89
Merge branch 'browserbase:main' into uday/upload
udaysvc Aug 29, 2025
cb4ebfb
Merge remote-tracking branch 'origin/main' into uday/upload
udaysvc Sep 2, 2025
3d4f29e
Merge branch 'browserbase:main' into uday/upload
udaybuilds47 Sep 3, 2025
4fc2fac
Merge branch 'browserbase:main' into uday/upload
udaybuilds47 Sep 6, 2025
196fc0b
Merge branch 'browserbase:main' into uday/upload
udaysvc Sep 10, 2025
9daa584
add playwright arguments to agent (#1066)
tkattkat Sep 10, 2025
cf0438c
Merge branch 'browserbase:main' into uday/upload
udaysvc Sep 11, 2025
f6f05b0
[docs] add info on not needing project id in browserbase session para…
chrisreadsf Sep 11, 2025
bce40bc
Merge branch 'browserbase:main' into uday/upload
udaysvc Sep 12, 2025
c886544
Export aisdk (#1058)
chrisreadsf Sep 15, 2025
87505a3
docs: update fingerprint settings to reflect the new session create c…
Kylejeong2 Sep 15, 2025
5bb68b9
Resolve merge conflict in lib/index.ts: export both LLMProvider and a…
udaysvc Sep 15, 2025
3c39a05
[docs] export aisdk (#1074)
chrisreadsf Sep 16, 2025
bf2d0e7
Fix zod peer dependency support (#1032)
miguelg719 Sep 16, 2025
7f38b3a
add stagehand agent to api (#1077)
tkattkat Sep 16, 2025
3a0dc58
add playwright screenshot option for browserbase env (#1070)
derekmeegan Sep 17, 2025
b7be89e
add webbench, chrome-based OS world, and ground truth to web voyager …
filip-michalsky Sep 18, 2025
df76f7a
Fix python installation instructions (#1087)
rsbryan Sep 19, 2025
b9c8102
update xpath in `observe_vantechjournal` (#1088)
seanmcguire12 Sep 20, 2025
536f366
Fix session create logs on api (#1089)
miguelg719 Sep 21, 2025
8ff5c5a
Improve failed act logs (#1090)
miguelg719 Sep 21, 2025
569e444
[docs] add aisdk workaround before npm release + add versions to work…
chrisreadsf Sep 22, 2025
72a3a4d
Merge branch 'browserbase:main' into uday/upload
udaysvc Sep 22, 2025
8c0fd01
pass stagehand, instead of stagehandPage to agent (#1082)
tkattkat Sep 22, 2025
dc2d420
img diff algo for screenshots (#1072)
filip-michalsky Sep 23, 2025
f89b13e
Eval metadata (#1092)
miguelg719 Sep 23, 2025
86ee6c3
Merge branch 'browserbase:main' into uday/upload
udaysvc Sep 24, 2025
108de3c
update evals cli docs (#1096)
miguelg719 Sep 26, 2025
e0e6b30
adding support for new claude 4.5 sonnet agent model (#1099)
Kylejeong2 Sep 29, 2025
889cb6c
properly convert custom / mcp tools to anthropic cua format (#1103)
tkattkat Oct 1, 2025
a99aa48
Add current date and page url to agent context (#1102)
miguelg719 Oct 1, 2025
a1ad06c
Additional agent logging (#1104)
miguelg719 Oct 1, 2025
a5be4c9
Merge branch 'browserbase:main' into uday/upload
udaybuilds47 Oct 3, 2025
0791404
Include import statements in extract code examples (#1105)
victlue Oct 4, 2025
3ccf335
fix: missing URLs for `extract()` with array schema (#1107)
seanmcguire12 Oct 6, 2025
dda52f1
Support for new Gemini Computer Use Models (#1110)
miguelg719 Oct 7, 2025
9a29937
google cua docs (#1111)
jay-sahnan Oct 7, 2025
34da7d3
Version Packages (#1062)
github-actions[bot] Oct 7, 2025
ec5317c
Fix Python example in observe.mdx (#1113)
rsbryan Oct 7, 2025
c0fbc51
set default viewport when running on browserbase (#1114)
seanmcguire12 Oct 8, 2025
7da5b55
Version Packages (#1115)
github-actions[bot] Oct 8, 2025
00a8897
Merge branch 'browserbase:main' into uday/upload
udaysvc Oct 8, 2025
89f9237
Fix Quickstart Guide link in README (#1119)
renl Oct 10, 2025
a137da5
Merge branch 'browserbase:main' into uday/upload
udaysvc Oct 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions examples/upload_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Import directly from local dist to ensure latest build is used
import { Stagehand } from "../dist";
import type { Page as PlaywrightPage } from "playwright";
import StagehandConfig from "../stagehand.config";

// Load environment variables
import dotenv from "dotenv";
dotenv.config({ path: "../.env" });

/**
 * Example script: upload a file to a web form with `stagehand.upload()` and
 * then try to submit the form.
 *
 * Command-line arguments (both optional):
 *   argv[2] - URL of the file to upload (defaults to a public test PDF)
 *   argv[3] - URL of the page that contains the upload form
 *
 * The browser session is always closed, even when a step throws.
 */
async function main() {
  // Accept file URL as command line argument or use default
  const fileUrl = process.argv[2] || "https://www.orimi.com/pdf-test.pdf";
  const targetPage =
    process.argv[3] || "https://ps.uci.edu/~franklin/doc/file_upload.html";

  console.log(`File URL: ${fileUrl}`);
  console.log(`Target page: ${targetPage}`);

  const stagehand = new Stagehand({
    ...StagehandConfig,
    env: "BROWSERBASE",
    verbose: 1,
    modelName: "openai/gpt-4o-mini",
  });
  await stagehand.init();
  const page = stagehand.page;

  try {
    // Navigate to the target page
    await page.goto(targetPage, {
      waitUntil: "domcontentloaded",
    });

    // Debug: check presence of file inputs before calling upload
    const count = await page.locator('input[type="file"]').count();
    console.log("file input count:", count);

    // Debug: dump the <body> markup. This is raw HTML, not an accessibility
    // snapshot; it is best-effort and must never abort the run.
    try {
      const bodyHtml = await page.evaluate(
        () => document.querySelector("body")?.innerHTML || "No body content",
      );
      console.log("Page content:");
      console.log(bodyHtml);
    } catch (e) {
      console.log("Failed to get page content:", e);
    }

    // Upload using the new helper - let observe find the right input.
    // The URL is passed directly since upload() downloads http(s) URLs itself.
    const result = await stagehand.upload("Upload this file", fileUrl);
    console.log("upload result:", result);

    // Submitting a form that has no file attached would only hide the failure.
    if (!result.success) {
      console.log("Upload failed; skipping form submission");
      return;
    }

    // Try to submit the form using observe to find the submit button
    try {
      const [submitAction] = await page.observe(
        "Find and click the submit or send button",
      );
      if (submitAction?.selector) {
        console.log(
          `Found submit button with selector: ${submitAction.selector}`,
        );

        // Avoid mixed-content warning by upgrading http action → https when possible
        try {
          await page.evaluate(() => {
            // Prefer the form that owns the file input; the first <form> on
            // the page may be an unrelated one (e.g. a search box).
            const fileInput = document.querySelector(
              'input[type="file"]',
            ) as HTMLInputElement | null;
            const form = fileInput?.form ?? document.querySelector("form");
            if (
              form &&
              typeof form.action === "string" &&
              form.action.startsWith("http://")
            ) {
              form.action = form.action.replace("http://", "https://");
            }
          });
        } catch {
          // ignore non-fatal submit upgrade errors
        }

        // act() reports failure through its result, so check it before
        // claiming that the form was submitted.
        const submitResult = await page.act(submitAction);
        if (submitResult.success) {
          console.log("Form submitted successfully");
        } else {
          console.log("Form submission failed:", submitResult.message);
        }
      } else {
        console.log("No submit button found via observe");
      }
    } catch (e) {
      console.log("Failed to find submit button via observe:", e);
    }

    // Give the page a moment to process the submission before closing.
    await page.waitForTimeout(1500);
  } finally {
    await stagehand.close();
  }
}

// Script entry point: run main() and turn any unhandled rejection into a
// logged error plus a non-zero exit status.
const exitOnFailure = (reason: unknown): never => {
  console.error(reason);
  process.exit(1);
};

main().catch(exitOnFailure);
222 changes: 221 additions & 1 deletion lib/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Browserbase } from "@browserbasehq/sdk";
import { Browser, chromium } from "playwright";
import { Browser, chromium, FileChooser } from "playwright";
import dotenv from "dotenv";
import fs from "fs";
import os from "os";
Expand All @@ -20,6 +20,8 @@ import {
ActOptions,
ExtractOptions,
ObserveOptions,
FileSpec,
UploadResult,
} from "../types/stagehand";
import { StagehandContext } from "./StagehandContext";
import { StagehandPage } from "./StagehandPage";
Expand All @@ -34,6 +36,10 @@ import { AgentExecuteOptions, AgentResult } from "../types/agent";
import { StagehandAgentHandler } from "./handlers/agentHandler";
import { StagehandOperatorHandler } from "./handlers/operatorHandler";
import { StagehandLogger } from "./logger";
import {
deepLocator,
deepLocatorWithShadow,
} from "./handlers/handlerUtils/actHandlerUtils";

import {
StagehandError,
Expand Down Expand Up @@ -892,6 +898,7 @@ export class Stagehand {
| ExtractOptions<z.AnyZodObject>
| ObserveOptions
| { url: string; options: GotoOptions }
| { hint: string; file: FileSpec }
| string,
result?: unknown,
): void {
Expand All @@ -903,6 +910,218 @@ export class Stagehand {
});
}

/**
 * Upload a file to an upload control identified by a natural language hint.
 *
 * The control is located with `observe()`. The following strategies are then
 * attempted, in order, against the observed element:
 * 1) If the element is itself an <input type="file">, set the files on it directly.
 * 2) Otherwise click the element and, if a file chooser opens, set the files on it.
 * 3) Otherwise heuristically locate an associated file input (label target,
 *    descendant, or inside one of up to 5 ancestors) and set the files on it.
 *
 * Every outcome, successful or not, is recorded in the history.
 *
 * @param hint - Natural language description of the upload control.
 * @param file - An http(s) URL (downloaded first), a local path string,
 *   `{ path }`, or `{ buffer, name, mimeType }`.
 * @returns The strategy that succeeded, or `success: false` when no strategy
 *   could attach the file. Errors raised while locating or driving the control
 *   are reported in `message` instead of being thrown.
 * @throws StagehandError when the FileSpec is invalid or the URL download fails.
 */
public async upload(hint: string, file: FileSpec): Promise<UploadResult> {
  const page = this.stagehandPage.page;

  type FilePayload = { name: string; mimeType: string; buffer: Buffer };

  // Last path segment, accepting both "/" and "\" separators.
  const baseName = (p: string): string | undefined =>
    p.split(/[\\/]/).pop() || undefined;

  // Normalize a FileSpec into what setInputFiles()/setFiles() accept:
  // either a local path string or an in-memory payload.
  const toSetInputArg = async (f: FileSpec): Promise<string | FilePayload> => {
    if (typeof f === "string") {
      // If it's a URL, download and return a Buffer payload
      if (/^https?:\/\//i.test(f)) {
        const res = await fetch(f);
        if (!res.ok) {
          throw new StagehandError(
            `Failed to download file: ${res.status} ${res.statusText}`,
          );
        }
        const mimeType =
          res.headers.get("content-type") || "application/octet-stream";
        const name = new URL(f).pathname.split("/").pop() || "uploaded_file";
        return { name, mimeType, buffer: Buffer.from(await res.arrayBuffer()) };
      }
      // Otherwise treat as a local path if provided as string (kept for compatibility)
      return f;
    }
    if (f?.path) return f.path;
    if (f?.buffer && f?.name && f?.mimeType) {
      return { name: f.name, mimeType: f.mimeType, buffer: f.buffer };
    }
    throw new StagehandError(
      "Invalid FileSpec. Provide an http(s) URL, a path, or { buffer, name, mimeType }",
    );
  };

  const filesArg = await toSetInputArg(file);

  // Report the name of the payload that is actually uploaded, so the result
  // always agrees with what the page receives.
  const fileName =
    typeof filesArg === "string" ? baseName(filesArg) : filesArg.name;

  const finish = async (result: UploadResult): Promise<UploadResult> => {
    this.addToHistory("upload", { hint, file }, result);
    return result;
  };

  // Strategies are best-effort: a failure falls through to the next one, and
  // the most recent error is surfaced in the final failure message.
  let lastError: unknown;

  // Use NL→selector to locate the upload control strictly
  try {
    const [candidate] = await page.observe(
      "Find the file upload control or input for: " + String(hint),
    );
    if (candidate?.selector) {
      const selector = candidate.selector;
      const raw = selector.replace(/^xpath=/i, "").trim();
      const locator = this.experimental
        ? await deepLocatorWithShadow(page, raw)
        : deepLocator(page, raw);

      // Shared success path: wait for the DOM to settle, then record the result.
      const succeed = async (
        method: UploadResult["method"],
        message: string,
      ): Promise<UploadResult> => {
        await this.stagehandPage._waitForSettledDom();
        return finish({
          success: true,
          method,
          hint,
          selector,
          fileName,
          message,
        });
      };

      // 1) If this is a file input → set directly
      const isFileInput = await locator
        .evaluate((el): boolean => {
          const tagName = el.tagName.toLowerCase();
          const type = (el as HTMLInputElement).type;
          return tagName === "input" && type === "file";
        })
        .catch((): boolean => false);

      if (isFileInput) {
        await locator.setInputFiles(filesArg);
        return await succeed("input", "File attached via direct input");
      }

      // 2) Otherwise attempt to trigger a file chooser via the hinted control
      try {
        // Start listening before the click so the event cannot be missed, and
        // attach the catch immediately so a timeout never becomes an unhandled
        // rejection.
        const chooserPromise = page
          .waitForEvent("filechooser", { timeout: 8000 })
          .catch((): FileChooser | undefined => undefined);
        await locator.click({ timeout: 3000 }).catch((): void => {});
        const chooser = await chooserPromise;
        if (chooser) {
          await chooser.setFiles(filesArg);
          return await succeed("chooser", "File attached via file chooser");
        }
      } catch (err) {
        lastError = err;
      }

      // 3) Heuristic: find an associated file input near/within the hinted control
      try {
        const elementHandle = await locator.elementHandle();
        if (elementHandle) {
          try {
            const inputHandle = await elementHandle.evaluateHandle(
              (el: Element): HTMLInputElement | null => {
                const doc: Document = el.ownerDocument || document;
                const asInput = (n: Element | null): HTMLInputElement | null =>
                  n &&
                  n.tagName.toLowerCase() === "input" &&
                  (n as HTMLInputElement).type === "file"
                    ? (n as HTMLInputElement)
                    : null;

                // a) Self
                const self = asInput(el);
                if (self) return self;

                // b) Label association
                const label = el.closest("label");
                if (label?.htmlFor) {
                  const byLabel = asInput(doc.getElementById(label.htmlFor));
                  if (byLabel) return byLabel;
                }

                // c) Descendants
                const inside = asInput(el.querySelector('input[type="file"]'));
                if (inside) return inside;

                // d) Up to 5 ancestors, then search within each ancestor subtree
                let cur: Element | null = el.parentElement;
                for (let i = 0; i < 5 && cur; i++) {
                  const nearby = asInput(
                    cur.querySelector('input[type="file"]'),
                  );
                  if (nearby) return nearby;
                  cur = cur.parentElement;
                }
                return null;
              },
              undefined as unknown,
            );
            try {
              // asElement() is null when the page-side lookup returned null.
              const inputEl = inputHandle.asElement();
              if (inputEl) {
                await inputEl.setInputFiles(filesArg);
                return await succeed(
                  "fallback",
                  "File attached via heuristic input lookup",
                );
              }
            } finally {
              // Release the handles; disposal errors are not actionable.
              await inputHandle.dispose().catch((): void => {});
            }
          } finally {
            await elementHandle.dispose().catch((): void => {});
          }
        }
      } catch (err) {
        lastError = err;
      }
    }
  } catch (err) {
    lastError = err;
  }

  const detail =
    lastError instanceof Error
      ? lastError.message
      : lastError !== undefined
        ? String(lastError)
        : "";

  return finish({
    success: false,
    method: "fallback",
    hint,
    message:
      "Could not locate a file input or trigger a chooser via observe" +
      (detail ? ` (last error: ${detail})` : ""),
  });
}

/**
* Create an agent instance that can be executed with different instructions
* @returns An agent instance with execute() method
Expand Down Expand Up @@ -1002,5 +1221,6 @@ export * from "../types/stagehand";
export * from "../types/operator";
export * from "../types/agent";
export * from "./llm/LLMClient";
export * from "./llm/LLMProvider";
export * from "../types/stagehandErrors";
export * from "../types/stagehandApiErrors";
6 changes: 3 additions & 3 deletions lib/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
"version": "2.4.1",
"private": true,
"description": "Core Stagehand library sources",
"main": "../dist/index.js",
"module": "../dist/index.js",
"types": "../dist/index.d.ts",
"main": "./index.js",
"module": "./index.js",
"types": "./index.d.ts",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logic: Path change from '../dist/' to './' may break builds if corresponding build scripts haven't been updated. Verify that 'pnpm run build-js' outputs to lib/ instead of dist/.

"scripts": {
"build-dom-scripts": "tsx dom/genDomScripts.ts",
"build-js": "tsup index.ts --dts",
Expand Down
Loading