browserbase · tkattkat · Sep 10, 2025 · Sep 11, 2025 · Sep 15, 2025 · Sep 15, 2025
diff --git a/.changeset/chatty-pans-create.md b/.changeset/chatty-pans-create.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand-evals": patch
+---
+
+Parameterize execution model in agent evals
diff --git a/.changeset/curly-boats-push.md b/.changeset/curly-boats-push.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand-evals": patch
+---
+
+Improve evals screenshot service: use image-hash diffing to decide when to add screenshots, and switch to intercepting screenshots from the agent
diff --git a/.changeset/dark-crabs-repair.md b/.changeset/dark-crabs-repair.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand-evals": minor
+---
+
+Added WebVoyager ground truth (optional), WebBench, and a subset of OSWorld evals that run in a browser
diff --git a/.changeset/few-frogs-smoke.md b/.changeset/few-frogs-smoke.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Pass stagehand object to agent instead of stagehand page
diff --git a/.changeset/icy-toes-obey.md b/.changeset/icy-toes-obey.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Add playwright arguments to agent execute response
diff --git a/.changeset/loud-waves-think.md b/.changeset/loud-waves-think.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Adds support for the Stagehand agent in the API
diff --git a/.changeset/many-rats-punch.md b/.changeset/many-rats-punch.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Fix for zod peer dependency support
diff --git a/.changeset/purple-squids-know.md b/.changeset/purple-squids-know.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Fixed info logs on api session create
diff --git a/.changeset/tasty-candles-retire.md b/.changeset/tasty-candles-retire.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Improve failed act error logs
diff --git a/.env.example b/.env.example
@@ -11,3 +11,4 @@ EXPERIMENTAL_EVAL_MODELS="gpt-4o,claude-3-5-sonnet-latest,o1-mini,o1-preview"
 EVAL_CATEGORIES="observe,act,combination,extract,experimental"
 AGENT_EVAL_MAX_STEPS=50
 STAGEHAND_API_URL="http://localhost:80"
+AGENT_EVAL_EXECUTION_MODEL="google/gemini-2.5-flash"
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+CLAUDE.md
 node_modules/
 /test-results/
 /playwright-report/

diff --git a/docs/configuration/browser.mdx b/docs/configuration/browser.mdx
@@ -114,7 +114,7 @@ stagehand = Stagehand(
       apiKey: process.env.BROWSERBASE_API_KEY,
       projectId: process.env.BROWSERBASE_PROJECT_ID,
       browserbaseSessionCreateParams: {
-        projectId: process.env.BROWSERBASE_PROJECT_ID!,
+        projectId: process.env.BROWSERBASE_PROJECT_ID!, // Optional: automatically set if given in environment variable or by Stagehand parameter
         proxies: true,
         region: "us-west-2",
         timeout: 3600, // 1 hour session timeout
@@ -124,17 +124,11 @@ stagehand = Stagehand(
           blockAds: true,
           solveCaptchas: true,
           recordSession: false,
+          os: "windows", // Valid: "windows" | "mac" | "linux" | "mobile" | "tablet"
           viewport: {
             width: 1920,
             height: 1080,
           },
-          fingerprint: {
-            browsers: ["chrome", "edge"],
-            devices: ["desktop"],
-            operatingSystems: ["windows", "macos"],
-            locales: ["en-US", "en-GB"],
-            httpVersion: 2,
-          },
         },
         userMetadata: {
           userId: "automation-user-123",
@@ -149,7 +143,7 @@ stagehand = Stagehand(
         api_key=os.getenv("BROWSERBASE_API_KEY"),
         project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
         browserbase_session_create_params={
-            "project_id": os.getenv("BROWSERBASE_PROJECT_ID"),
+            "project_id": os.getenv("BROWSERBASE_PROJECT_ID"), # Optional: automatically set if given in environment or by Stagehand parameter
             "proxies": True,
             "region": "us-west-2",
             "timeout": 3600,  # 1 hour session timeout
@@ -159,17 +153,11 @@ stagehand = Stagehand(
                 "block_ads": True,
                 "solve_captchas": True,
                 "record_session": False,
+                "os": "windows",  # "windows" | "mac" | "linux" | "mobile" | "tablet"
                 "viewport": {
                     "width": 1920,
                     "height": 1080,
                 },
-                "fingerprint": {
-                    "browsers": ["chrome", "edge"],
-                    "devices": ["desktop"],
-                    "operating_systems": ["windows", "macos"],
-                    "locales": ["en-US", "en-GB"],
-                    "http_version": 2,
-                },
             },
             "user_metadata": {
                 "user_id": "automation-user-123",

diff --git a/docs/configuration/models.mdx b/docs/configuration/models.mdx
@@ -156,47 +156,70 @@ stagehand = Stagehand(
 ## Custom LLM Integration
 
 <Note>
-Custom LLMs are currently only supported in TypeScript.
+Only [LiteLLM compatible providers](https://docs.litellm.ai/docs/providers) are available in Python. Some may require extra setup.
 </Note>
 
 Integrate any LLM with Stagehand using custom clients. The only requirement is **structured output support** for consistent automation behavior.
 
 ### Vercel AI SDK
 The [Vercel AI SDK](https://sdk.vercel.ai/providers/ai-sdk-providers) is a popular library for interacting with LLMs. You can use any of the providers supported by the Vercel AI SDK to create a client for your model, **as long as they support structured outputs**.
 
-Vercel AI SDK supports providers for OpenAI, Anthropic, and Google, along with support for **Amazon Bedrock** and **Azure OpenAI**.
+Vercel AI SDK supports providers for OpenAI, Anthropic, and Google, along with support for **Amazon Bedrock** and **Azure OpenAI**. For a full list, see the [Vercel AI SDK providers page](https://sdk.vercel.ai/providers/ai-sdk-providers).
 
-To get started, you'll need to install the `ai` package and the provider you want to use. For example, to use Amazon Bedrock, you'll need to install the `@ai-sdk/amazon-bedrock` package.
+To get started, you'll need to install the `ai` package (version 4) and the provider you want to use (version 1); both need to be compatible with `LanguageModelV1`. For example, to use Amazon Bedrock, you'll need to install the `@ai-sdk/amazon-bedrock` package.
 
-You'll also need to use the [Vercel AI SDK external client](https://github.com/browserbase/stagehand/blob/main/examples/external_clients/aisdk.ts) as a template to create a client for your model.
+You'll also need to import the [Vercel AI SDK external client](https://github.com/browserbase/stagehand/blob/main/lib/llm/aisdk.ts) through Stagehand to create a client for your model.
 
 <Tabs>
 	<Tab title="npm">
 	```bash
-	npm install ai @ai-sdk/amazon-bedrock
+	npm install ai@4 @ai-sdk/amazon-bedrock@1
 	```
 	</Tab>
 
 	<Tab title="pnpm">
 	```bash
-	pnpm install ai @ai-sdk/amazon-bedrock
+	pnpm install ai@4 @ai-sdk/amazon-bedrock@1
 	```
 	</Tab>
 
 	<Tab title="yarn">
 	```bash
-	yarn add ai @ai-sdk/amazon-bedrock
+	yarn add ai@4 @ai-sdk/amazon-bedrock@1
 	```
 	</Tab>
 </Tabs>
 
-To get started, you can use the [Vercel AI SDK external client](https://github.com/browserbase/stagehand/blob/84f810b4631291307a32a47addad7e26e9c1deb3/examples/external_clients/aisdk.ts) as a template to create a client for your model.
+<Note>
+The `AISdkClient` is not yet available via the Stagehand npm package. For now, install Stagehand directly from its git repository to access the `AISdkClient` (it will be included in the npm package in an upcoming release).
+</Note>
+
+<Tabs>
+	<Tab title="npm">
+	```bash
+	npm install @browserbasehq/stagehand@git+https://github.com/browserbase/stagehand.git
+	```
+	</Tab>
+
+	<Tab title="pnpm">
+	```bash
+	pnpm install @browserbasehq/stagehand@git+https://github.com/browserbase/stagehand.git
+	```
+	</Tab>
+
+	<Tab title="yarn">
+	```bash
+	yarn add @browserbasehq/stagehand@git+https://github.com/browserbase/stagehand.git
+	```
+	</Tab>
+</Tabs>
 
 ```ts
 // Install/import the provider you want to use.
-// For example, to use OpenAI, import `openai` from @ai-sdk/openai
+// For example, to use Azure OpenAI, import { createAzure } from '@ai-sdk/azure';
 import { bedrock } from "@ai-sdk/amazon-bedrock";
-import { AISdkClient } from "./external_clients/aisdk";
+// @ts-ignore
+import { AISdkClient } from "@browserbasehq/stagehand";
 
 const stagehand = new Stagehand({
   llmClient: new AISdkClient({

diff --git a/docs/first-steps/installation.mdx b/docs/first-steps/installation.mdx
@@ -95,6 +95,26 @@ main().catch((err) => {
 
 <Tab title="Python">
 
+<Tip>
+For uv installation instructions see the [uv installation guide](https://docs.astral.sh/uv/getting-started/installation/#__tabbed_1_1).
+</Tip>
+
+### Initialize virtual environment
+
+<CodeGroup>
+
+```bash uv
+uv init my-stagehand-project
+cd my-stagehand-project
+```
+
+```bash pip
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+```
+
+</CodeGroup>
+
 ### Add dependencies
 
 <CodeGroup>
@@ -128,6 +148,10 @@ BROWSERBASE_PROJECT_ID=your_project_id
 import os
 import asyncio
 from stagehand import Stagehand
+from pydantic import BaseModel
+
+class PageData(BaseModel):
+    title: str
 
 async def main():
     stagehand = Stagehand(
@@ -143,16 +167,12 @@ async def main():
     await page.act("Click the sign in button")
 
     # Extract structured data
-    result = await page.extract({
-        "instruction": "extract the page title",
-        "schema": {
-            "title": {
-                "type": "string"
-            }
-        }
-    })
+    result = await page.extract(
+        instruction = "extract the page title",
+        schema = PageData
+    )
 
-    print(result["title"])
+    print(result.title)
     await stagehand.close()
 
 if __name__ == "__main__":

diff --git a/evals/cli.ts b/evals/cli.ts
@@ -381,7 +381,6 @@ function handleRun(args: string[]): void {
         webbench: "agent/webbench",
         gaia: "agent/gaia",
         webvoyager: "agent/webvoyager",
-        osworld: "agent/osworld",
         onlineMind2Web: "agent/onlineMind2Web",
       };