diff --git a/.changeset/hot-crews-battle.md b/.changeset/hot-crews-battle.md new file mode 100644 index 000000000..322bbc11f --- /dev/null +++ b/.changeset/hot-crews-battle.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Support full Google GenAI client options (e.g. to use Vertex AI or control the region) diff --git a/packages/core/lib/v3/agent/GoogleCUAClient.ts b/packages/core/lib/v3/agent/GoogleCUAClient.ts index 968a542a4..035a60985 100644 --- a/packages/core/lib/v3/agent/GoogleCUAClient.ts +++ b/packages/core/lib/v3/agent/GoogleCUAClient.ts @@ -31,7 +31,6 @@ import { ToolSet } from "ai"; * This implementation uses the Google Generative AI SDK for Computer Use */ export class GoogleCUAClient extends AgentClient { - private apiKey: string; private client: GoogleGenAI; private currentViewport = { width: 1288, height: 711 }; private currentUrl?: string; @@ -53,16 +52,15 @@ export class GoogleCUAClient extends AgentClient { this.tools = tools; // Process client options - this.apiKey = - (clientOptions?.apiKey as string) || + clientOptions = clientOptions || {}; + clientOptions.apiKey = + (clientOptions.apiKey as string) || process.env.GEMINI_API_KEY || process.env.GOOGLE_GENERATIVE_AI_API_KEY || - ""; + undefined; // Initialize the Google Generative AI client - this.client = new GoogleGenAI({ - apiKey: this.apiKey, - }); + this.client = new GoogleGenAI(clientOptions); // Get environment if specified if ( @@ -90,9 +88,7 @@ export class GoogleCUAClient extends AgentClient { }; // Store client options for reference - this.clientOptions = { - apiKey: this.apiKey, - }; + this.clientOptions = clientOptions; // Initialize tools if provided if (this.tools && Object.keys(this.tools).length > 0) { diff --git a/packages/core/lib/v3/llm/GoogleClient.ts b/packages/core/lib/v3/llm/GoogleClient.ts index efd3cf925..61f33ccbf 100644 --- a/packages/core/lib/v3/llm/GoogleClient.ts +++ b/packages/core/lib/v3/llm/GoogleClient.ts @@ -8,10 +8,11 @@ import { 
FunctionCall, Schema, Type, + GoogleGenAIOptions as ClientOptions, } from "@google/genai"; import { LogLine } from "../types/public/logs"; -import { AvailableModel, ClientOptions } from "../types/public/model"; +import { AvailableModel } from "../types/public/model"; import { validateZodSchema, toGeminiSchema, @@ -71,7 +72,7 @@ export class GoogleClient extends LLMClient { }: { logger: (message: LogLine) => void; // Added logger type modelName: AvailableModel; - clientOptions?: ClientOptions; // Expecting { apiKey: string } here + clientOptions?: ClientOptions; }) { super(modelName); if (!clientOptions?.apiKey) { @@ -79,7 +80,7 @@ export class GoogleClient extends LLMClient { clientOptions.apiKey = loadApiKeyFromEnv("google_legacy", logger); } this.clientOptions = clientOptions; - this.client = new GoogleGenAI({ apiKey: clientOptions.apiKey }); + this.client = new GoogleGenAI(clientOptions); this.modelName = modelName; this.logger = logger; // Determine vision capability based on model name (adjust as needed) diff --git a/packages/core/lib/v3/types/public/model.ts b/packages/core/lib/v3/types/public/model.ts index ea8aa57da..d7e3d6d20 100644 --- a/packages/core/lib/v3/types/public/model.ts +++ b/packages/core/lib/v3/types/public/model.ts @@ -1,5 +1,6 @@ import type { ClientOptions as AnthropicClientOptions } from "@anthropic-ai/sdk"; import type { LanguageModelV2 } from "@ai-sdk/provider"; +import { GoogleGenAIOptions as GoogleGenAIClientOptions } from "@google/genai"; import type { ClientOptions as OpenAIClientOptions } from "openai"; export type AnthropicJsonSchemaObject = { @@ -66,7 +67,16 @@ export type ModelProvider = | "google" | "aisdk"; -export type ClientOptions = OpenAIClientOptions | AnthropicClientOptions; +export type ClientOptions = ( + | OpenAIClientOptions + | AnthropicClientOptions + | GoogleGenAIClientOptions +) & + // aisdk client language model options + { + apiKey?: string; + baseURL?: string; + }; export type ModelConfiguration = | 
AvailableModel