diff --git a/.gitignore b/.gitignore index e8cfb822..84179387 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ -**/.DS_Store -.DS_Store \ No newline at end of file +.DS_Store +.env +cursorrules.md +.cursorrules \ No newline at end of file diff --git a/python/business-lookup/README.md b/python/business-lookup/README.md new file mode 100644 index 00000000..baf51869 --- /dev/null +++ b/python/business-lookup/README.md @@ -0,0 +1,56 @@ +# Stagehand + Browserbase: Business Lookup with Agent + +## AT A GLANCE +- Goal: Automate business registry searches using an autonomous AI agent with computer-use capabilities. +- Uses Stagehand Agent in CUA mode to navigate complex UI elements, apply filters, and extract structured business data. +- Demonstrates extraction with Pydantic schema validation for consistent data retrieval. +- Docs → https://docs.stagehand.dev/basics/agent + +## GLOSSARY +- agent: create an autonomous AI agent that can execute complex multi-step tasks + Docs → https://docs.stagehand.dev/basics/agent#what-is-agent +- extract: extract structured data from web pages using natural language instructions + Docs → https://docs.stagehand.dev/basics/extract + +## QUICKSTART +1) python -m venv venv +2) source venv/bin/activate # On Windows: venv\Scripts\activate +3) pip install stagehand python-dotenv pydantic +4) cp .env.example .env +5) Add required API keys/IDs to .env +6) python main.py + +## EXPECTED OUTPUT +- Initializes Stagehand session with Browserbase +- Displays live session link for monitoring +- Navigates to SF Business Registry search page +- Agent searches for business using DBA Name filter +- Agent completes search and opens business details +- Extracts structured business information (DBA Name, Account Number, NAICS Code, etc.) 
+- Outputs extracted data as JSON +- Closes session cleanly + +## COMMON PITFALLS +- "ModuleNotFoundError": ensure all dependencies are installed via pip +- Missing credentials: verify .env contains BROWSERBASE_PROJECT_ID, BROWSERBASE_API_KEY, OPENAI_API_KEY, and GOOGLE_API_KEY +- Google API access: ensure you have access to Google's gemini-2.5-computer-use-preview-10-2025 model +- Agent failures: check that the business name exists in the registry and that max_steps is sufficient for complex searches +- Import errors: activate your virtual environment if you created one +- Find more information on your Browserbase dashboard -> https://www.browserbase.com/sign-in + +## USE CASES +• Business verification: Automate registration status checks, license validation, and compliance verification for multiple businesses. +• Data enrichment: Collect structured business metadata (NAICS codes, addresses, ownership) for research or CRM updates. +• Due diligence: Streamline background checks by autonomously searching and extracting business registration details from public registries. + +## NEXT STEPS +• Parameterize search: Accept business names as command-line arguments or from a CSV file for batch processing. +• Expand extraction: Add support for additional fields like tax status, licenses, or historical registration changes. +• Multi-registry support: Extend agent to search across multiple city or state business registries with routing logic. + +## HELPFUL RESOURCES +📚 Stagehand Docs: https://docs.stagehand.dev/stagehand +🎮 Browserbase: https://www.browserbase.com +💡 Templates: https://www.browserbase.com/templates +📧 Need help? 
support@browserbase.com
+
diff --git a/python/business-lookup/main.py b/python/business-lookup/main.py
new file mode 100644
index 00000000..be214207
--- /dev/null
+++ b/python/business-lookup/main.py
@@ -0,0 +1,146 @@
+# Stagehand + Browserbase: Business Lookup with Agent - See README.md for full documentation
+
+import os
+import sys
+import asyncio
+import json
+from dotenv import load_dotenv
+from stagehand import Stagehand, StagehandConfig
+from pydantic import BaseModel, Field
+from typing import Optional
+
+# Load environment variables
+load_dotenv()
+
+# Business search variables
+business_name = "Jalebi Street"
+
+# Environment variables read below; checked up front only to warn about gaps.
+REQUIRED_ENV_VARS = (
+    "BROWSERBASE_API_KEY",
+    "BROWSERBASE_PROJECT_ID",
+    "OPENAI_API_KEY",
+    "GOOGLE_API_KEY",
+)
+
+
+class BusinessInfo(BaseModel):
+    """Schema for the business record extracted from the registry page."""
+
+    dba_name: str = Field(..., description="DBA Name")
+    ownership_name: Optional[str] = Field(None, description="Ownership Name")
+    business_account_number: str = Field(..., description="Business Account Number")
+    location_id: Optional[str] = Field(None, description="Location Id")
+    street_address: Optional[str] = Field(None, description="Street Address")
+    business_start_date: Optional[str] = Field(None, description="Business Start Date")
+    business_end_date: Optional[str] = Field(None, description="Business End Date")
+    neighborhood: Optional[str] = Field(None, description="Neighborhood")
+    naics_code: str = Field(..., description="NAICS Code")
+    naics_code_description: Optional[str] = Field(None, description="NAICS Code Description")
+
+
+def missing_env_vars(names=REQUIRED_ENV_VARS):
+    """Return the entries of *names* that are unset or empty in the environment."""
+    return [name for name in names if not os.environ.get(name)]
+
+
+async def main():
+    """Look up ``business_name`` in the SF Business Registry and print it as JSON.
+
+    Raises:
+        RuntimeError: if the agent reports that the search did not succeed.
+    """
+    print("Starting business lookup...")
+
+    # Warn instead of aborting so setups that provide keys another way still run.
+    missing = missing_env_vars()
+    if missing:
+        print(f"Warning: environment variables not set: {', '.join(missing)}")
+
+    # Initialize Stagehand with Browserbase for cloud-based browser automation.
+    config = StagehandConfig(
+        env="BROWSERBASE",
+        api_key=os.environ.get("BROWSERBASE_API_KEY"),
+        project_id=os.environ.get("BROWSERBASE_PROJECT_ID"),
+        model_name="openai/gpt-4.1",
+        model_api_key=os.environ.get("OPENAI_API_KEY"),
+        browserbase_session_create_params={
+            "project_id": os.environ.get("BROWSERBASE_PROJECT_ID"),
+        },
+        # 0 = errors only, 1 = info, 2 = debug. Set to 0 when handling sensitive
+        # data (passwords, API keys) so secrets do not appear in logs.
+        # https://docs.stagehand.dev/configuration/logging
+        verbose=1,
+    )
+
+    # Use async context manager for automatic resource management.
+    async with Stagehand(config) as stagehand:
+        print("Stagehand initialized successfully")
+
+        # Probe both attribute names the session id may be exposed under.
+        session_id = getattr(stagehand, "session_id", None) or getattr(
+            stagehand, "browserbase_session_id", None
+        )
+        if session_id:
+            print(f"Live View Link: https://browserbase.com/sessions/{session_id}")
+
+        page = stagehand.page
+
+        # Navigate to SF Business Registry search page.
+        print("Navigating to SF Business Registry...")
+        await page.goto(
+            "https://data.sfgov.org/stories/s/Registered-Business-Lookup/k6sk-2y6w/",
+            wait_until="domcontentloaded",
+            timeout=60000,
+        )
+
+        # Create agent with computer use capabilities for autonomous business search.
+        # Using CUA mode allows the agent to interact with complex UI elements like filters and tables.
+        print("Creating Computer Use Agent...")
+        agent = stagehand.agent(
+            provider="google",
+            model="gemini-2.5-computer-use-preview-10-2025",
+            instructions="You are a helpful assistant that can use a web browser to search for business information.",
+            options={
+                "api_key": os.getenv("GOOGLE_API_KEY"),
+            },
+        )
+
+        print(f"Searching for business: {business_name}")
+        result = await agent.execute(
+            instruction=f'Find and look up the business "{business_name}" in the SF Business Registry. Use the DBA Name filter to search for "{business_name}", apply the filter, and click on the business row to view detailed information. Scroll towards the right to see the NAICS code.',
+            max_steps=30,
+            auto_screenshot=True,
+        )
+
+        if not result.success:
+            raise RuntimeError("Agent failed to complete the search")
+
+        print("Agent completed search successfully")
+
+        # Extract comprehensive business information after agent completes the search.
+        print("Extracting business information...")
+        business_info = await page.extract(
+            "Extract all visible business information including DBA Name, Ownership Name, Business Account Number, Location Id, Street Address, Business Start Date, Business End Date, Neighborhood, NAICS Code, and NAICS Code Description",
+            schema=BusinessInfo,
+        )
+
+        print("Business information extracted:")
+        print(json.dumps(business_info.model_dump(), indent=2))
+
+    # Printed after the `async with` block exits, not while the session is still open.
+    print("Session closed successfully")
+
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except Exception as err:
+        print(f"Error in business lookup: {err}")
+        print("Common issues:")
+        print(" - Check .env file has BROWSERBASE_PROJECT_ID and BROWSERBASE_API_KEY")
+        print(" - Verify OPENAI_API_KEY is set for the Stagehand model")
+        print(" - Verify GOOGLE_API_KEY is set for the agent")
+        print("Docs: https://docs.browserbase.com/stagehand")
+        sys.exit(1)
+
diff --git a/typescript/business-lookup/README.md b/typescript/business-lookup/README.md
new file mode 100644
index 00000000..769a2c58
--- /dev/null
+++ b/typescript/business-lookup/README.md
@@ -0,0 +1,53 @@
+# Stagehand + Browserbase: Business 
Lookup with Agent + +## AT A GLANCE +- Goal: Automate business registry searches using an autonomous AI agent with computer-use capabilities. +- Uses Stagehand Agent in CUA mode to navigate complex UI elements, apply filters, and extract structured business data. +- Demonstrates extraction with Zod schema validation for consistent data retrieval. +- Docs → https://docs.stagehand.dev/basics/agent + +## GLOSSARY +- agent: create an autonomous AI agent that can execute complex multi-step tasks + Docs → https://docs.stagehand.dev/basics/agent#what-is-agent +- extract: extract structured data from web pages using natural language instructions + Docs → https://docs.stagehand.dev/basics/extract + +## QUICKSTART +1) npm install +2) cp .env.example .env +3) Add required API keys/IDs to .env +4) npm start + +## EXPECTED OUTPUT +- Initializes Stagehand session with Browserbase +- Displays live session link for monitoring +- Navigates to SF Business Registry search page +- Agent searches for business using DBA Name filter +- Agent completes search and opens business details +- Extracts structured business information (DBA Name, Account Number, NAICS Code, etc.) +- Outputs extracted data as JSON +- Closes session cleanly + +## COMMON PITFALLS +- Dependency install errors: ensure npm install completed +- Missing credentials: verify .env contains BROWSERBASE_PROJECT_ID, BROWSERBASE_API_KEY, OPENAI_API_KEY, and GOOGLE_GENERATIVE_AI_API_KEY +- Google API access: ensure you have access to Google's gemini-2.5-computer-use-preview-10-2025 model +- Agent failures: check that the business name exists in the registry and that maxSteps is sufficient for complex searches +- Find more information on your Browserbase dashboard -> https://www.browserbase.com/sign-in + +## USE CASES +• Business verification: Automate registration status checks, license validation, and compliance verification for multiple businesses. 
+• Data enrichment: Collect structured business metadata (NAICS codes, addresses, ownership) for research or CRM updates.
+• Due diligence: Streamline background checks by autonomously searching and extracting business registration details from public registries.
+
+## NEXT STEPS
+• Parameterize search: Accept business names as command-line arguments or from a CSV file for batch processing.
+• Expand extraction: Add support for additional fields like tax status, licenses, or historical registration changes.
+• Multi-registry support: Extend agent to search across multiple city or state business registries with routing logic.
+
+## HELPFUL RESOURCES
+📚 Stagehand Docs: https://docs.stagehand.dev/stagehand
+🎮 Browserbase: https://www.browserbase.com
+💡 Templates: https://www.browserbase.com/templates
+📧 Need help? support@browserbase.com
+
diff --git a/typescript/business-lookup/index.ts b/typescript/business-lookup/index.ts
new file mode 100644
index 00000000..96989662
--- /dev/null
+++ b/typescript/business-lookup/index.ts
@@ -0,0 +1,97 @@
+// Business Lookup with Agent - See README.md for full documentation
+
+import "dotenv/config";
+import { Stagehand } from "@browserbasehq/stagehand";
+import { z } from "zod";
+
+// Business search variables
+const businessName = "Jalebi Street";
+
+// Schema for the business record extracted from the registry page.
+const businessInfoSchema = z.object({
+  dbaName: z.string(),
+  ownershipName: z.string().optional(),
+  businessAccountNumber: z.string(),
+  locationId: z.string().optional(),
+  streetAddress: z.string().optional(),
+  businessStartDate: z.string().optional(),
+  businessEndDate: z.string().optional(),
+  neighborhood: z.string().optional(),
+  naicsCode: z.string(),
+  naicsCodeDescription: z.string().optional(),
+});
+
+/**
+ * Looks up `businessName` in the SF Business Registry and prints the record as JSON.
+ *
+ * Failures are not caught here: they propagate to the top-level handler below,
+ * which prints troubleshooting hints and exits with a non-zero status.
+ */
+async function main() {
+  // Initialize Stagehand with Browserbase for cloud-based browser automation.
+  const stagehand = new Stagehand({
+    env: "BROWSERBASE",
+    verbose: 1,
+    model: "openai/gpt-4.1",
+  });
+
+  try {
+    // Initialize browser session to start automation.
+    await stagehand.init();
+    console.log("Stagehand initialized successfully!");
+    console.log(`Live View Link: https://browserbase.com/sessions/${stagehand.browserbaseSessionId}`);
+
+    const page = stagehand.context.pages()[0];
+
+    // Navigate to SF Business Registry search page.
+    console.log("Navigating to SF Business Registry...");
+    await page.goto("https://data.sfgov.org/stories/s/Registered-Business-Lookup/k6sk-2y6w/");
+
+    // Create agent with computer use capabilities for autonomous business search.
+    const agent = stagehand.agent({
+      cua: true, // Enable Computer Use Agent mode
+      model: {
+        modelName: "google/gemini-2.5-computer-use-preview-10-2025",
+        // Fall back to GOOGLE_API_KEY so either variable name works.
+        apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GOOGLE_API_KEY,
+      },
+      systemPrompt: "You are a helpful assistant that can use a web browser to search for business information.",
+    });
+
+    console.log(`Searching for business: ${businessName}`);
+    const result = await agent.execute({
+      instruction: `Find and look up the business "${businessName}" in the SF Business Registry. Use the DBA Name filter to search for "${businessName}", apply the filter, and click on the business row to view detailed information. Scroll towards the right to see the NAICS code.`,
+      maxSteps: 30,
+    });
+
+    if (!result.success) {
+      throw new Error("Agent failed to complete the search");
+    }
+
+    // Extract comprehensive business information after agent completes the search.
+    console.log("Extracting business information...");
+    const businessInfo = await stagehand.extract(
+      "Extract all visible business information including DBA Name, Ownership Name, Business Account Number, Location Id, Street Address, Business Start Date, Business End Date, Neighborhood, NAICS Code, and NAICS Code Description",
+      businessInfoSchema,
+      { page },
+    );
+
+    console.log("Business Information:");
+    console.log(JSON.stringify(businessInfo, null, 2));
+  } finally {
+    // Always close session to release resources and clean up.
+    await stagehand.close();
+    console.log("Session closed successfully");
+  }
+}
+
+main().catch((err) => {
+  console.error("Error in business lookup:", err);
+  console.error("Common issues:");
+  console.error(" - Check .env file has BROWSERBASE_PROJECT_ID and BROWSERBASE_API_KEY");
+  console.error(" - Verify OPENAI_API_KEY is set for the Stagehand model");
+  console.error(" - Verify GOOGLE_GENERATIVE_AI_API_KEY (or GOOGLE_API_KEY) is set for the agent");
+  console.error("Docs: https://docs.browserbase.com/stagehand");
+  process.exit(1);
+});
+