agentql-integration-ag-279 (#2307)

ysolanky · dirkbrnd · web-flow · commit ba280533adbd · 2025-03-11T11:41:32.000-04:00
## Description

- **Summary of changes**: AgentQL Toolkit

---

## Type of change

Please check the options that are relevant:

- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to not work as expected)
- [ ] Model update (Addition or modification of models)
- [ ] Other (please describe):

---

## Checklist

- [ ] Adherence to standards: Code complies with Agno’s style guidelines
and best practices.
- [ ] Formatting and validation: You have run `./scripts/format.sh` and
`./scripts/validate.sh` to ensure code is formatted and linted.
- [ ] Self-review completed: A thorough review has been performed by the
contributor(s).
- [ ] Documentation: Docstrings and comments have been added or updated
for any complex logic.
- [ ] Examples and guides: Relevant cookbook examples have been included
or updated (if applicable).
- [ ] Tested in a clean environment: Changes have been tested in a clean
environment to confirm expected behavior.
- [ ] Tests (optional): Tests have been added or updated to cover any
new or changed functionality.

---

## Additional Notes

Include any deployment notes, performance implications, security
considerations, or other relevant information (e.g., screenshots or logs
if applicable).

---------

Co-authored-by: Dirk Brand <dirkbrnd@gmail.com>
Co-authored-by: Dirk Brand <51947788+dirkbrnd@users.noreply.github.com>
diff --git a/cookbook/tools/agentql_tools.py b/cookbook/tools/agentql_tools.py
@@ -0,0 +1,38 @@
+"""
+AgentQL Tools for scraping websites.
+
+Prerequisites:
+- Set the environment variable `AGENTQL_API_KEY` with your AgentQL API key.
+  You can obtain the API key from the AgentQL website:
+  https://agentql.com/
+- Run `playwright install` to download the browser binaries that Playwright drives.
+
+AgentQL will open up a browser instance (don't close it) and do scraping on the site.
+"""
+
+from agno.agent import Agent
+from agno.models.openai import OpenAIChat
+from agno.tools.agentql import AgentQLTools
+
+# Create agent with default AgentQL tool
+agent = Agent(
+    model=OpenAIChat(id="gpt-4o"), tools=[AgentQLTools()], show_tool_calls=True
+)
+agent.print_response("https://docs.agno.com/introduction", markdown=True)
+
+# Define custom AgentQL query for specific data extraction (see https://docs.agentql.com/concepts/query-language)
+custom_query = """
+{
+    title
+    text_content[]
+}
+"""
+
+# Create AgentQL tool with custom query.
+# Passing a non-empty `agentql_query` is what registers the custom scrape function;
+# the constructor has no `custom_scrape` keyword, so it must not be passed.
+custom_scraper = AgentQLTools(agentql_query=custom_query)
+
+# Create agent with custom AgentQL tool
+custom_agent = Agent(
+    model=OpenAIChat(id="gpt-4o"), tools=[custom_scraper], show_tool_calls=True
+)
+custom_agent.print_response("https://docs.agno.com/introduction", markdown=True)
diff --git a/libs/agno/agno/tools/agentql.py b/libs/agno/agno/tools/agentql.py
@@ -0,0 +1,113 @@
+from os import getenv
+from typing import Optional
+
+from agno.tools import Toolkit
+from agno.utils.log import logger
+
+try:
+    import agentql
+    from playwright.sync_api import sync_playwright
+except ImportError:
+    raise ImportError("`agentql` not installed. Please install using `pip install agentql`")
+
+
+class AgentQLTools(Toolkit):
+    """Toolkit that scrapes web pages by running AgentQL queries in a Playwright browser.
+
+    Args:
+        api_key (Optional[str]): AgentQL API key. Falls back to the
+            `AGENTQL_API_KEY` environment variable when omitted.
+        scrape (bool): Register `scrape_website` when True.
+        agentql_query (str): Custom AgentQL query. When non-empty,
+            `custom_scrape_website` is registered as well.
+
+    Raises:
+        ValueError: If no API key is passed and `AGENTQL_API_KEY` is unset.
+    """
+
+    # Default query used by `scrape_website` to pull every text node of a page.
+    _TEXT_SEARCH_QUERY = """
+    {
+        text_content[]
+    }
+    """
+
+    def __init__(
+        self, api_key: Optional[str] = None, scrape: bool = True, agentql_query: str = ""
+    ):
+        super().__init__(name="agentql_tools")
+
+        # NOTE(review): the key is only stored here; nothing in this class hands it
+        # to the agentql SDK -- confirm the SDK reads it from the environment.
+        self.api_key = api_key or getenv("AGENTQL_API_KEY")
+        if not self.api_key:
+            raise ValueError("AGENTQL_API_KEY not set. Please set the AGENTQL_API_KEY environment variable.")
+
+        self.agentql_query = agentql_query
+
+        if scrape:
+            self.register(self.scrape_website)
+
+        if agentql_query:
+            logger.info("Custom AgentQL query provided. Registering custom scrape function.")
+            self.register(self.custom_scrape_website)
+
+    @staticmethod
+    def _collect_text(value) -> list:
+        """Return the non-blank text found anywhere inside `value`.
+
+        Dicts contribute their values (not their keys), lists/tuples contribute
+        their items, and other non-None scalars are stringified. Duplicates are
+        dropped while keeping first-seen order so the output is deterministic.
+        """
+        collected = []
+
+        def walk(node) -> None:
+            if isinstance(node, dict):
+                for child in node.values():
+                    walk(child)
+            elif isinstance(node, (list, tuple)):
+                for child in node:
+                    walk(child)
+            elif isinstance(node, str):
+                if node.strip():
+                    collected.append(node)
+            elif node is not None:
+                collected.append(str(node))
+
+        walk(value)
+        # dict.fromkeys de-duplicates without the reordering that set() causes.
+        return list(dict.fromkeys(collected))
+
+    def _run_query(self, url: str, query: str, only_key: Optional[str] = None) -> str:
+        """Open `url` in a browser, run `query` and return the extracted text.
+
+        Args:
+            url (str): Page to load.
+            query (str): AgentQL query passed to `page.query_data`.
+            only_key (Optional[str]): When given, only the value stored under
+                this key of the response is used; otherwise the whole response.
+
+        Returns:
+            str: Space-joined text, or an error / "No text content found" message.
+        """
+        try:
+            with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser:
+                page = agentql.wrap(browser.new_page())
+                page.goto(url)
+
+                try:
+                    response = page.query_data(query)
+
+                    if isinstance(response, dict):
+                        payload = response if only_key is None else response.get(only_key)
+                        texts = self._collect_text(payload)
+                        if texts:
+                            return " ".join(texts)
+
+                except Exception as e:
+                    return f"Error extracting text: {e}"
+        except Exception as e:
+            # Covers browser start-up and navigation failures.
+            return f"Error launching browser: {e}"
+
+        return "No text content found"
+
+    def scrape_website(self, url: str) -> str:
+        """
+        Scrape all text content from a website using AgentQL.
+
+        Args:
+            url (str): The URL of the website to scrape
+
+        Returns:
+            str: Extracted text content or error message
+        """
+        if not url:
+            return "No URL provided"
+
+        return self._run_query(url, self._TEXT_SEARCH_QUERY, only_key="text_content")
+
+    def custom_scrape_website(self, url: str) -> str:
+        """
+        Scrape a website using a custom AgentQL query.
+
+        Args:
+            url (str): The URL of the website to scrape
+
+        Returns:
+            str: Extracted text content or error message
+        """
+        if not url:
+            return "No URL provided"
+
+        if not self.agentql_query:
+            return "Custom AgentQL query not provided. Please provide a custom AgentQL query."
+
+        # Use the values of every field the custom query returned, not the field names.
+        return self._run_query(url, self.agentql_query)
diff --git a/libs/agno/pyproject.toml b/libs/agno/pyproject.toml
@@ -202,6 +202,7 @@ exclude = ["tests*"]
 
 [[tool.mypy.overrides]]
 module = [
+  "agentql.*",
   "altair.*",
   "anthropic.*",
   "apify_client.*",
@@ -260,6 +261,7 @@ module = [
   "PIL.*",
   "pinecone_text.*",
   "pinecone.*",
+  "playwright.sync_api.*",
   "psycopg.*",
   "psycopg2.*",
   "pyarrow.*",
diff --git a/libs/agno/tests/unit/tools/test_zoom_tools.py b/libs/agno/tests/unit/tools/test_zoom_tools.py
@@ -1,3 +1,5 @@
+"""Unit tests for ZoomTools class."""
+
 import json
 from datetime import datetime, timedelta
 from unittest.mock import MagicMock, patch
diff --git a/libs/agno/tests/unit/tools/text_agentql.py b/libs/agno/tests/unit/tools/text_agentql.py
@@ -0,0 +1,67 @@
+"""Unit tests for AgentQLTools class."""
+
+from unittest.mock import Mock, patch
+
+import pytest
+
+from agno.tools.agentql import AgentQLTools
+
+
+@pytest.fixture
+def mock_playwright():
+    """Patch Playwright and `agentql.wrap` for the duration of a test.
+
+    Yields the patched `sync_playwright` mock. The launched browser is reachable
+    at `...chromium.launch.return_value`, and its `new_page()` returns a mock page.
+    """
+    with patch("agno.tools.agentql.sync_playwright") as mock_pw, patch(
+        "agno.tools.agentql.agentql.wrap", side_effect=lambda page: page
+    ):
+        mock_page = Mock()
+        # patch() hands out MagicMock objects, which (unlike Mock) support `with`.
+        mock_browser = mock_pw.return_value.__enter__.return_value.chromium.launch.return_value
+        # `launch(...) as browser` binds the result of __enter__; point it back at
+        # the same object so tests can configure the browser via launch.return_value.
+        mock_browser.__enter__.return_value = mock_browser
+        mock_browser.new_page.return_value = mock_page
+        # Yield rather than return so the patches stay active while the test runs.
+        yield mock_pw
+
+
+@pytest.fixture
+def agentql_tools():
+    """Build an AgentQLTools instance while a test API key is present in the environment."""
+    fake_env = {"AGENTQL_API_KEY": "test_key"}
+    with patch.dict("os.environ", fake_env):
+        toolkit = AgentQLTools()
+    return toolkit
+
+
+def test_init_with_api_key():
+    """An explicitly passed API key is stored on the toolkit."""
+    expected_key = "test_key"
+    toolkit = AgentQLTools(api_key=expected_key)
+    assert toolkit.api_key == expected_key
+
+
+def test_init_without_api_key():
+    """Constructing the toolkit with an empty environment and no key raises ValueError."""
+    empty_env = patch.dict("os.environ", clear=True)
+    expect_error = pytest.raises(ValueError, match="AGENTQL_API_KEY not set")
+    with empty_env, expect_error:
+        AgentQLTools()
+
+
+def test_scrape_website_no_url(agentql_tools):
+    """An empty URL short-circuits with the "No URL provided" message."""
+    assert agentql_tools.scrape_website("") == "No URL provided"
+
+
+def test_scrape_website_no_api_key():
+    """Test scraping without API key.
+
+    The constructor is expected to raise before any scraping can happen.
+    """
+    with patch.dict("os.environ", clear=True):
+        with pytest.raises(ValueError, match="AGENTQL_API_KEY not set"):
+            tools = AgentQLTools()
+            # Not reached when AgentQLTools() raises as expected on the line above.
+            tools.scrape_website("https://example.com")
+
+
+def test_custom_scrape_no_query(agentql_tools):
+    """Custom scraping on a toolkit built without a query reports the missing query."""
+    message = agentql_tools.custom_scrape_website("https://example.com")
+    assert "Custom AgentQL query not provided" in message
+
+
+def test_scrape_website_success(mock_playwright, agentql_tools):
+    """Test successful website scraping.
+
+    The browser stack is patched locally so the test does not depend on how long
+    the `mock_playwright` fixture keeps its own patch active.
+    """
+    with patch("agno.tools.agentql.sync_playwright") as mock_pw, patch("agno.tools.agentql.agentql") as mock_agentql:
+        # `launch(...)` is used as a context manager, so the browser is its __enter__ result.
+        mock_browser = mock_pw.return_value.__enter__.return_value.chromium.launch.return_value.__enter__.return_value
+        mock_page = mock_agentql.wrap.return_value
+        mock_page.query_data.return_value = {"text_content": ["text1", "text2", "text2", "text3"]}
+
+        result = agentql_tools.scrape_website("https://example.com")
+
+    mock_agentql.wrap.assert_called_once_with(mock_browser.new_page.return_value)
+    mock_page.goto.assert_called_once_with("https://example.com")
+    # Every mocked text item is returned, and the duplicate appears only once.
+    for expected in ("text1", "text2", "text3"):
+        assert expected in result
+    assert result.count("text2") == 1

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+"""Unit tests for ZoomTools class."""`
	`2`	`+`
`1`	`3`	`import json`
`2`	`4`	`from datetime import datetime, timedelta`
`3`	`5`	`from unittest.mock import MagicMock, patch`