Skip to content

Commit ba28053

Browse files
ysolankydirkbrnd
andauthored
agentql-integration-ag-279 (#2307)
## Description - **Summary of changes**: AgentQL Toolkit --- ## Type of change Please check the options that are relevant: - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Model update (Addition or modification of models) - [ ] Other (please describe): --- ## Checklist - [ ] Adherence to standards: Code complies with Agno’s style guidelines and best practices. - [ ] Formatting and validation: You have run `./scripts/format.sh` and `./scripts/validate.sh` to ensure code is formatted and linted. - [ ] Self-review completed: A thorough review has been performed by the contributor(s). - [ ] Documentation: Docstrings and comments have been added or updated for any complex logic. - [ ] Examples and guides: Relevant cookbook examples have been included or updated (if applicable). - [ ] Tested in a clean environment: Changes have been tested in a clean environment to confirm expected behavior. - [ ] Tests (optional): Tests have been added or updated to cover any new or changed functionality. --- ## Additional Notes Include any deployment notes, performance implications, security considerations, or other relevant information (e.g., screenshots or logs if applicable). --------- Co-authored-by: Dirk Brand <[email protected]> Co-authored-by: Dirk Brand <[email protected]>
1 parent 27f9226 commit ba28053

File tree

5 files changed

+222
-0
lines changed

5 files changed

+222
-0
lines changed

cookbook/tools/agentql_tools.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
"""
AgentQL Tools for scraping websites.

Prerequisites:
- Set the environment variable `AGENTQL_API_KEY` with your AgentQL API key.
  You can obtain the API key from the AgentQL website:
  https://agentql.com/
- Run `playwright install` to download the browsers that Playwright drives.

AgentQL will open up a browser instance (don't close it) and do scraping on the site.
"""

from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.tools.agentql import AgentQLTools

# Create agent with default AgentQL tool
agent = Agent(
    model=OpenAIChat(id="gpt-4o"), tools=[AgentQLTools()], show_tool_calls=True
)
agent.print_response("https://docs.agno.com/introduction", markdown=True)

# Define custom AgentQL query for specific data extraction (see https://docs.agentql.com/concepts/query-language)
custom_query = """
{
    title
    text_content[]
}
"""

# Create AgentQL tool with custom query.
# Supplying `agentql_query` is what registers the custom scrape tool; the
# constructor accepts only `api_key`, `scrape` and `agentql_query`, so no
# extra flag is passed here.
custom_scraper = AgentQLTools(agentql_query=custom_query)

# Create agent with custom AgentQL tool
custom_agent = Agent(
    model=OpenAIChat(id="gpt-4o"), tools=[custom_scraper], show_tool_calls=True
)
custom_agent.print_response("https://docs.agno.com/introduction", markdown=True)

libs/agno/agno/tools/agentql.py

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
from os import getenv
2+
from typing import Optional
3+
4+
from agno.tools import Toolkit
5+
from agno.utils.log import logger
6+
7+
try:
8+
import agentql
9+
from playwright.sync_api import sync_playwright
10+
except ImportError:
11+
raise ImportError("`agentql` not installed. Please install using `pip install agentql`")
12+
13+
14+
class AgentQLTools(Toolkit):
    """Toolkit that scrapes web pages with AgentQL on top of a Playwright browser.

    Two tools can be registered:

    * ``scrape_website`` - runs a fixed query that collects ``text_content[]``.
    * ``custom_scrape_website`` - runs the caller-supplied ``agentql_query``.
    """

    def __init__(
        self, api_key: Optional[str] = None, scrape: bool = True, agentql_query: str = ""
    ):
        """
        Configure the toolkit and register its tools.

        Args:
            api_key (Optional[str]): AgentQL API key. Falls back to the
                `AGENTQL_API_KEY` environment variable when omitted.
            scrape (bool): Register the generic `scrape_website` tool.
            agentql_query (str): Custom AgentQL query. When non-empty, the
                `custom_scrape_website` tool is registered as well.

        Raises:
            ValueError: If no API key is given and `AGENTQL_API_KEY` is unset.
        """
        super().__init__(name="agentql_tools")

        # NOTE(review): the key is validated and stored here but never handed to
        # the agentql SDK in this class; presumably the SDK reads
        # AGENTQL_API_KEY from the environment itself - confirm.
        self.api_key = api_key or getenv("AGENTQL_API_KEY")
        if not self.api_key:
            raise ValueError("AGENTQL_API_KEY not set. Please set the AGENTQL_API_KEY environment variable.")

        self.agentql_query = agentql_query

        if scrape:
            self.register(self.scrape_website)

        if agentql_query:
            logger.info("Custom AgentQL query provided. Registering custom scrape function.")
            self.register(self.custom_scrape_website)

    def scrape_website(self, url: str) -> str:
        """
        Scrape all text content from a website using AgentQL.

        Args:
            url (str): The URL of the website to scrape

        Returns:
            str: Extracted text content or error message
        """
        if not url:
            return "No URL provided"

        TEXT_SEARCH_QUERY = """
        {
            text_content[]
        }
        """

        return self._run_query(url, TEXT_SEARCH_QUERY, text_content_only=True)

    def custom_scrape_website(self, url: str) -> str:
        """
        Scrape a website using a custom AgentQL query.

        Args:
            url (str): The URL of the website to scrape

        Returns:
            str: Extracted text content or error message
        """
        if not url:
            return "No URL provided"

        if not self.agentql_query:
            return "Custom AgentQL query not provided. Please provide a custom AgentQL query."

        return self._run_query(url, self.agentql_query, text_content_only=False)

    def _run_query(self, url: str, query: str, text_content_only: bool) -> str:
        """
        Open `url` in a Chromium page, run `query` and join the extracted text.

        Args:
            url (str): Page to load.
            query (str): AgentQL query passed to `page.query_data`.
            text_content_only (bool): When True only the `text_content` field
                of the response is used; otherwise every field is used.

        Returns:
            str: Space-joined unique text items in first-seen order, an error
            message, or "No text content found" when nothing was extracted.
        """
        try:
            with sync_playwright() as playwright, playwright.chromium.launch(headless=False) as browser:
                page = agentql.wrap(browser.new_page())
                page.goto(url)

                try:
                    # Get response from AgentQL query
                    response = page.query_data(query)

                    if isinstance(response, dict):
                        payload = response.get("text_content") if text_content_only else response
                        # dict.fromkeys removes duplicates while keeping the
                        # first-seen order, so the output is deterministic.
                        unique_items = list(dict.fromkeys(self._collect_text(payload)))
                        if unique_items:
                            return " ".join(unique_items)
                except Exception as e:
                    return f"Error extracting text: {e}"
        except Exception as e:
            return f"Error launching browser: {e}"

        return "No text content found"

    def _collect_text(self, value: object) -> list:
        """
        Flatten a query response fragment into a list of non-blank strings.

        Dict values (not keys) and list/tuple elements are visited
        recursively; `None` and blank strings are dropped; any other scalar
        is converted with `str()`.
        """
        if value is None:
            return []
        if isinstance(value, str):
            return [value] if value.strip() else []
        if isinstance(value, dict):
            return self._collect_text(list(value.values()))
        if isinstance(value, (list, tuple)):
            collected: list = []
            for element in value:
                collected.extend(self._collect_text(element))
            return collected
        return [str(value)]

libs/agno/pyproject.toml

+2
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ exclude = ["tests*"]
202202

203203
[[tool.mypy.overrides]]
204204
module = [
205+
"agentql.*",
205206
"altair.*",
206207
"anthropic.*",
207208
"apify_client.*",
@@ -260,6 +261,7 @@ module = [
260261
"PIL.*",
261262
"pinecone_text.*",
262263
"pinecone.*",
264+
"playwright.sync_api.*",
263265
"psycopg.*",
264266
"psycopg2.*",
265267
"pyarrow.*",

libs/agno/tests/unit/tools/test_zoom_tools.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""Unit tests for ZoomTools class."""
2+
13
import json
24
from datetime import datetime, timedelta
35
from unittest.mock import MagicMock, patch
+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""Unit tests for AgentQLTools class."""
2+
3+
from unittest.mock import Mock, patch
4+
5+
import pytest
6+
7+
from agno.tools.agentql import AgentQLTools
8+
9+
10+
@pytest.fixture
def mock_playwright():
    """Patch `sync_playwright` in `agno.tools.agentql` for the duration of a test.

    Yields the patched `sync_playwright` mock. The browser returned by
    `chromium.launch()` works as a context manager that yields itself, and
    `new_page()` on it returns a single shared page mock, reachable as
    `...chromium.launch.return_value.new_page.return_value`.
    """
    # `yield` (rather than `return`) keeps both patches active while the test
    # body runs; they are undone when the fixture is torn down.
    # NOTE(review): `agentql.wrap` is stubbed to hand back the page unchanged,
    # on the assumption that the real wrapper cannot operate on a Mock - confirm.
    with patch("agno.tools.agentql.sync_playwright") as mock_pw, patch(
        "agno.tools.agentql.agentql.wrap", side_effect=lambda page: page
    ):
        mock_browser = Mock()
        mock_page = Mock()
        mock_browser.new_page.return_value = mock_page
        # The toolkit uses `with playwright.chromium.launch(...) as browser`,
        # so the browser mock needs the context-manager protocol.
        mock_browser.__enter__ = Mock(return_value=mock_browser)
        mock_browser.__exit__ = Mock(return_value=False)
        mock_pw.return_value.__enter__.return_value.chromium.launch.return_value = mock_browser
        yield mock_pw
19+
20+
21+
@pytest.fixture
def agentql_tools():
    """Provide an AgentQLTools built while AGENTQL_API_KEY is set to a test value."""
    fake_env = {"AGENTQL_API_KEY": "test_key"}
    with patch.dict("os.environ", fake_env):
        toolkit = AgentQLTools()
        return toolkit
26+
27+
28+
def test_init_with_api_key():
    """An API key passed to the constructor is stored on the instance."""
    toolkit = AgentQLTools(api_key="test_key")
    stored_key = toolkit.api_key
    assert stored_key == "test_key"
32+
33+
34+
def test_init_without_api_key():
    """With an empty environment and no key argument, construction raises ValueError."""
    env_cleared = patch.dict("os.environ", clear=True)
    expect_error = pytest.raises(ValueError, match="AGENTQL_API_KEY not set")
    with env_cleared, expect_error:
        AgentQLTools()
39+
40+
41+
def test_scrape_website_no_url(agentql_tools):
    """An empty URL yields the fixed 'No URL provided' message."""
    assert agentql_tools.scrape_website("") == "No URL provided"
45+
46+
47+
def test_scrape_website_no_api_key():
    """Without an API key the toolkit cannot be built, so no scrape can be attempted.

    The ValueError comes from the constructor; a scrape call placed after it
    inside the `pytest.raises` block would never execute, so none is made.
    """
    with patch.dict("os.environ", clear=True):
        with pytest.raises(ValueError, match="AGENTQL_API_KEY not set"):
            AgentQLTools()
53+
54+
55+
def test_custom_scrape_no_query(agentql_tools):
    """Custom scraping on a toolkit built without a query reports the missing query."""
    message = agentql_tools.custom_scrape_website("https://example.com")
    expected_fragment = "Custom AgentQL query not provided"
    assert expected_fragment in message
59+
60+
61+
def test_scrape_website_success(mock_playwright, agentql_tools):
    """Test successful website scraping.

    The mocked page returns four text items, one of them duplicated; the
    result must contain each distinct item exactly once.
    """
    mock_page = mock_playwright.return_value.__enter__.return_value.chromium.launch.return_value.new_page.return_value
    mock_page.query_data.return_value = {"text_content": ["text1", "text2", "text2", "text3"]}

    result = agentql_tools.scrape_website("https://example.com")

    # Assert against the mocked payload. Item order is not checked, so the
    # assertion holds whichever way the toolkit removes duplicates.
    assert sorted(result.split(" ")) == ["text1", "text2", "text3"]

0 commit comments

Comments
 (0)