diff --git a/cookbook/examples/apps/geobuddy/app.py b/cookbook/examples/apps/geobuddy/app.py
index 5675d765fd..4a86f4d0d2 100644
--- a/cookbook/examples/apps/geobuddy/app.py
+++ b/cookbook/examples/apps/geobuddy/app.py
@@ -2,10 +2,9 @@
 from pathlib import Path
 
 import streamlit as st
+from geography_buddy import analyze_image
 from PIL import Image
 
-from cookbook.use_cases.apps.geobuddy.geography_buddy import analyze_image
-
 # Streamlit App Configuration
 st.set_page_config(
     page_title="Geography Location Buddy",
diff --git a/cookbook/models/openai/README.md b/cookbook/models/openai/chat/README.md
similarity index 100%
rename from cookbook/models/openai/README.md
rename to cookbook/models/openai/chat/README.md
diff --git a/cookbook/models/openai/__init__.py b/cookbook/models/openai/chat/__init__.py
similarity index 100%
rename from cookbook/models/openai/__init__.py
rename to cookbook/models/openai/chat/__init__.py
diff --git a/cookbook/models/openai/async_basic.py b/cookbook/models/openai/chat/async_basic.py
similarity index 100%
rename from cookbook/models/openai/async_basic.py
rename to cookbook/models/openai/chat/async_basic.py
diff --git a/cookbook/models/openai/async_basic_stream.py b/cookbook/models/openai/chat/async_basic_stream.py
similarity index 100%
rename from cookbook/models/openai/async_basic_stream.py
rename to cookbook/models/openai/chat/async_basic_stream.py
diff --git a/cookbook/models/openai/async_tool_use.py b/cookbook/models/openai/chat/async_tool_use.py
similarity index 100%
rename from cookbook/models/openai/async_tool_use.py
rename to cookbook/models/openai/chat/async_tool_use.py
diff --git a/cookbook/models/openai/audio_input_agent.py b/cookbook/models/openai/chat/audio_input_agent.py
similarity index 100%
rename from cookbook/models/openai/audio_input_agent.py
rename to cookbook/models/openai/chat/audio_input_agent.py
diff --git a/cookbook/models/openai/audio_input_and_output_multi_turn.py b/cookbook/models/openai/chat/audio_input_and_output_multi_turn.py
similarity index 100%
rename from cookbook/models/openai/audio_input_and_output_multi_turn.py
rename to cookbook/models/openai/chat/audio_input_and_output_multi_turn.py
diff --git a/cookbook/models/openai/audio_input_local_file_upload.py b/cookbook/models/openai/chat/audio_input_local_file_upload.py
similarity index 100%
rename from cookbook/models/openai/audio_input_local_file_upload.py
rename to cookbook/models/openai/chat/audio_input_local_file_upload.py
diff --git a/cookbook/models/openai/audio_output_agent.py b/cookbook/models/openai/chat/audio_output_agent.py
similarity index 100%
rename from cookbook/models/openai/audio_output_agent.py
rename to cookbook/models/openai/chat/audio_output_agent.py
diff --git a/cookbook/models/openai/audio_output_stream.py b/cookbook/models/openai/chat/audio_output_stream.py
similarity index 100%
rename from cookbook/models/openai/audio_output_stream.py
rename to cookbook/models/openai/chat/audio_output_stream.py
diff --git a/cookbook/models/openai/basic.py b/cookbook/models/openai/chat/basic.py
similarity index 100%
rename from cookbook/models/openai/basic.py
rename to cookbook/models/openai/chat/basic.py
diff --git a/cookbook/models/openai/basic_stream.py b/cookbook/models/openai/chat/basic_stream.py
similarity index 100%
rename from cookbook/models/openai/basic_stream.py
rename to cookbook/models/openai/chat/basic_stream.py
diff --git a/cookbook/models/openai/generate_images.py b/cookbook/models/openai/chat/generate_images.py
similarity index 100%
rename from cookbook/models/openai/generate_images.py
rename to cookbook/models/openai/chat/generate_images.py
diff --git a/cookbook/models/openai/image_agent.py b/cookbook/models/openai/chat/image_agent.py
similarity index 100%
rename from cookbook/models/openai/image_agent.py
rename to cookbook/models/openai/chat/image_agent.py
diff --git a/cookbook/models/openai/image_agent_bytes.py b/cookbook/models/openai/chat/image_agent_bytes.py
similarity index 100%
rename from cookbook/models/openai/image_agent_bytes.py
rename to cookbook/models/openai/chat/image_agent_bytes.py
diff --git a/cookbook/models/openai/image_agent_with_memory.py b/cookbook/models/openai/chat/image_agent_with_memory.py
similarity index 100%
rename from cookbook/models/openai/image_agent_with_memory.py
rename to cookbook/models/openai/chat/image_agent_with_memory.py
diff --git a/cookbook/models/openai/knowledge.py b/cookbook/models/openai/chat/knowledge.py
similarity index 100%
rename from cookbook/models/openai/knowledge.py
rename to cookbook/models/openai/chat/knowledge.py
diff --git a/cookbook/models/openai/memory.py b/cookbook/models/openai/chat/memory.py
similarity index 100%
rename from cookbook/models/openai/memory.py
rename to cookbook/models/openai/chat/memory.py
diff --git a/cookbook/models/openai/metrics.py b/cookbook/models/openai/chat/metrics.py
similarity index 100%
rename from cookbook/models/openai/metrics.py
rename to cookbook/models/openai/chat/metrics.py
diff --git a/cookbook/models/openai/reasoning/__init__.py b/cookbook/models/openai/chat/reasoning/__init__.py
similarity index 100%
rename from cookbook/models/openai/reasoning/__init__.py
rename to cookbook/models/openai/chat/reasoning/__init__.py
diff --git a/cookbook/models/openai/reasoning/o1.py b/cookbook/models/openai/chat/reasoning/o1.py
similarity index 100%
rename from cookbook/models/openai/reasoning/o1.py
rename to cookbook/models/openai/chat/reasoning/o1.py
diff --git a/cookbook/models/openai/reasoning/o3_mini_stream.py b/cookbook/models/openai/chat/reasoning/o3_mini_stream.py
similarity index 100%
rename from cookbook/models/openai/reasoning/o3_mini_stream.py
rename to cookbook/models/openai/chat/reasoning/o3_mini_stream.py
diff --git a/cookbook/models/openai/reasoning/o3_mini_tool_use.py b/cookbook/models/openai/chat/reasoning/o3_mini_tool_use.py
similarity index 100%
rename from cookbook/models/openai/reasoning/o3_mini_tool_use.py
rename to cookbook/models/openai/chat/reasoning/o3_mini_tool_use.py
diff --git a/cookbook/models/openai/reasoning/reasoning_effort.py b/cookbook/models/openai/chat/reasoning/reasoning_effort.py
similarity index 100%
rename from cookbook/models/openai/reasoning/reasoning_effort.py
rename to cookbook/models/openai/chat/reasoning/reasoning_effort.py
diff --git a/cookbook/models/openai/storage.py b/cookbook/models/openai/chat/storage.py
similarity index 100%
rename from cookbook/models/openai/storage.py
rename to cookbook/models/openai/chat/storage.py
diff --git a/cookbook/models/openai/structured_output.py b/cookbook/models/openai/chat/structured_output.py
similarity index 100%
rename from cookbook/models/openai/structured_output.py
rename to cookbook/models/openai/chat/structured_output.py
diff --git a/cookbook/models/openai/tool_use.py b/cookbook/models/openai/chat/tool_use.py
similarity index 100%
rename from cookbook/models/openai/tool_use.py
rename to cookbook/models/openai/chat/tool_use.py
diff --git a/cookbook/models/openai/tool_use_stream.py b/cookbook/models/openai/chat/tool_use_stream.py
similarity index 100%
rename from cookbook/models/openai/tool_use_stream.py
rename to cookbook/models/openai/chat/tool_use_stream.py
diff --git a/cookbook/models/openai/responses/async_basic.py b/cookbook/models/openai/responses/async_basic.py
new file mode 100644
index 0000000000..dfdd69b982
--- /dev/null
+++ b/cookbook/models/openai/responses/async_basic.py
@@ -0,0 +1,13 @@
+import asyncio
+
+from agno.agent import Agent, RunResponse  # noqa
+from agno.models.openai import OpenAIResponses
+
+agent = Agent(model=OpenAIResponses(id="gpt-4o"), markdown=True)
+
+# Get the response in a variable
+# run: RunResponse = agent.run("Share a 2 sentence horror story")
+# print(run.content)
+
+# Print the response in the terminal
+asyncio.run(agent.aprint_response("Share a 2 sentence horror story"))
diff --git a/cookbook/models/openai/responses/async_basic_stream.py b/cookbook/models/openai/responses/async_basic_stream.py
new file mode 100644
index 0000000000..0bb2509a69
--- /dev/null
+++ b/cookbook/models/openai/responses/async_basic_stream.py
@@ -0,0 +1,15 @@
+import asyncio
+from typing import Iterator  # noqa
+
+from agno.agent import Agent, RunResponse  # noqa
+from agno.models.openai import OpenAIResponses
+
+agent = Agent(model=OpenAIResponses(id="gpt-4o"), markdown=True)
+
+# Get the response in a variable
+# run_response: Iterator[RunResponse] = agent.run("Share a 2 sentence horror story", stream=True)
+# for chunk in run_response:
+#     print(chunk.content)
+
+# Print the response in the terminal
+asyncio.run(agent.aprint_response("Share a 2 sentence horror story", stream=True))
diff --git a/cookbook/models/openai/responses/async_tool_use.py b/cookbook/models/openai/responses/async_tool_use.py
new file mode 100644
index 0000000000..1323848158
--- /dev/null
+++ b/cookbook/models/openai/responses/async_tool_use.py
@@ -0,0 +1,15 @@
+"""Run `pip install duckduckgo-search` to install dependencies."""
+
+import asyncio
+
+from agno.agent import Agent
+from agno.models.openai import OpenAIResponses
+from agno.tools.duckduckgo import DuckDuckGoTools
+
+agent = Agent(
+    model=OpenAIResponses(id="gpt-4o"),
+    tools=[DuckDuckGoTools()],
+    show_tool_calls=True,
+    markdown=True,
+)
+asyncio.run(agent.aprint_response("Whats happening in France?", stream=True))
diff --git a/cookbook/models/openai/responses/basic.py b/cookbook/models/openai/responses/basic.py
new file mode 100644
index 0000000000..8ba45ee820
--- /dev/null
+++ b/cookbook/models/openai/responses/basic.py
@@ -0,0 +1,13 @@
+from agno.agent import Agent, RunResponse  # noqa
+from agno.models.openai import OpenAIResponses
+
+agent = Agent(model=OpenAIResponses(id="gpt-4o"), markdown=True)
+
+# Get the response in a variable
+# run: RunResponse = agent.run("Share a 2 sentence horror story")
+# print(run.content)
+
+# Print the response in the terminal
+agent.print_response("Share a 2 sentence horror story")
+
+print(agent.run_response.metrics)
diff --git a/cookbook/models/openai/responses/basic_stream.py b/cookbook/models/openai/responses/basic_stream.py
new file mode 100644
index 0000000000..ef81a62715
--- /dev/null
+++ b/cookbook/models/openai/responses/basic_stream.py
@@ -0,0 +1,13 @@
+from typing import Iterator  # noqa
+from agno.agent import Agent, RunResponse  # noqa
+from agno.models.openai import OpenAIResponses
+
+agent = Agent(model=OpenAIResponses(id="gpt-4o"), markdown=True)
+
+# Get the response in a variable
+# run_response: Iterator[RunResponse] = agent.run("Share a 2 sentence horror story", stream=True)
+# for chunk in run_response:
+#     print(chunk.content)
+
+# Print the response in the terminal
+agent.print_response("Share a 2 sentence horror story", stream=True)
diff --git a/cookbook/models/openai/responses/image_agent.py b/cookbook/models/openai/responses/image_agent.py
new file mode 100644
index 0000000000..6e03fd58b5
--- /dev/null
+++ b/cookbook/models/openai/responses/image_agent.py
@@ -0,0 +1,20 @@
+from agno.agent import Agent
+from agno.media import Image
+from agno.models.openai import OpenAIResponses
+from agno.tools.googlesearch import GoogleSearchTools
+
+agent = Agent(
+    model=OpenAIResponses(id="gpt-4o"),
+    tools=[GoogleSearchTools()],
+    markdown=True,
+)
+
+agent.print_response(
+    "Tell me about this image and give me the latest news about it.",
+    images=[
+        Image(
+            url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg"
+        )
+    ],
+    stream=True,
+)
diff --git a/cookbook/models/openai/responses/image_agent_bytes.py b/cookbook/models/openai/responses/image_agent_bytes.py
new file mode 100644
index 0000000000..8a612efbc6
--- /dev/null
+++ b/cookbook/models/openai/responses/image_agent_bytes.py
@@ -0,0 +1,31 @@
+from pathlib import Path
+
+from agno.agent import Agent
+from agno.media import Image
+from agno.models.openai import OpenAIResponses
+from agno.tools.googlesearch import GoogleSearchTools
+from agno.utils.media import download_image
+
+agent = Agent(
+    model=OpenAIResponses(id="gpt-4o"),
+    tools=[GoogleSearchTools()],
+    markdown=True,
+)
+
+image_path = Path(__file__).parent.joinpath("sample.jpg")
+
+download_image(
+    url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg",
+    output_path=str(image_path),
+)
+
+# Read the image file content as bytes
+image_bytes = image_path.read_bytes()
+
+agent.print_response(
+    "Tell me about this image and give me the latest news about it.",
+    images=[
+        Image(content=image_bytes),
+    ],
+    stream=True,
+)
diff --git a/cookbook/models/openai/responses/image_agent_with_memory.py b/cookbook/models/openai/responses/image_agent_with_memory.py
new file mode 100644
index 0000000000..d4da56d25d
--- /dev/null
+++ b/cookbook/models/openai/responses/image_agent_with_memory.py
@@ -0,0 +1,23 @@
+from agno.agent import Agent
+from agno.media import Image
+from agno.models.openai import OpenAIResponses
+from agno.tools.googlesearch import GoogleSearchTools
+
+agent = Agent(
+    model=OpenAIResponses(id="gpt-4o"),
+    tools=[GoogleSearchTools()],
+    markdown=True,
+    add_history_to_messages=True,
+    num_history_responses=3,
+)
+
+agent.print_response(
+    "Tell me about this image and give me the latest news about it.",
+    images=[
+        Image(
+            url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg"
+        )
+    ],
+)
+
+agent.print_response("Tell me where I can get more images?")
diff --git a/cookbook/models/openai/responses/knowledge.py b/cookbook/models/openai/responses/knowledge.py
new file mode 100644
index 0000000000..4649a1e449
--- /dev/null
+++ b/cookbook/models/openai/responses/knowledge.py
@@ -0,0 +1,21 @@
+"""Run `pip install sqlalchemy pgvector pypdf openai` to install dependencies."""
+
+from agno.agent import Agent
+from agno.knowledge.pdf_url import PDFUrlKnowledgeBase
+from agno.models.openai import OpenAIResponses
+from agno.vectordb.pgvector import PgVector
+
+db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
+
+knowledge_base = PDFUrlKnowledgeBase(
+    urls=["https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf"],
+    vector_db=PgVector(table_name="recipes", db_url=db_url),
+)
+knowledge_base.load(recreate=True)  # Comment out after first run
+
+agent = Agent(
+    model=OpenAIResponses(id="gpt-4o"),
+    knowledge=knowledge_base,
+    show_tool_calls=True,
+)
+agent.print_response("How to make Thai curry?", markdown=True)
diff --git a/cookbook/models/openai/responses/memory.py b/cookbook/models/openai/responses/memory.py
new file mode 100644
index 0000000000..9f2c8e3ad5
--- /dev/null
+++ b/cookbook/models/openai/responses/memory.py
@@ -0,0 +1,56 @@
+"""
+This recipe shows how to use personalized memories and summaries in an agent.
+Steps:
+1. Run: `./cookbook/scripts/run_pgvector.sh` to start a postgres container with pgvector
+2. Run: `pip install openai sqlalchemy 'psycopg[binary]' pgvector` to install the dependencies
+3. Run: `python cookbook/models/openai/responses/memory.py` to run the agent
+"""
+
+from agno.agent import Agent, AgentMemory
+from agno.memory.db.postgres import PgMemoryDb
+from agno.models.openai import OpenAIResponses
+from agno.storage.agent.postgres import PostgresAgentStorage
+from rich.pretty import pprint
+
+db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
+agent = Agent(
+    model=OpenAIResponses(id="gpt-4o"),
+    # Store the memories and summary in a database
+    memory=AgentMemory(
+        db=PgMemoryDb(table_name="agent_memory", db_url=db_url),
+        create_user_memories=True,
+        create_session_summary=True,
+    ),
+    # Store agent sessions in a database
+    storage=PostgresAgentStorage(
+        table_name="personalized_agent_sessions", db_url=db_url
+    ),
+    # Show debug logs so you can see the memory being created
+    # debug_mode=True,
+)
+
+# -*- Share personal information
+agent.print_response("My name is john billings?", stream=True)
+# -*- Print memories
+pprint(agent.memory.memories)
+# -*- Print summary
+pprint(agent.memory.summary)
+
+# -*- Share personal information
+agent.print_response("I live in nyc?", stream=True)
+# -*- Print memories
+pprint(agent.memory.memories)
+# -*- Print summary
+pprint(agent.memory.summary)
+
+# -*- Share personal information
+agent.print_response("I'm going to a concert tomorrow?", stream=True)
+# -*- Print memories
+pprint(agent.memory.memories)
+# -*- Print summary
+pprint(agent.memory.summary)
+
+# Ask about the conversation
+agent.print_response(
+    "What have we been talking about, do you know my name?", stream=True
+)
diff --git a/cookbook/models/openai/responses/reasoning_o3_mini.py b/cookbook/models/openai/responses/reasoning_o3_mini.py
new file mode 100644
index 0000000000..1d09346a74
--- /dev/null
+++ b/cookbook/models/openai/responses/reasoning_o3_mini.py
@@ -0,0 +1,13 @@
+from agno.agent import Agent
+from agno.models.openai import OpenAIResponses
+from agno.tools.yfinance import YFinanceTools
+
+agent = Agent(
+    model=OpenAIResponses(id="o3-mini", reasoning_effort="high"),
+    tools=[YFinanceTools(enable_all=True)],
+    show_tool_calls=True,
+    markdown=True,
+)
+
+# Print the response in the terminal
+agent.print_response("Write a report on the NVDA, is it a good buy?", stream=True)
diff --git a/cookbook/models/openai/responses/storage.py b/cookbook/models/openai/responses/storage.py
new file mode 100644
index 0000000000..5eb117966e
--- /dev/null
+++ b/cookbook/models/openai/responses/storage.py
@@ -0,0 +1,17 @@
+"""Run `pip install duckduckgo-search sqlalchemy openai` to install dependencies."""
+
+from agno.agent import Agent
+from agno.models.openai import OpenAIResponses
+from agno.storage.agent.postgres import PostgresAgentStorage
+from agno.tools.duckduckgo import DuckDuckGoTools
+
"postgresql+psycopg://ai:ai@localhost:5532/ai" + +agent = Agent( + model=OpenAIResponses(id="gpt-4o"), + storage=PostgresAgentStorage(table_name="agent_sessions", db_url=db_url), + tools=[DuckDuckGoTools()], + add_history_to_messages=True, +) +agent.print_response("How many people live in Canada?") +agent.print_response("What is their national anthem called?") diff --git a/cookbook/models/openai/responses/structured_output.py b/cookbook/models/openai/responses/structured_output.py new file mode 100644 index 0000000000..2082a00f34 --- /dev/null +++ b/cookbook/models/openai/responses/structured_output.py @@ -0,0 +1,53 @@ +from typing import List + +from agno.agent import Agent, RunResponse # noqa +from agno.models.openai import OpenAIChat +from agno.models.openai.responses import OpenAIResponses # noqa +from pydantic import BaseModel, Field +from rich.pretty import pprint + + +class MovieScript(BaseModel): + setting: str = Field( + ..., description="Provide a nice setting for a blockbuster movie." + ) + ending: str = Field( + ..., + description="Ending of the movie. If not available, provide a happy ending.", + ) + genre: str = Field( + ..., + description="Genre of the movie. If not available, select action, thriller or romantic comedy.", + ) + name: str = Field(..., description="Give a name to this movie") + characters: List[str] = Field(..., description="Name of characters for this movie.") + storyline: str = Field( + ..., description="3 sentence storyline for the movie. Make it exciting!" + ) + + +# Agent that uses JSON mode +json_mode_agent = Agent( + model=OpenAIResponses(id="gpt-4o"), + description="You write movie scripts.", + response_model=MovieScript, + structured_outputs=True, +) + +# Agent that uses structured outputs +structured_output_agent = Agent( + model=OpenAIResponses(id="gpt-4o-2024-08-06"), + description="You write movie scripts.", + response_model=MovieScript, + structured_outputs=True, +) + + +# Get the response in a variable +# json_mode_response: RunResponse = json_mode_agent.run("New York") +# pprint(json_mode_response.content) +# structured_output_response: RunResponse = structured_output_agent.run("New York") +# pprint(structured_output_response.content) + +json_mode_agent.print_response("New York") +structured_output_agent.print_response("New York") diff --git a/cookbook/models/openai/responses/tool_use.py b/cookbook/models/openai/responses/tool_use.py new file mode 100644 index 0000000000..cf3adc3f0b --- /dev/null +++ b/cookbook/models/openai/responses/tool_use.py @@ -0,0 +1,13 @@ +"""Run `pip install duckduckgo-search` to install dependencies.""" + +from agno.agent import Agent +from agno.models.openai import OpenAIResponses +from agno.tools.duckduckgo import DuckDuckGoTools + +agent = Agent( + model=OpenAIResponses(id="gpt-4o"), + tools=[DuckDuckGoTools()], + show_tool_calls=True, + markdown=True, +) +agent.print_response("Whats happening in France?") diff --git a/cookbook/models/openai/responses/tool_use_stream.py b/cookbook/models/openai/responses/tool_use_stream.py new file mode 100644 index 0000000000..02c62d1cd1 --- /dev/null +++ b/cookbook/models/openai/responses/tool_use_stream.py @@ -0,0 +1,13 @@ +"""Run `pip install duckduckgo-search` to install dependencies.""" + +from agno.agent import Agent +from agno.models.openai import OpenAIResponses +from agno.tools.duckduckgo import DuckDuckGoTools + +agent = Agent( + model=OpenAIResponses(id="gpt-4o"), + tools=[DuckDuckGoTools()], + show_tool_calls=True, + markdown=True, +) +agent.print_response("Whats 
+agent.print_response("Whats happening in France?", stream=True)
diff --git a/cookbook/models/openai/responses/websearch_builtin_tool.py b/cookbook/models/openai/responses/websearch_builtin_tool.py
new file mode 100644
index 0000000000..041690c164
--- /dev/null
+++ b/cookbook/models/openai/responses/websearch_builtin_tool.py
@@ -0,0 +1,10 @@
+"""Uses the built-in web search tool of the Responses API; no extra dependencies required."""
+
+from agno.agent import Agent
+from agno.models.openai import OpenAIResponses
+
+agent = Agent(
+    model=OpenAIResponses(id="gpt-4o", web_search=True),
+    markdown=True,
+)
+agent.print_response("Whats happening in France?")
diff --git a/libs/agno/agno/agent/agent.py b/libs/agno/agno/agent/agent.py
index ca47214418..6794c75775 100644
--- a/libs/agno/agno/agent/agent.py
+++ b/libs/agno/agno/agent/agent.py
@@ -31,7 +31,6 @@
 from agno.memory.agent import AgentMemory, AgentRun
 from agno.models.base import Model
 from agno.models.message import Message, MessageReferences
-from agno.models.openai.like import OpenAILike
 from agno.models.response import ModelResponse, ModelResponseEvent
 from agno.reasoning.step import NextAction, ReasoningStep, ReasoningSteps
 from agno.run.messages import RunMessages
@@ -2849,6 +2848,8 @@ def get_audio(self) -> Optional[List[AudioArtifact]]:
     ###########################################################################
 
     def reason(self, run_messages: RunMessages) -> Iterator[RunResponse]:
+        from agno.models.openai.like import OpenAILike
+
         # Yield a reasoning started event
         if self.stream_intermediate_steps:
             yield self.create_run_response(content="Reasoning started", event=RunEvent.reasoning_started)
@@ -3030,6 +3031,8 @@ def reason(self, run_messages: RunMessages) -> Iterator[RunResponse]:
     )
 
     async def areason(self, run_messages: RunMessages) -> Any:
+        from agno.models.openai.like import OpenAILike
+
         # Yield a reasoning started event
         if self.stream_intermediate_steps:
             yield self.create_run_response(content="Reasoning started", event=RunEvent.reasoning_started)
diff --git a/libs/agno/agno/models/openai/__init__.py b/libs/agno/agno/models/openai/__init__.py
index cbd773dafa..394d1a9518 100644
--- a/libs/agno/agno/models/openai/__init__.py
+++ b/libs/agno/agno/models/openai/__init__.py
@@ -1,2 +1,3 @@
 from agno.models.openai.chat import OpenAIChat
 from agno.models.openai.like import OpenAILike
+from agno.models.openai.responses import OpenAIResponses
diff --git a/libs/agno/agno/models/openai/chat.py b/libs/agno/agno/models/openai/chat.py
index e0d94d5871..f8d68b672b 100644
--- a/libs/agno/agno/models/openai/chat.py
+++ b/libs/agno/agno/models/openai/chat.py
@@ -654,7 +654,7 @@ def parse_provider_response_delta(self, response_delta: ChatCompletionChunk) ->
             response_delta: Raw response chunk from OpenAI
 
         Returns:
-            ProviderResponse: Iterator of parsed response data
+            ModelResponse: Parsed response data
         """
         model_response = ModelResponse()
         if response_delta.choices and len(response_delta.choices) > 0:
diff --git a/libs/agno/agno/models/openai/responses.py b/libs/agno/agno/models/openai/responses.py
new file mode 100644
index 0000000000..8d6e589e1c
--- /dev/null
+++ b/libs/agno/agno/models/openai/responses.py
@@ -0,0 +1,668 @@
+from dataclasses import dataclass
+from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Tuple, Union
+
+import httpx
+from pydantic import BaseModel
+
+from agno.exceptions import ModelProviderError
+from agno.models.base import MessageData, Model
+from agno.models.message import Message
+from agno.models.response import ModelResponse
+from agno.utils.log import logger
+from agno.utils.openai_responses import images_to_message
+
+try:
+    import importlib.metadata as metadata
+
+    from openai import APIConnectionError, APIStatusError, AsyncOpenAI, OpenAI, RateLimitError
+    from openai.resources.responses.responses import Response, ResponseStreamEvent
+    from packaging import version
+
+    # Get installed OpenAI version
+    openai_version = metadata.version("openai")
+
+    # Check version compatibility
+    parsed_version = version.parse(openai_version)
+    if parsed_version < version.parse("1.66.0"):
+        import warnings
+
+        warnings.warn("OpenAI v1.66.0 or higher is recommended for the Responses API", UserWarning)
+
+except ImportError as e:
+    # Handle different import error scenarios
+    if "openai" in str(e):
+        raise ImportError("OpenAI not installed. Install with `pip install openai -U`") from e
+    else:
+        raise ImportError("Missing dependencies. Install with `pip install packaging importlib-metadata`") from e
+
+
+@dataclass
+class OpenAIResponses(Model):
+    """
+    Implementation for the OpenAI Responses API.
+
+    For more information, see: https://platform.openai.com/docs/api-reference/responses
+    """
+
+    id: str = "gpt-4o"
+    name: str = "OpenAIResponses"
+    provider: str = "OpenAI"
+    supports_structured_outputs: bool = True
+
+    # API configuration
+    api_key: Optional[str] = None
+    organization: Optional[str] = None
+    base_url: Optional[Union[str, httpx.URL]] = None
+    timeout: Optional[float] = None
+    max_retries: Optional[int] = None
+    default_headers: Optional[Dict[str, str]] = None
+    default_query: Optional[Dict[str, str]] = None
+    http_client: Optional[httpx.Client] = None
+    client_params: Optional[Dict[str, Any]] = None
+
+    # Response parameters
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    max_output_tokens: Optional[int] = None
+    response_format: Optional[Any] = None
+    metadata: Optional[Dict[str, Any]] = None
+    store: Optional[bool] = None
+    reasoning_effort: Optional[str] = None
+
+    # Built-in tools
+    web_search: bool = False
+
+    # The role to map the message role to.
+    role_map = {
+        "system": "developer",
+        "user": "user",
+        "assistant": "assistant",
+        "tool": "tool",
+    }
+
+    # OpenAI clients
+    client: Optional[OpenAI] = None
+    async_client: Optional[AsyncOpenAI] = None
+
+    # Internal parameters. Not used for API requests
+    # Whether to use structured outputs with this Model.
+    structured_outputs: bool = False
+
+    def _get_client_params(self) -> Dict[str, Any]:
+        """
+        Get client parameters for API requests.
+
+        Returns:
+            Dict[str, Any]: Client parameters
+        """
+        from os import getenv
+
+        # Fetch API key from env if not already set
+        if not self.api_key:
+            self.api_key = getenv("OPENAI_API_KEY")
+            if not self.api_key:
+                logger.error("OPENAI_API_KEY not set. Please set the OPENAI_API_KEY environment variable.")
+
+        # Define base client params
+        base_params = {
+            "api_key": self.api_key,
+            "organization": self.organization,
+            "base_url": self.base_url,
+            "timeout": self.timeout,
+            "max_retries": self.max_retries,
+            "default_headers": self.default_headers,
+            "default_query": self.default_query,
+        }
+
+        # Create client_params dict with non-None values
+        client_params = {k: v for k, v in base_params.items() if v is not None}
+
+        # Add additional client params if provided
+        if self.client_params:
+            client_params.update(self.client_params)
+
+        return client_params
+
+    def get_client(self) -> OpenAI:
+        """
+        Returns an OpenAI client.
+
+        Returns:
+            OpenAI: An instance of the OpenAI client.
+        """
+        if self.client:
+            return self.client
+
+        client_params: Dict[str, Any] = self._get_client_params()
+        if self.http_client is not None:
+            client_params["http_client"] = self.http_client
+
+        self.client = OpenAI(**client_params)
+        return self.client
+
+    def get_async_client(self) -> AsyncOpenAI:
+        """
+        Returns an asynchronous OpenAI client.
+
+        Returns:
+            AsyncOpenAI: An instance of the asynchronous OpenAI client.
+        """
+        if self.async_client:
+            return self.async_client
+
+        client_params: Dict[str, Any] = self._get_client_params()
+        if self.http_client:
+            client_params["http_client"] = self.http_client
+        else:
+            # Create a new async HTTP client with custom limits
+            client_params["http_client"] = httpx.AsyncClient(
+                limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
+            )
+
+        self.async_client = AsyncOpenAI(**client_params)
+        return self.async_client
+
+    @property
+    def request_kwargs(self) -> Dict[str, Any]:
+        """
+        Returns keyword arguments for API requests.
+
+        Returns:
+            Dict[str, Any]: A dictionary of keyword arguments for API requests.
+        """
+        # Define base request parameters
+        base_params = {
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+            "max_output_tokens": self.max_output_tokens,
+            "metadata": self.metadata,
+            "store": self.store,
+        }
+        if self.reasoning_effort is not None:
+            base_params["reasoning"] = {
+                "effort": self.reasoning_effort,
+            }
+
+        if self.response_format is not None:
+            if self.structured_outputs and isinstance(self.response_format, type) and issubclass(self.response_format, BaseModel):
+                schema = self.response_format.model_json_schema()
+                schema["additionalProperties"] = False
+                base_params["text"] = {
+                    "format": {
+                        "type": "json_schema",
+                        "name": self.response_format.__name__,
+                        "schema": schema,
+                        "strict": True,
+                    }
+                }
+            else:
+                # JSON mode
+                base_params["text"] = {"format": {"type": "json_object"}}
+
+        # Filter out None values
+        request_params: Dict[str, Any] = {k: v for k, v in base_params.items() if v is not None}
+
+        if self.web_search:
+            request_params.setdefault("tools", [])  # type: ignore
+            request_params["tools"].append({"type": "web_search_preview"})
+
+        # Add tools
+        if self._functions is not None and len(self._functions) > 0:
+            request_params.setdefault("tools", [])  # type: ignore
+            for function in self._functions.values():
+                function_dict = function.to_dict()
+                for prop in function_dict["parameters"]["properties"].values():
+                    if isinstance(prop["type"], list):
+                        prop["type"] = prop["type"][0]
+                request_params["tools"].append({"type": "function", **function_dict})
+            if self.tool_choice is not None:
+                request_params["tool_choice"] = self.tool_choice
+
+        return request_params
+
+    def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]:
+        """
+        Format messages into the format expected by the Responses API.
+
+        Args:
+            messages (List[Message]): The messages to format.
+
+        Returns:
+            List[Dict[str, Any]]: The formatted messages.
+ """ + formatted_messages: List[Dict[str, Any]] = [] + for message in messages: + if message.role in ["user", "system"]: + message_dict: Dict[str, Any] = { + "role": self.role_map[message.role], + "content": message.content, + } + message_dict = {k: v for k, v in message_dict.items() if v is not None} + + # Ignore non-string message content + # because we assume that the images/audio are already added to the message + if message.images is not None and len(message.images) > 0: + # Ignore non-string message content + # because we assume that the images/audio are already added to the message + if isinstance(message.content, str): + message_dict["content"] = [{"type": "input_text", "text": message.content}] + if message.images is not None: + message_dict["content"].extend(images_to_message(images=message.images)) + + # TODO: File support + + if message.audio is not None: + logger.warning("Audio input is currently unsupported.") + + if message.videos is not None: + logger.warning("Video input is currently unsupported.") + + formatted_messages.append(message_dict) + + else: + # OpenAI expects the tool_calls to be None if empty, not an empty list + if message.tool_calls is not None and len(message.tool_calls) > 0: + for tool_call in message.tool_calls: + formatted_messages.append( + { + "type": "function_call", + "id": tool_call["id"], + "call_id": tool_call["call_id"], + "name": tool_call["function"]["name"], + "arguments": tool_call["function"]["arguments"], + "status": "completed", + } + ) + + if message.role == "tool": + formatted_messages.append( + {"type": "function_call_output", "call_id": message.tool_call_id, "output": message.content} + ) + return formatted_messages + + def invoke(self, messages: List[Message]) -> Response: + """ + Send a request to the OpenAI Responses API. + + Args: + messages (List[Message]): A list of messages to send to the model. + + Returns: + Response: The response from the API. + """ + try: + return self.get_client().responses.create( + model=self.id, + input=self._format_messages(messages), # type: ignore + **self.request_kwargs, + ) + except RateLimitError as e: + logger.error(f"Rate limit error from OpenAI API: {e}") + error_message = e.response.json().get("error", {}) + error_message = ( + error_message.get("message", "Unknown model error") + if isinstance(error_message, dict) + else error_message + ) + raise ModelProviderError( + message=error_message, + status_code=e.response.status_code, + model_name=self.name, + model_id=self.id, + ) from e + except APIConnectionError as e: + logger.error(f"API connection error from OpenAI API: {e}") + raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e + except APIStatusError as e: + logger.error(f"API status error from OpenAI API: {e}") + error_message = e.response.json().get("error", {}) + error_message = ( + error_message.get("message", "Unknown model error") + if isinstance(error_message, dict) + else error_message + ) + raise ModelProviderError( + message=error_message, + status_code=e.response.status_code, + model_name=self.name, + model_id=self.id, + ) from e + except Exception as e: + logger.error(f"Error from OpenAI API: {e}") + raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e + + async def ainvoke(self, messages: List[Message]) -> Response: + """ + Sends an asynchronous request to the OpenAI Responses API. + + Args: + messages (List[Message]): A list of messages to send to the model. + + Returns: + Response: The response from the API. 
+ """ + try: + return await self.get_async_client().responses.create( + model=self.id, + input=self._format_messages(messages), # type: ignore + **self.request_kwargs, + ) + except RateLimitError as e: + logger.error(f"Rate limit error from OpenAI API: {e}") + error_message = e.response.json().get("error", {}) + error_message = ( + error_message.get("message", "Unknown model error") + if isinstance(error_message, dict) + else error_message + ) + raise ModelProviderError( + message=error_message, + status_code=e.response.status_code, + model_name=self.name, + model_id=self.id, + ) from e + except APIConnectionError as e: + logger.error(f"API connection error from OpenAI API: {e}") + raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e + except APIStatusError as e: + logger.error(f"API status error from OpenAI API: {e}") + error_message = e.response.json().get("error", {}) + error_message = ( + error_message.get("message", "Unknown model error") + if isinstance(error_message, dict) + else error_message + ) + raise ModelProviderError( + message=error_message, + status_code=e.response.status_code, + model_name=self.name, + model_id=self.id, + ) from e + except Exception as e: + logger.error(f"Error from OpenAI API: {e}") + raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e + + def invoke_stream(self, messages: List[Message]) -> Iterator[ResponseStreamEvent]: + """ + Send a streaming request to the OpenAI Responses API. + + Args: + messages (List[Message]): A list of messages to send to the model. + + Returns: + Iterator[ResponseStreamEvent]: An iterator of response stream events. + """ + try: + yield from self.get_client().responses.create( + model=self.id, + input=self._format_messages(messages), # type: ignore + stream=True, + **self.request_kwargs, + ) # type: ignore + except RateLimitError as e: + logger.error(f"Rate limit error from OpenAI API: {e}") + error_message = e.response.json().get("error", {}) + error_message = ( + error_message.get("message", "Unknown model error") + if isinstance(error_message, dict) + else error_message + ) + raise ModelProviderError( + message=error_message, + status_code=e.response.status_code, + model_name=self.name, + model_id=self.id, + ) from e + except APIConnectionError as e: + logger.error(f"API connection error from OpenAI API: {e}") + raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e + except APIStatusError as e: + logger.error(f"API status error from OpenAI API: {e}") + error_message = e.response.json().get("error", {}) + error_message = ( + error_message.get("message", "Unknown model error") + if isinstance(error_message, dict) + else error_message + ) + raise ModelProviderError( + message=error_message, + status_code=e.response.status_code, + model_name=self.name, + model_id=self.id, + ) from e + except Exception as e: + logger.error(f"Error from OpenAI API: {e}") + raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e + + async def ainvoke_stream(self, messages: List[Message]) -> AsyncIterator[ResponseStreamEvent]: + """ + Sends an asynchronous streaming request to the OpenAI Responses API. + + Args: + messages (List[Message]): A list of messages to send to the model. + + Returns: + Any: An asynchronous iterator of chat completion chunks. 
+ """ + try: + async_stream = await self.get_async_client().responses.create( + model=self.id, + input=self._format_messages(messages), # type: ignore + stream=True, + **self.request_kwargs, + ) + async for chunk in async_stream: # type: ignore + yield chunk + except RateLimitError as e: + logger.error(f"Rate limit error from OpenAI API: {e}") + error_message = e.response.json().get("error", {}) + error_message = ( + error_message.get("message", "Unknown model error") + if isinstance(error_message, dict) + else error_message + ) + raise ModelProviderError( + message=error_message, + status_code=e.response.status_code, + model_name=self.name, + model_id=self.id, + ) from e + except APIConnectionError as e: + logger.error(f"API connection error from OpenAI API: {e}") + raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e + except APIStatusError as e: + logger.error(f"API status error from OpenAI API: {e}") + error_message = e.response.json().get("error", {}) + error_message = ( + error_message.get("message", "Unknown model error") + if isinstance(error_message, dict) + else error_message + ) + raise ModelProviderError( + message=error_message, + status_code=e.response.status_code, + model_name=self.name, + model_id=self.id, + ) from e + except Exception as e: + logger.error(f"Error from OpenAI API: {e}") + raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e + + def format_function_call_results( + self, messages: List[Message], function_call_results: List[Message], tool_call_ids: List[str] + ) -> None: + """ + Handle the results of function calls. + + Args: + messages (List[Message]): The list of conversation messages. + function_call_results (List[Message]): The results of the function calls. + tool_ids (List[str]): The tool ids. + """ + if len(function_call_results) > 0: + for _fc_message_index, _fc_message in enumerate(function_call_results): + _fc_message.tool_call_id = tool_call_ids[_fc_message_index] + messages.append(_fc_message) + + def parse_provider_response(self, response: Response) -> ModelResponse: + """ + Parse the OpenAI response into a ModelResponse. + + Args: + response: Response from invoke() method + + Returns: + ModelResponse: Parsed response data + """ + model_response = ModelResponse() + + if response.error: + raise ModelProviderError( + message=response.error.message, + model_name=self.name, + model_id=self.id, + ) + + # Add role + model_response.role = "assistant" + for output in response.output: + if output.type == "message": + # TODO: Support citations/annotations + model_response.content = response.output_text + elif output.type == "function_call": + if model_response.tool_calls is None: + model_response.tool_calls = [] + model_response.tool_calls.append( + { + "id": output.id, + "call_id": output.call_id, + "type": "function", + "function": { + "name": output.name, + "arguments": output.arguments, + }, + } + ) + + model_response.extra = model_response.extra or {} + model_response.extra.setdefault("tool_call_ids", []).append(output.call_id) + + # i.e. 
+        # i.e. we asked for reasoning, so we need to add the reasoning content
+        if self.reasoning_effort:
+            model_response.reasoning_content = response.output_text
+
+        if response.usage is not None:
+            model_response.response_usage = response.usage
+
+        return model_response
+
+    def _process_stream_response(
+        self,
+        stream_event: ResponseStreamEvent,
+        assistant_message: Message,
+        stream_data: MessageData,
+        tool_use: Dict[str, Any],
+    ) -> Tuple[Optional[ModelResponse], Dict[str, Any]]:
+        """
+        Common handler for processing stream responses from the Responses API.
+
+        Args:
+            stream_event: The streamed event from the Responses API
+            assistant_message: The assistant message being built
+            stream_data: Data accumulated during streaming
+            tool_use: Current tool use data being built
+
+        Returns:
+            Tuple containing the ModelResponse to yield and updated tool_use dict
+        """
+        model_response = None
+
+        if stream_event.type == "response.created":
+            # Update metrics
+            if not assistant_message.metrics.time_to_first_token:
+                assistant_message.metrics.set_time_to_first_token()
+
+        elif stream_event.type == "response.output_text.delta":
+            model_response = ModelResponse()
+            # Add content
+            model_response.content = stream_event.delta
+            stream_data.response_content += stream_event.delta
+
+            if self.reasoning_effort:
+                model_response.reasoning_content = stream_event.delta
+                stream_data.response_thinking += stream_event.delta
+
+        elif stream_event.type == "response.output_item.added":
+            item = stream_event.item
+            if item.type == "function_call":
+                tool_use = {
+                    "id": item.id,
+                    "call_id": item.call_id,
+                    "type": "function",
+                    "function": {
+                        "name": item.name,
+                        "arguments": item.arguments,
+                    },
+                }
+
+        elif stream_event.type == "response.function_call_arguments.delta":
+            tool_use["function"]["arguments"] += stream_event.delta
+
+        elif stream_event.type == "response.output_item.done" and tool_use:
+            model_response = ModelResponse()
+            model_response.tool_calls = [tool_use]
+            if assistant_message.tool_calls is None:
+                assistant_message.tool_calls = []
+            assistant_message.tool_calls.append(tool_use)
+
+            stream_data.extra = stream_data.extra or {}
+            stream_data.extra.setdefault("tool_call_ids", []).append(tool_use["call_id"])
+            tool_use = {}
+
+        elif stream_event.type == "response.completed":
+            model_response = ModelResponse()
+            # Add usage metrics if present
+            if stream_event.response.usage is not None:
+                model_response.response_usage = stream_event.response.usage
+
+            self._add_usage_metrics_to_assistant_message(
+                assistant_message=assistant_message,
+                response_usage=model_response.response_usage,
+            )
+
+        return model_response, tool_use
+
+    def process_response_stream(
+        self, messages: List[Message], assistant_message: Message, stream_data: MessageData
+    ) -> Iterator[ModelResponse]:
+        """Process the synchronous response stream."""
+        tool_use: Dict[str, Any] = {}
+
+        for stream_event in self.invoke_stream(messages=messages):
+            model_response, tool_use = self._process_stream_response(
+                stream_event=stream_event,
+                assistant_message=assistant_message,
+                stream_data=stream_data,
+                tool_use=tool_use,
+            )
+            if model_response is not None:
+                yield model_response
+
+    async def aprocess_response_stream(
+        self, messages: List[Message], assistant_message: Message, stream_data: MessageData
+    ) -> AsyncIterator[ModelResponse]:
+        """Process the asynchronous response stream."""
+        tool_use: Dict[str, Any] = {}
+
+        async for stream_event in self.ainvoke_stream(messages=messages):
+            model_response, tool_use = self._process_stream_response(
+                stream_event=stream_event,
+                assistant_message=assistant_message,
+                stream_data=stream_data,
+                tool_use=tool_use,
+            )
+            if model_response is not None:
+                yield model_response
+
+    def parse_provider_response_delta(self, response: Any) -> ModelResponse:  # type: ignore
+        pass
diff --git a/libs/agno/agno/utils/openai_responses.py b/libs/agno/agno/utils/openai_responses.py
new file mode 100644
index 0000000000..904c671804
--- /dev/null
+++ b/libs/agno/agno/utils/openai_responses.py
@@ -0,0 +1,93 @@
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Sequence, Union
+
+from agno.media import Image
+from agno.utils.log import logger
+
+
+def _process_bytes_image(image: bytes) -> Dict[str, Any]:
+    """Process bytes image data."""
+    import base64
+
+    base64_image = base64.b64encode(image).decode("utf-8")
+    image_url = f"data:image/jpeg;base64,{base64_image}"
+    return {"type": "input_image", "image_url": image_url}
+
+
+def _process_image_path(image_path: Union[Path, str]) -> Dict[str, Any]:
+    """Process image (file path)."""
+    # Process local file image
+    import base64
+    import mimetypes
+
+    path = image_path if isinstance(image_path, Path) else Path(image_path)
+    if not path.exists():
+        raise FileNotFoundError(f"Image file not found: {image_path}")
+
+    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
+    with open(path, "rb") as image_file:
+        base64_image = base64.b64encode(image_file.read()).decode("utf-8")
+        image_url = f"data:{mime_type};base64,{base64_image}"
+        return {"type": "input_image", "image_url": image_url}
+
+
+def _process_image_url(image_url: str) -> Dict[str, Any]:
+    """Process image (base64 or URL)."""
+
+    if image_url.startswith("data:image") or image_url.startswith(("http://", "https://")):
+        return {"type": "input_image", "image_url": image_url}
+    else:
+        raise ValueError("Image URL must start with 'data:image' or 'http(s)://'.")
+
+
+def _process_image(image: Image) -> Optional[Dict[str, Any]]:
+    """Process an image based on the format."""
+
+    if image.url is not None:
+        image_payload = _process_image_url(image.url)
+
+    elif image.filepath is not None:
+        image_payload = _process_image_path(image.filepath)
+
+    elif image.content is not None:
+        image_payload = _process_bytes_image(image.content)
+
+    else:
+        logger.warning(f"Unsupported image format: {image}")
+        return None
+
+    if image.detail:
+        image_payload["detail"] = image.detail
+
+    return image_payload
+
+
+def images_to_message(images: Sequence[Image]) -> List[Dict[str, Any]]:
+    """
+    Add images to a message for the model. By default, we use the OpenAI image format but other Models
+    can override this method to use a different image format.
+
+    Args:
+        images: Sequence of Image objects, each providing one of:
+            - url: an http(s) URL or base64 data URL
+            - filepath: a local image file path
+            - content: raw image bytes
+
+    Returns:
+        Message content with images added in the format expected by the model
+    """
+
+    # Collect the formatted image parts
+    image_messages: List[Dict[str, Any]] = []
+
+    # Add images to the message content
+    for image in images:
+        try:
+            image_data = _process_image(image)
+            if image_data:
+                image_messages.append(image_data)
+        except Exception as e:
+            logger.error(f"Failed to process image: {str(e)}")
+            continue
+
+    return image_messages
diff --git a/libs/agno/agno/workspace/settings.py b/libs/agno/agno/workspace/settings.py
index a4f6adc91d..a6aa5509f9 100644
--- a/libs/agno/agno/workspace/settings.py
+++ b/libs/agno/agno/workspace/settings.py
@@ -3,7 +3,7 @@
 from pathlib import Path
 from typing import List, Optional
 
-from pydantic import field_validator, ValidationInfo
+from pydantic import ValidationInfo, field_validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
 from agno.api.schemas.workspace import WorkspaceSchema
diff --git a/libs/agno/tests/integration/models/openai/test_basic.py b/libs/agno/tests/integration/models/openai/chat/test_basic.py
similarity index 100%
rename from libs/agno/tests/integration/models/openai/test_basic.py
rename to libs/agno/tests/integration/models/openai/chat/test_basic.py
diff --git a/libs/agno/tests/integration/models/openai/test_multimodal.py b/libs/agno/tests/integration/models/openai/chat/test_multimodal.py
similarity index 100%
rename from libs/agno/tests/integration/models/openai/test_multimodal.py
rename to libs/agno/tests/integration/models/openai/chat/test_multimodal.py
diff --git a/libs/agno/tests/integration/models/openai/test_tool_use.py b/libs/agno/tests/integration/models/openai/chat/test_tool_use.py
similarity index 100%
rename from libs/agno/tests/integration/models/openai/test_tool_use.py
rename to libs/agno/tests/integration/models/openai/chat/test_tool_use.py
diff --git a/libs/agno/tests/integration/models/openai/responses/__init__.py b/libs/agno/tests/integration/models/openai/responses/__init__.py
new file mode 100644
index 0000000000..aabeb362b8
--- /dev/null
+++ b/libs/agno/tests/integration/models/openai/responses/__init__.py
@@ -0,0 +1 @@
+"""Integration tests for OpenAI Responses API."""
diff --git a/libs/agno/tests/integration/models/openai/responses/test_basic.py b/libs/agno/tests/integration/models/openai/responses/test_basic.py
new file mode 100644
index 0000000000..a158fa02d9
--- /dev/null
+++ b/libs/agno/tests/integration/models/openai/responses/test_basic.py
@@ -0,0 +1,227 @@
+import pytest
+from pydantic import BaseModel, Field
+
+from agno.agent import Agent, RunResponse  # noqa
+from agno.exceptions import ModelProviderError
+from agno.memory import AgentMemory
+from agno.memory.classifier import MemoryClassifier
+from agno.memory.db.sqlite import SqliteMemoryDb
+from agno.memory.manager import MemoryManager
+from agno.memory.summarizer import MemorySummarizer
+from agno.models.openai import OpenAIResponses
+from agno.storage.agent.sqlite import SqliteAgentStorage
+from agno.tools.duckduckgo import DuckDuckGoTools
+
+
+def _assert_metrics(response: RunResponse):
+    """
+    Assert that the response metrics are valid and consistent.
+
+    Args:
+        response: The RunResponse to validate metrics for
+    """
+    input_tokens = response.metrics.get("input_tokens", [])
+    output_tokens = response.metrics.get("output_tokens", [])
+    total_tokens = response.metrics.get("total_tokens", [])
+
+    assert sum(input_tokens) > 0
+    assert sum(output_tokens) > 0
+    assert sum(total_tokens) > 0
+    assert sum(total_tokens) == sum(input_tokens) + sum(output_tokens)
+
+
+def test_basic():
+    """Test basic functionality of the OpenAIResponses model."""
+    agent = Agent(model=OpenAIResponses(id="gpt-4o-mini"), markdown=True, telemetry=False, monitoring=False)
+
+    # Run a simple query
+    response: RunResponse = agent.run("Share a 2 sentence horror story")
+
+    assert response.content is not None
+    assert len(response.messages) == 3
+    assert [m.role for m in response.messages] == ["system", "user", "assistant"]
+
+    _assert_metrics(response)
+
+
+def test_basic_stream():
+    """Test basic streaming functionality of the OpenAIResponses model."""
+    agent = Agent(model=OpenAIResponses(id="gpt-4o-mini"), markdown=True, telemetry=False, monitoring=False)
+
+    response_stream = agent.run("Share a 2 sentence horror story", stream=True)
+
+    # Verify it's an iterator
+    assert hasattr(response_stream, "__iter__")
+
+    responses = list(response_stream)
+    assert len(responses) > 0
+    for response in responses:
+        assert isinstance(response, RunResponse)
+        assert response.content is not None
+
+    _assert_metrics(agent.run_response)
+
+
+@pytest.mark.asyncio
+async def test_async_basic():
+    """Test basic async functionality of the OpenAIResponses model."""
+    agent = Agent(model=OpenAIResponses(id="gpt-4o-mini"), markdown=True, telemetry=False, monitoring=False)
+
+    response = await agent.arun("Share a 2 sentence horror story")
+
+    assert response.content is not None
+    assert len(response.messages) == 3
+    assert [m.role for m in response.messages] == ["system", "user", "assistant"]
+    _assert_metrics(response)
+
+
+@pytest.mark.asyncio
+async def test_async_basic_stream():
+    """Test basic async streaming functionality of the OpenAIResponses model."""
+    agent = Agent(model=OpenAIResponses(id="gpt-4o-mini"), markdown=True, telemetry=False, monitoring=False)
+
+    response_stream = await agent.arun("Share a 2 sentence horror story", stream=True)
+
+    async for response in response_stream:
+        assert isinstance(response, RunResponse)
+        assert response.content is not None
+    _assert_metrics(agent.run_response)
+
+
+def test_exception_handling():
+    """Test proper error handling for invalid model IDs."""
+    agent = Agent(model=OpenAIResponses(id="gpt-100"), markdown=True, telemetry=False, monitoring=False)
+
+    with pytest.raises(ModelProviderError) as exc:
+        agent.run("Share a 2 sentence horror story")
+
+    assert exc.value.model_name == "OpenAIResponses"
+    assert exc.value.model_id == "gpt-100"
+    assert exc.value.status_code == 400
+
+
+def test_with_memory():
+    """Test that the model retains context from previous interactions."""
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        add_history_to_messages=True,
+        num_history_responses=5,
+        markdown=True,
+        telemetry=False,
+        monitoring=False,
+    )
+
+    # First interaction
+    response1 = agent.run("My name is John Smith")
+    assert response1.content is not None
+
+    # Second interaction should remember the name
+    response2 = agent.run("What's my name?")
+    assert "John Smith" in response2.content
+
+    # Verify memories were created
+    assert len(agent.memory.messages) == 5
"assistant", "user", "assistant"] + + # Test metrics structure and types + _assert_metrics(response2) + + +def test_structured_output(): + """Test structured output with Pydantic models.""" + + class MovieScript(BaseModel): + title: str = Field(..., description="Movie title") + genre: str = Field(..., description="Movie genre") + plot: str = Field(..., description="Brief plot summary") + + agent = Agent( + model=OpenAIResponses(id="gpt-4o-mini"), response_model=MovieScript, telemetry=False, monitoring=False + ) + + response = agent.run("Create a movie about time travel") + + # Verify structured output + assert isinstance(response.content, MovieScript) + assert response.content.title is not None + assert response.content.genre is not None + assert response.content.plot is not None + + +def test_structured_output_native(): + """Test native structured output with the responses API.""" + + class MovieScript(BaseModel): + title: str = Field(..., description="Movie title") + genre: str = Field(..., description="Movie genre") + plot: str = Field(..., description="Brief plot summary") + + agent = Agent( + model=OpenAIResponses(id="gpt-4o-mini"), + response_model=MovieScript, + structured_outputs=True, + telemetry=False, + monitoring=False, + ) + + response = agent.run("Create a movie about time travel") + + # Verify structured output + assert isinstance(response.content, MovieScript) + assert response.content.title is not None + assert response.content.genre is not None + assert response.content.plot is not None + + +def test_history(): + """Test conversation history in the agent.""" + agent = Agent( + model=OpenAIResponses(id="gpt-4o-mini"), + storage=SqliteAgentStorage(table_name="responses_agent_sessions", db_file="tmp/agent_storage.db"), + add_history_to_messages=True, + telemetry=False, + monitoring=False, + ) + agent.run("Hello") + assert len(agent.run_response.messages) == 2 + agent.run("Hello 2") + assert len(agent.run_response.messages) == 4 + agent.run("Hello 3") + assert len(agent.run_response.messages) == 6 + agent.run("Hello 4") + assert len(agent.run_response.messages) == 8 + + +def test_persistent_memory(): + """Test persistent memory with the Responses API.""" + agent = Agent( + model=OpenAIResponses(id="gpt-4o-mini"), + tools=[DuckDuckGoTools()], + markdown=True, + show_tool_calls=True, + telemetry=False, + monitoring=False, + instructions=[ + "You can search the internet with DuckDuckGo.", + ], + storage=SqliteAgentStorage(table_name="responses_agent", db_file="tmp/agent_storage.db"), + # Adds the current date and time to the instructions + add_datetime_to_instructions=True, + # Adds the history of the conversation to the messages + add_history_to_messages=True, + # Number of history responses to add to the messages + num_history_responses=15, + memory=AgentMemory( + db=SqliteMemoryDb(db_file="tmp/responses_agent_memory.db"), + create_user_memories=True, + create_session_summary=True, + update_user_memories_after_run=True, + update_session_summary_after_run=True, + classifier=MemoryClassifier(model=OpenAIResponses(id="gpt-4o-mini")), + summarizer=MemorySummarizer(model=OpenAIResponses(id="gpt-4o-mini")), + manager=MemoryManager(model=OpenAIResponses(id="gpt-4o-mini")), + ), + ) + + response = agent.run("What is current news in France?") + assert response.content is not None diff --git a/libs/agno/tests/integration/models/openai/responses/test_multimodal.py b/libs/agno/tests/integration/models/openai/responses/test_multimodal.py new file mode 100644 index 0000000000..994ba2794b 
diff --git a/libs/agno/tests/integration/models/openai/responses/test_multimodal.py b/libs/agno/tests/integration/models/openai/responses/test_multimodal.py
new file mode 100644
index 0000000000..994ba2794b
--- /dev/null
+++ b/libs/agno/tests/integration/models/openai/responses/test_multimodal.py
@@ -0,0 +1,48 @@
+from agno.agent.agent import Agent
+from agno.media import Image
+from agno.models.openai.responses import OpenAIResponses
+from agno.tools.duckduckgo import DuckDuckGoTools
+
+
+def test_image_input():
+    """Test image input with the responses API."""
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        tools=[DuckDuckGoTools()],
+        markdown=True,
+        telemetry=False,
+        monitoring=False,
+    )
+
+    response = agent.run(
+        "Tell me about this image and give me the latest news about it.",
+        images=[Image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg")],
+    )
+
+    assert "golden" in response.content.lower()
+    assert "bridge" in response.content.lower()
+    assert "san francisco" in response.content.lower()
+
+
+def test_multimodal_with_tools():
+    """Test multimodal input with tool use in the responses API."""
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        tools=[DuckDuckGoTools()],
+        show_tool_calls=True,
+        markdown=True,
+        telemetry=False,
+        monitoring=False,
+    )
+
+    response = agent.run(
+        "Tell me about this bridge and look up its current status.",
+        images=[Image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg")],
+    )
+
+    # Verify content includes image analysis and tool usage
+    assert "golden" in response.content.lower()
+    assert "bridge" in response.content.lower()
+
+    # Check for tool call
+    assert any(msg.tool_calls for msg in response.messages if hasattr(msg, "tool_calls") and msg.tool_calls)
diff --git a/libs/agno/tests/integration/models/openai/responses/test_tool_use.py b/libs/agno/tests/integration/models/openai/responses/test_tool_use.py
new file mode 100644
index 0000000000..5ba59d471b
--- /dev/null
+++ b/libs/agno/tests/integration/models/openai/responses/test_tool_use.py
@@ -0,0 +1,284 @@
+import pytest
+from pydantic import BaseModel, Field
+
+from agno.agent import Agent, RunResponse  # noqa
+from agno.models.openai import OpenAIResponses
+from agno.tools.duckduckgo import DuckDuckGoTools
+from agno.tools.exa import ExaTools
+from agno.tools.yfinance import YFinanceTools
+
+
+def test_tool_use():
+    """Test basic tool usage with the responses API."""
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        tools=[YFinanceTools()],
+        show_tool_calls=True,
+        markdown=True,
+        telemetry=False,
+        monitoring=False,
+    )
+
+    response = agent.run("What is the current price of TSLA?")
+
+    # Verify tool usage
+    assert any(msg.tool_calls for msg in response.messages)
+    assert response.content is not None
+    assert "TSLA" in response.content
+
+
+def test_tool_use_stream():
+    """Test streaming with tool use in the responses API."""
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        tools=[YFinanceTools()],
+        show_tool_calls=True,
+        markdown=True,
+        telemetry=False,
+        monitoring=False,
+    )
+
+    response_stream = agent.run("What is the current price of TSLA?", stream=True)
+
+    responses = []
+    tool_call_seen = False
+
+    for chunk in response_stream:
+        assert isinstance(chunk, RunResponse)
+        responses.append(chunk)
+        if chunk.tools:
+            if any(tc.get("tool_name") for tc in chunk.tools):
+                tool_call_seen = True
+
+    assert len(responses) > 0
+    assert tool_call_seen, "No tool calls observed in stream"
+    assert any("TSLA" in r.content for r in responses if r.content)
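For orientation before the async variants below, a minimal sketch of the chunk.tools check used in test_tool_use_stream. The dict shape and the tool name get_current_stock_price are assumptions for illustration; the test itself only relies on "tool_name" being truthy:

    # Hypothetical streamed tool-call entry
    chunk_tools = [{"tool_name": "get_current_stock_price", "tool_args": {"symbol": "TSLA"}}]
    tool_call_seen = any(tc.get("tool_name") for tc in chunk_tools)
    assert tool_call_seen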
+
+
+@pytest.mark.asyncio
+async def test_async_tool_use():
+    """Test async tool use with the responses API."""
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        tools=[YFinanceTools()],
+        show_tool_calls=True,
+        markdown=True,
+        telemetry=False,
+        monitoring=False,
+    )
+
+    response = await agent.arun("What is the current price of TSLA?")
+
+    # Verify tool usage
+    assert any(msg.tool_calls for msg in response.messages if msg.role == "assistant")
+    assert response.content is not None
+    assert "TSLA" in response.content
+
+
+@pytest.mark.asyncio
+async def test_async_tool_use_stream():
+    """Test async streaming with tool use in the responses API."""
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        tools=[YFinanceTools()],
+        show_tool_calls=True,
+        markdown=True,
+        telemetry=False,
+        monitoring=False,
+    )
+
+    response_stream = await agent.arun("What is the current price of TSLA?", stream=True)
+
+    responses = []
+    tool_call_seen = False
+
+    async for chunk in response_stream:
+        assert isinstance(chunk, RunResponse)
+        responses.append(chunk)
+        if chunk.tools:
+            if any(tc.get("tool_name") for tc in chunk.tools):
+                tool_call_seen = True
+
+    assert len(responses) > 0
+    assert tool_call_seen, "No tool calls observed in stream"
+    assert any("TSLA" in r.content for r in responses if r.content)
+
+
+def test_tool_use_with_native_structured_outputs():
+    """Test native structured outputs with tool use in the responses API."""
+
+    class StockPrice(BaseModel):
+        price: float = Field(..., description="The price of the stock")
+        currency: str = Field(..., description="The currency of the stock")
+
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        tools=[YFinanceTools()],
+        show_tool_calls=True,
+        markdown=True,
+        response_model=StockPrice,
+        structured_outputs=True,
+        telemetry=False,
+        monitoring=False,
+    )
+    response = agent.run("What is the current price of TSLA?")
+    assert isinstance(response.content, StockPrice)
+    assert response.content is not None
+    assert response.content.price is not None
+    assert response.content.currency is not None
+
+
+def test_parallel_tool_calls():
+    """Test parallel tool calls with the responses API."""
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        tools=[YFinanceTools()],
+        show_tool_calls=True,
+        markdown=True,
+        telemetry=False,
+        monitoring=False,
+    )
+
+    response = agent.run("What is the current price of TSLA and AAPL?")
+
+    # Verify tool usage
+    tool_calls = [msg.tool_calls for msg in response.messages if msg.tool_calls]
+    assert len(tool_calls) >= 1  # At least one message has tool calls
+    assert sum(len(calls) for calls in tool_calls) == 2  # Total of 2 tool calls made
+    assert response.content is not None
+    assert "TSLA" in response.content and "AAPL" in response.content
+
+
+def test_multiple_tool_calls():
+    """Test multiple different tool types with the responses API."""
+    agent = Agent(
+        model=OpenAIResponses(id="gpt-4o-mini"),
+        tools=[YFinanceTools(), DuckDuckGoTools()],
+        show_tool_calls=True,
+        markdown=True,
+        telemetry=False,
+        monitoring=False,
+    )
+
+    response = agent.run("What is the current price of TSLA and what is the latest news about it?")
+
+    # Verify tool usage
+    tool_calls = [msg.tool_calls for msg in response.messages if msg.tool_calls]
+    assert len(tool_calls) >= 1  # At least one message has tool calls
+    assert sum(len(calls) for calls in tool_calls) == 2  # Total of 2 tool calls made
+    assert response.content is not None
+    assert "TSLA" in response.content and "latest news" in response.content.lower()
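A small worked check of the counting arithmetic in test_parallel_tool_calls and test_multiple_tool_calls, run against a hypothetical message list (the data is invented; the comprehension and the sum mirror the tests, which use attribute access on real message objects):

    # One assistant message carrying two parallel tool calls
    messages = [
        {"tool_calls": None},
        {"tool_calls": [{"name": "price_TSLA"}, {"name": "price_AAPL"}]},
    ]
    tool_calls = [m["tool_calls"] for m in messages if m["tool_calls"]]
    assert len(tool_calls) >= 1  # at least one message has tool calls
    assert sum(len(calls) for calls in tool_calls) == 2  # two calls in total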
API.""" + + def get_the_weather(): + return "It is currently 70 degrees and cloudy in Tokyo" + + agent = Agent( + model=OpenAIResponses(id="gpt-4o-mini"), + tools=[get_the_weather], + show_tool_calls=True, + markdown=True, + telemetry=False, + monitoring=False, + ) + + response = agent.run("What is the weather in Tokyo?") + + # Verify tool usage + assert any(msg.tool_calls for msg in response.messages) + assert response.content is not None + assert "70" in response.content + + +def test_tool_call_list_parameters(): + """Test tool with list parameters with the responses API.""" + agent = Agent( + model=OpenAIResponses(id="gpt-4o-mini"), + tools=[ExaTools(answer=False, find_similar=False)], + instructions="Use a single tool call if possible", + show_tool_calls=True, + markdown=True, + telemetry=False, + monitoring=False, + ) + + response = agent.run( + "What are the papers at https://arxiv.org/pdf/2307.06435 and https://arxiv.org/pdf/2502.09601 about?" + ) + + # Verify tool usage + assert any(msg.tool_calls for msg in response.messages) + tool_calls = [] + for msg in response.messages: + if msg.tool_calls: + tool_calls.extend(msg.tool_calls) + for call in tool_calls: + assert call["function"]["name"] in ["get_contents", "exa_answer"] + assert response.content is not None + + +def test_web_search_built_in_tool(): + """Test the built-in web search tool in the Responses API.""" + agent = Agent( + model=OpenAIResponses(id="gpt-4o-mini", web_search=True), + show_tool_calls=True, + markdown=True, + telemetry=False, + monitoring=False, + ) + + response = agent.run("What was the most recent Olympic Games and who won the most medals?") + + assert response.content is not None + assert "medal" in response.content.lower() + # Check for typical web search result indicators + assert any(term in response.content.lower() for term in ["olympic", "games", "gold", "medal"]) + + +def test_web_search_built_in_tool_stream(): + """Test the built-in web search tool in the Responses API.""" + agent = Agent( + model=OpenAIResponses(id="gpt-4o-mini", web_search=True), + show_tool_calls=True, + markdown=True, + telemetry=False, + monitoring=False, + ) + + response_stream = agent.run("What was the most recent Olympic Games and who won the most medals?", stream=True) + + responses = [] + + responses = list(response_stream) + assert len(responses) > 0 + final_response = "" + for response in responses: + assert isinstance(response, RunResponse) + assert response.content is not None + final_response += response.content + + assert "medal" in final_response.lower() + assert any(term in final_response.lower() for term in ["olympic", "games", "gold", "medal"]) + + +def test_web_search_built_in_tool_with_other_tools(): + """Test the built-in web search tool in the Responses API.""" + agent = Agent( + model=OpenAIResponses(id="gpt-4o-mini", web_search=True), + tools=[YFinanceTools()], + show_tool_calls=True, + markdown=True, + telemetry=False, + monitoring=False, + ) + + response = agent.run("What is the current price of TSLA and the latest news about it?") + + tool_calls = [msg.tool_calls for msg in response.messages if msg.tool_calls] + assert len(tool_calls) >= 1 # At least one message has tool calls + assert response.content is not None + assert "TSLA" in response.content + assert "news" in response.content.lower()