From 87cdb399e63c92c3118d379c6282691531026d49 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Sun, 9 Mar 2025 21:10:03 +0200 Subject: [PATCH 1/4] Add cohere vision --- cookbook/models/{openai => }/.gitignore | 0 cookbook/models/cohere/image_agent.py | 18 +++ cookbook/models/cohere/image_agent_bytes.py | 24 ++++ .../models/cohere/image_agent_local_file.py | 20 ++++ libs/agno/agno/models/cohere/chat.py | 109 +++++++++++++----- .../models/cohere/test_multimodal.py | 74 ++++++++++++ .../models/huggingface/__init__.py | 0 7 files changed, 217 insertions(+), 28 deletions(-) rename cookbook/models/{openai => }/.gitignore (100%) create mode 100644 cookbook/models/cohere/image_agent.py create mode 100644 cookbook/models/cohere/image_agent_bytes.py create mode 100644 cookbook/models/cohere/image_agent_local_file.py create mode 100644 libs/agno/tests/integration/models/cohere/test_multimodal.py create mode 100644 libs/agno/tests/integration/models/huggingface/__init__.py diff --git a/cookbook/models/openai/.gitignore b/cookbook/models/.gitignore similarity index 100% rename from cookbook/models/openai/.gitignore rename to cookbook/models/.gitignore diff --git a/cookbook/models/cohere/image_agent.py b/cookbook/models/cohere/image_agent.py new file mode 100644 index 0000000000..9086ae0b9d --- /dev/null +++ b/cookbook/models/cohere/image_agent.py @@ -0,0 +1,18 @@ +from agno.agent import Agent +from agno.media import Image +from agno.models.cohere import Cohere + +agent = Agent( + model=Cohere(id="c4ai-aya-vision-8b"), + markdown=True, +) + +agent.print_response( + "Tell me about this image.", + images=[ + Image( + url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg" + ) + ], + stream=True, +) diff --git a/cookbook/models/cohere/image_agent_bytes.py b/cookbook/models/cohere/image_agent_bytes.py new file mode 100644 index 0000000000..18ee6afecf --- /dev/null +++ b/cookbook/models/cohere/image_agent_bytes.py @@ -0,0 +1,24 @@ +from pathlib import Path + +from agno.agent import Agent +from agno.media import Image +from agno.models.cohere.chat import Cohere + +agent = Agent( + model=Cohere(id="c4ai-aya-vision-8b"), + markdown=True, +) + +image_path = Path(__file__).parent.joinpath("sample.jpg") + +# Read the image file content as bytes +with open(image_path, "rb") as img_file: + image_bytes = img_file.read() + +agent.print_response( + "Tell me about this image.", + images=[ + Image(content=image_bytes), + ], + stream=True, +) diff --git a/cookbook/models/cohere/image_agent_local_file.py b/cookbook/models/cohere/image_agent_local_file.py new file mode 100644 index 0000000000..229f9796d7 --- /dev/null +++ b/cookbook/models/cohere/image_agent_local_file.py @@ -0,0 +1,20 @@ +from pathlib import Path + +from agno.agent import Agent +from agno.media import Image +from agno.models.cohere.chat import Cohere + +agent = Agent( + model=Cohere(id="c4ai-aya-vision-8b"), + markdown=True, +) + +image_path = Path(__file__).parent.joinpath("sample.jpg") + +agent.print_response( + "Tell me about this image.", + images=[ + Image(filepath=image_path), + ], + stream=True, +) diff --git a/libs/agno/agno/models/cohere/chat.py b/libs/agno/agno/models/cohere/chat.py index 375ebee64d..beb2e1b1e8 100644 --- a/libs/agno/agno/models/cohere/chat.py +++ b/libs/agno/agno/models/cohere/chat.py @@ -1,12 +1,15 @@ from dataclasses import dataclass from os import getenv -from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Tuple +from pathlib import Path +from typing import Any, AsyncIterator, 
Dict, Iterator, List, Optional, Sequence, Tuple from agno.exceptions import ModelProviderError +from agno.media import Image from agno.models.base import MessageData, Model from agno.models.message import Message from agno.models.response import ModelResponse from agno.utils.log import logger +from agno.utils.openai import images_to_message try: from cohere import AsyncClientV2 as CohereAsyncClient @@ -17,6 +20,79 @@ raise ImportError("`cohere` not installed. Please install using `pip install cohere`") +def _format_images_for_message(message: Message, images: Sequence[Image]) -> List[Dict[str, Any]]: + """ + Format an image into the format expected by WatsonX. + """ + + # Create a default message content with text + message_content_with_image: List[Dict[str, Any]] = [{"type": "text", "text": message.content}] + + # Add images to the message content + for image in images: + try: + if image.content is not None: + image_content = image.content + elif image.url is not None: + image_content = image.image_url_content + elif image.filepath is not None: + if isinstance(image.filepath, Path): + image_content = image.filepath.read_bytes() + else: + with open(image.filepath, "rb") as f: + image_content = f.read() + else: + logger.warning(f"Unsupported image format: {image}") + continue + + if image_content is not None: + import base64 + + base64_image = base64.b64encode(image_content).decode("utf-8") + image_url = f"data:image/jpeg;base64,{base64_image}" + image_payload = {"type": "image_url", "image_url": {"url": image_url}} + message_content_with_image.append(image_payload) + + except Exception as e: + logger.error(f"Failed to process image: {str(e)}") + + # Update the message content with the images + return message_content_with_image + + +def _format_messages(messages: List[Message]) -> List[Dict[str, Any]]: + """ + Format messages for the Cohere API. + + Args: + messages (List[Message]): The list of messages. + + Returns: + List[Dict[str, Any]]: The formatted messages. + """ + formatted_messages = [] + for message in messages: + message_dict = { + "role": message.role, + "content": message.content, + "name": message.name, + "tool_call_id": message.tool_call_id, + "tool_calls": message.tool_calls, + } + + if message.images is not None and len(message.images) > 0: + + # Ignore non-string message content + if isinstance(message.content, str): + message_content_with_image = _format_images_for_message(message=message, images=message.images) + if len(message_content_with_image) > 1: + message_dict["content"] = message_content_with_image + + message_dict = {k: v for k, v in message_dict.items() if v is not None} + formatted_messages.append(message_dict) + return formatted_messages + + @dataclass class Cohere(Model): id: str = "command-r-plus" @@ -116,29 +192,6 @@ def request_kwargs(self) -> Dict[str, Any]: _request_params.update(self.request_params) return _request_params - def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]: - """ - Format messages for the Cohere API. - - Args: - messages (List[Message]): The list of messages. - - Returns: - List[Dict[str, Any]]: The formatted messages. 
- """ - formatted_messages = [] - for message in messages: - message_dict = { - "role": message.role, - "content": message.content, - "name": message.name, - "tool_call_id": message.tool_call_id, - "tool_calls": message.tool_calls, - } - message_dict = {k: v for k, v in message_dict.items() if v is not None} - formatted_messages.append(message_dict) - return formatted_messages - def invoke(self, messages: List[Message]) -> ChatResponse: """ Invoke a non-streamed chat response from the Cohere API. @@ -153,7 +206,7 @@ def invoke(self, messages: List[Message]) -> ChatResponse: request_kwargs = self.request_kwargs try: - return self.get_client().chat(model=self.id, messages=self._format_messages(messages), **request_kwargs) # type: ignore + return self.get_client().chat(model=self.id, messages=_format_messages(messages), **request_kwargs) # type: ignore except Exception as e: logger.error(f"Unexpected error calling Cohere API: {str(e)}") raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e @@ -173,7 +226,7 @@ def invoke_stream(self, messages: List[Message]) -> Iterator[StreamedChatRespons try: return self.get_client().chat_stream( model=self.id, - messages=self._format_messages(messages), # type: ignore + messages=_format_messages(messages), # type: ignore **request_kwargs, ) except Exception as e: @@ -195,7 +248,7 @@ async def ainvoke(self, messages: List[Message]) -> ChatResponse: try: return await self.get_async_client().chat( model=self.id, - messages=self._format_messages(messages), # type: ignore + messages=_format_messages(messages), # type: ignore **request_kwargs, ) except Exception as e: @@ -217,7 +270,7 @@ async def ainvoke_stream(self, messages: List[Message]) -> AsyncIterator[Streame try: async for response in self.get_async_client().chat_stream( model=self.id, - messages=self._format_messages(messages), # type: ignore + messages=_format_messages(messages), # type: ignore **request_kwargs, ): yield response diff --git a/libs/agno/tests/integration/models/cohere/test_multimodal.py b/libs/agno/tests/integration/models/cohere/test_multimodal.py new file mode 100644 index 0000000000..cbce1019c6 --- /dev/null +++ b/libs/agno/tests/integration/models/cohere/test_multimodal.py @@ -0,0 +1,74 @@ +from pathlib import Path +import pytest + +from agno.agent.agent import Agent +from agno.media import Audio, Image +from agno.models.cohere.chat import Cohere +from agno.tools.duckduckgo import DuckDuckGoTools + + +def test_image_input(): + agent = Agent( + model=Cohere(id="c4ai-aya-vision-8b"), + add_history_to_messages=True, + markdown=True, + telemetry=False, + monitoring=False + ) + + response = agent.run( + "Tell me about this image.", + images=[Image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg")], + ) + + assert "golden" in response.content.lower() + + # Just check it doesn't break on subsequent messages + response = agent.run("Where can I find more information?") + assert [message.role for message in response.messages] == ["system", "user", "assistant", "user", "assistant"] + + +def test_image_input_bytes(): + agent = Agent(model=Cohere(id="c4ai-aya-vision-8b"), telemetry=False, monitoring=False) + + image_path = Path(__file__).parent.joinpath("../sample_image.jpg") + + # Read the image file content as bytes + with open(image_path, "rb") as img_file: + image_bytes = img_file.read() + + response = agent.run( + "Tell me about this image.", + images=[Image(content=image_bytes)], + ) + + assert "golden" in 
response.content.lower() + assert "bridge" in response.content.lower() + + +def test_image_input_local_file(): + agent = Agent(model=Cohere(id="c4ai-aya-vision-8b"), telemetry=False, monitoring=False) + + image_path = Path(__file__).parent.joinpath("../sample_image.jpg") + + response = agent.run( + "Tell me about this image.", + images=[Image(filepath=image_path)], + ) + + assert "golden" in response.content.lower() + assert "bridge" in response.content.lower() + + +@pytest.mark.skip(reason="Image with tool call is not supported yet.") +def test_image_input_with_tool_call(): + agent = Agent( + model=Cohere(id="c4ai-aya-vision-8b"), tools=[DuckDuckGoTools()], markdown=True, telemetry=False, monitoring=False + ) + + response = agent.run( + "Tell me about this image and give me the latest news about it.", + images=[Image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg")], + ) + + assert "golden" in response.content.lower() \ No newline at end of file diff --git a/libs/agno/tests/integration/models/huggingface/__init__.py b/libs/agno/tests/integration/models/huggingface/__init__.py new file mode 100644 index 0000000000..e69de29bb2 From 96932c9af0147ffe510a7f374118a29ed69f6fb3 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Mon, 10 Mar 2025 09:54:50 +0200 Subject: [PATCH 2/4] Cleanup --- .../examples/apps/github_mcp_agent/agents.py | 10 ++-- .../examples/apps/github_mcp_agent/app.py | 54 +++++++++++-------- libs/agno/agno/models/cohere/chat.py | 2 - .../models/cohere/test_multimodal.py | 19 ++++--- 4 files changed, 51 insertions(+), 34 deletions(-) diff --git a/cookbook/examples/apps/github_mcp_agent/agents.py b/cookbook/examples/apps/github_mcp_agent/agents.py index e25f28db24..f6c74c8e8b 100644 --- a/cookbook/examples/apps/github_mcp_agent/agents.py +++ b/cookbook/examples/apps/github_mcp_agent/agents.py @@ -1,27 +1,29 @@ import os from textwrap import dedent + from agno.agent import Agent from agno.tools.mcp import MCPTools from mcp import ClientSession, StdioServerParameters from mcp.client.stdio import stdio_client + async def run_github_agent(message): if not os.getenv("GITHUB_TOKEN"): return "Error: GitHub token not provided" - + try: server_params = StdioServerParameters( command="npx", args=["-y", "@modelcontextprotocol/server-github"], ) - + # Create client session async with stdio_client(server_params) as (read, write): async with ClientSession(read, write) as session: # Initialize MCP toolkit mcp_tools = MCPTools(session=session) await mcp_tools.initialize() - + # Create agent agent = Agent( tools=[mcp_tools], @@ -36,7 +38,7 @@ async def run_github_agent(message): markdown=True, show_tool_calls=True, ) - + # Run agent response = await agent.arun(message) return response.content diff --git a/cookbook/examples/apps/github_mcp_agent/app.py b/cookbook/examples/apps/github_mcp_agent/app.py index 4e5e842d18..5cb153927d 100644 --- a/cookbook/examples/apps/github_mcp_agent/app.py +++ b/cookbook/examples/apps/github_mcp_agent/app.py @@ -1,49 +1,58 @@ import asyncio import os + import streamlit as st from agents import run_github_agent + # Page config st.set_page_config(page_title="🐙 GitHub MCP Agent", page_icon="🐙", layout="wide") # Title and description st.markdown("
🐙 GitHub MCP Agent
", unsafe_allow_html=True) -st.markdown("Explore GitHub repositories with natural language using the Model Context Protocol") +st.markdown( + "Explore GitHub repositories with natural language using the Model Context Protocol" +) # Setup sidebar for API key with st.sidebar: st.header("🔑 Authentication") - github_token = st.text_input("GitHub Token", type="password", - help="Create a token with repo scope at github.com/settings/tokens") - + github_token = st.text_input( + "GitHub Token", + type="password", + help="Create a token with repo scope at github.com/settings/tokens", + ) + if github_token: os.environ["GITHUB_TOKEN"] = github_token - + st.markdown("---") st.markdown("### Example Queries") - + st.markdown("**Issues**") st.markdown("- Show me issues by label") st.markdown("- What issues are being actively discussed?") - + st.markdown("**Pull Requests**") st.markdown("- What PRs need review?") st.markdown("- Show me recent merged PRs") - + st.markdown("**Repository**") st.markdown("- Show repository health metrics") st.markdown("- Show repository activity patterns") - + st.markdown("---") - st.caption("Note: Always specify the repository in your query if not already selected in the main input.") + st.caption( + "Note: Always specify the repository in your query if not already selected in the main input." + ) # Query input col1, col2 = st.columns([3, 1]) with col1: repo = st.text_input("Repository", value="agno-agi/agno", help="Format: owner/repo") with col2: - query_type = st.selectbox("Query Type", [ - "Issues", "Pull Requests", "Repository Activity", "Custom" - ]) + query_type = st.selectbox( + "Query Type", ["Issues", "Pull Requests", "Repository Activity", "Custom"] + ) # Create predefined queries based on type if query_type == "Issues": @@ -55,8 +64,11 @@ else: query_template = "" -query = st.text_area("Your Query", value=query_template, - placeholder="What would you like to know about this repository?") +query = st.text_area( + "Your Query", + value=query_template, + placeholder="What would you like to know about this repository?", +) # Run button if st.button("🚀 Run Query", type="primary", use_container_width=True): @@ -71,15 +83,15 @@ full_query = f"{query} in {repo}" else: full_query = query - + result = asyncio.run(run_github_agent(full_query)) - + # Display results in a nice container st.markdown("### Results") st.markdown(result) # Display help text for first-time users -if 'result' not in locals(): +if "result" not in locals(): st.markdown( """

How to use this app:

@@ -96,10 +108,10 @@
  • More specific queries yield better results
  • This app requires Node.js to be installed (for the npx command)
    """, - unsafe_allow_html=True + """, + unsafe_allow_html=True, ) # Footer st.markdown("---") -st.write("Built with Streamlit, Agno, and Model Context Protocol ❤️") \ No newline at end of file +st.write("Built with Streamlit, Agno, and Model Context Protocol ❤️") diff --git a/libs/agno/agno/models/cohere/chat.py b/libs/agno/agno/models/cohere/chat.py index beb2e1b1e8..b3ac145d4a 100644 --- a/libs/agno/agno/models/cohere/chat.py +++ b/libs/agno/agno/models/cohere/chat.py @@ -9,7 +9,6 @@ from agno.models.message import Message from agno.models.response import ModelResponse from agno.utils.log import logger -from agno.utils.openai import images_to_message try: from cohere import AsyncClientV2 as CohereAsyncClient @@ -81,7 +80,6 @@ def _format_messages(messages: List[Message]) -> List[Dict[str, Any]]: } if message.images is not None and len(message.images) > 0: - # Ignore non-string message content if isinstance(message.content, str): message_content_with_image = _format_images_for_message(message=message, images=message.images) diff --git a/libs/agno/tests/integration/models/cohere/test_multimodal.py b/libs/agno/tests/integration/models/cohere/test_multimodal.py index cbce1019c6..c0218439eb 100644 --- a/libs/agno/tests/integration/models/cohere/test_multimodal.py +++ b/libs/agno/tests/integration/models/cohere/test_multimodal.py @@ -1,19 +1,20 @@ from pathlib import Path + import pytest from agno.agent.agent import Agent -from agno.media import Audio, Image +from agno.media import Image from agno.models.cohere.chat import Cohere from agno.tools.duckduckgo import DuckDuckGoTools def test_image_input(): agent = Agent( - model=Cohere(id="c4ai-aya-vision-8b"), + model=Cohere(id="c4ai-aya-vision-8b"), add_history_to_messages=True, - markdown=True, - telemetry=False, - monitoring=False + markdown=True, + telemetry=False, + monitoring=False, ) response = agent.run( @@ -63,7 +64,11 @@ def test_image_input_local_file(): @pytest.mark.skip(reason="Image with tool call is not supported yet.") def test_image_input_with_tool_call(): agent = Agent( - model=Cohere(id="c4ai-aya-vision-8b"), tools=[DuckDuckGoTools()], markdown=True, telemetry=False, monitoring=False + model=Cohere(id="c4ai-aya-vision-8b"), + tools=[DuckDuckGoTools()], + markdown=True, + telemetry=False, + monitoring=False, ) response = agent.run( @@ -71,4 +76,4 @@ def test_image_input_with_tool_call(): images=[Image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg")], ) - assert "golden" in response.content.lower() \ No newline at end of file + assert "golden" in response.content.lower() From 3a078eac1c9221976d2d328e55f3ac9f0ea2c244 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 11 Mar 2025 10:22:54 +0200 Subject: [PATCH 3/4] Update --- cookbook/models/aws/bedrock/image_agent_bytes.py | 6 ++++-- cookbook/models/azure/ai_foundry/image_agent_bytes.py | 6 ++++-- cookbook/models/cohere/image_agent_bytes.py | 6 ++++-- cookbook/models/cohere/image_agent_local_file.py | 3 +++ cookbook/models/ibm/watsonx/image_agent_bytes.py | 4 +--- cookbook/models/openai/image_agent_bytes.py | 6 ++++-- cookbook/models/together/image_agent_bytes.py | 3 +-- cookbook/models/xai/image_agent_bytes.py | 6 ++++-- libs/agno/agno/utils/media.py | 2 +- .../integration/models/aws/bedrock/test_multimodal.py | 5 ++--- .../tests/integration/models/cohere/test_multimodal.py | 7 +++---- .../integration/models/ibm/watsonx/test_multimodal.py | 5 ++--- 12 files changed, 33 insertions(+), 26 deletions(-) diff --git 
a/cookbook/models/aws/bedrock/image_agent_bytes.py b/cookbook/models/aws/bedrock/image_agent_bytes.py index 4573532d3c..3e96a2a610 100644 --- a/cookbook/models/aws/bedrock/image_agent_bytes.py +++ b/cookbook/models/aws/bedrock/image_agent_bytes.py @@ -4,6 +4,7 @@ from agno.media import Image from agno.models.aws import AwsBedrock from agno.tools.duckduckgo import DuckDuckGoTools +from agno.utils.media import download_image agent = Agent( model=AwsBedrock(id="amazon.nova-pro-v1:0"), @@ -13,9 +14,10 @@ image_path = Path(__file__).parent.joinpath("sample.jpg") +download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path)) + # Read the image file content as bytes -with open(image_path, "rb") as img_file: - image_bytes = img_file.read() +image_bytes = image_path.read_bytes() agent.print_response( "Tell me about this image and give me the latest news about it.", diff --git a/cookbook/models/azure/ai_foundry/image_agent_bytes.py b/cookbook/models/azure/ai_foundry/image_agent_bytes.py index 1932321023..66f58c3d5e 100644 --- a/cookbook/models/azure/ai_foundry/image_agent_bytes.py +++ b/cookbook/models/azure/ai_foundry/image_agent_bytes.py @@ -3,6 +3,7 @@ from agno.agent import Agent from agno.media import Image from agno.models.azure import AzureAIFoundry +from agno.utils.media import download_image agent = Agent( model=AzureAIFoundry(id="Llama-3.2-11B-Vision-Instruct"), @@ -11,9 +12,10 @@ image_path = Path(__file__).parent.joinpath("sample.jpg") +download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path)) + # Read the image file content as bytes -with open(image_path, "rb") as img_file: - image_bytes = img_file.read() +image_bytes = image_path.read_bytes() agent.print_response( "Tell me about this image.", diff --git a/cookbook/models/cohere/image_agent_bytes.py b/cookbook/models/cohere/image_agent_bytes.py index 18ee6afecf..680d98fa4b 100644 --- a/cookbook/models/cohere/image_agent_bytes.py +++ b/cookbook/models/cohere/image_agent_bytes.py @@ -3,6 +3,7 @@ from agno.agent import Agent from agno.media import Image from agno.models.cohere.chat import Cohere +from agno.utils.media import download_image agent = Agent( model=Cohere(id="c4ai-aya-vision-8b"), @@ -11,9 +12,10 @@ image_path = Path(__file__).parent.joinpath("sample.jpg") +download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path)) + # Read the image file content as bytes -with open(image_path, "rb") as img_file: - image_bytes = img_file.read() +image_bytes = image_path.read_bytes() agent.print_response( "Tell me about this image.", diff --git a/cookbook/models/cohere/image_agent_local_file.py b/cookbook/models/cohere/image_agent_local_file.py index 229f9796d7..3b449cee90 100644 --- a/cookbook/models/cohere/image_agent_local_file.py +++ b/cookbook/models/cohere/image_agent_local_file.py @@ -3,6 +3,7 @@ from agno.agent import Agent from agno.media import Image from agno.models.cohere.chat import Cohere +from agno.utils.media import download_image agent = Agent( model=Cohere(id="c4ai-aya-vision-8b"), @@ -11,6 +12,8 @@ image_path = Path(__file__).parent.joinpath("sample.jpg") +download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path)) + agent.print_response( "Tell me about this image.", images=[ diff --git a/cookbook/models/ibm/watsonx/image_agent_bytes.py 
b/cookbook/models/ibm/watsonx/image_agent_bytes.py index 606be91522..768ffb94c8 100644 --- a/cookbook/models/ibm/watsonx/image_agent_bytes.py +++ b/cookbook/models/ibm/watsonx/image_agent_bytes.py @@ -3,7 +3,6 @@ from agno.agent import Agent from agno.media import Image from agno.models.ibm import WatsonX -from agno.tools.duckduckgo import DuckDuckGoTools agent = Agent( model=WatsonX(id="meta-llama/llama-3-2-11b-vision-instruct"), @@ -13,8 +12,7 @@ image_path = Path(__file__).parent.joinpath("sample.jpg") # Read the image file content as bytes -with open(image_path, "rb") as img_file: - image_bytes = img_file.read() +image_bytes = image_path.read_bytes() agent.print_response( "Tell me about this image and and give me the latest news about it.", diff --git a/cookbook/models/openai/image_agent_bytes.py b/cookbook/models/openai/image_agent_bytes.py index f703ef1318..a359eebf71 100644 --- a/cookbook/models/openai/image_agent_bytes.py +++ b/cookbook/models/openai/image_agent_bytes.py @@ -4,6 +4,7 @@ from agno.media import Image from agno.models.openai import OpenAIChat from agno.tools.duckduckgo import DuckDuckGoTools +from agno.utils.media import download_image agent = Agent( model=OpenAIChat(id="gpt-4o"), @@ -13,9 +14,10 @@ image_path = Path(__file__).parent.joinpath("sample.jpg") +download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path)) + # Read the image file content as bytes -with open(image_path, "rb") as img_file: - image_bytes = img_file.read() +image_bytes = image_path.read_bytes() agent.print_response( "Tell me about this image and give me the latest news about it.", diff --git a/cookbook/models/together/image_agent_bytes.py b/cookbook/models/together/image_agent_bytes.py index 4f24d678b4..82c3f35e31 100644 --- a/cookbook/models/together/image_agent_bytes.py +++ b/cookbook/models/together/image_agent_bytes.py @@ -12,8 +12,7 @@ image_path = Path(__file__).parent.joinpath("sample.jpg") # Read the image file content as bytes -with open(image_path, "rb") as img_file: - image_bytes = img_file.read() +image_bytes = image_path.read_bytes() agent.print_response( "Tell me about this image", diff --git a/cookbook/models/xai/image_agent_bytes.py b/cookbook/models/xai/image_agent_bytes.py index 7f7c9f1da4..232236ceb3 100644 --- a/cookbook/models/xai/image_agent_bytes.py +++ b/cookbook/models/xai/image_agent_bytes.py @@ -4,6 +4,7 @@ from agno.media import Image from agno.models.xai import xAI from agno.tools.duckduckgo import DuckDuckGoTools +from agno.utils.media import download_image agent = Agent( model=xAI(id="grok-2-vision-latest"), @@ -13,9 +14,10 @@ image_path = Path(__file__).parent.joinpath("sample.jpg") +download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path)) + # Read the image file content as bytes -with open(image_path, "rb") as img_file: - image_bytes = img_file.read() +image_bytes = image_path.read_bytes() agent.print_response( "Tell me about this image and give me the latest news about it.", diff --git a/libs/agno/agno/utils/media.py b/libs/agno/agno/utils/media.py index 5f8fd0cd0e..6eb56db98e 100644 --- a/libs/agno/agno/utils/media.py +++ b/libs/agno/agno/utils/media.py @@ -3,7 +3,7 @@ import requests -def download_image(url, save_path): +def download_image(url: str, save_path: str) -> bool: """ Downloads an image from the specified URL and saves it to the given local path. 
Parameters: diff --git a/libs/agno/tests/integration/models/aws/bedrock/test_multimodal.py b/libs/agno/tests/integration/models/aws/bedrock/test_multimodal.py index 659787c2bf..650bef0885 100644 --- a/libs/agno/tests/integration/models/aws/bedrock/test_multimodal.py +++ b/libs/agno/tests/integration/models/aws/bedrock/test_multimodal.py @@ -11,11 +11,10 @@ def test_image_input_bytes(): """ agent = Agent(model=AwsBedrock(id="amazon.nova-pro-v1:0"), markdown=True, telemetry=False, monitoring=False) - image_path = Path(__file__).parent.joinpath("../../sample_image.jpg") + image_path = Path(__file__).parent.parent.parent.joinpath("sample_image.jpg") # Read the image file content as bytes - with open(image_path, "rb") as img_file: - image_bytes = img_file.read() + image_bytes = image_path.read_bytes() response = agent.run( "Tell me about this image.", diff --git a/libs/agno/tests/integration/models/cohere/test_multimodal.py b/libs/agno/tests/integration/models/cohere/test_multimodal.py index c0218439eb..663bb4c496 100644 --- a/libs/agno/tests/integration/models/cohere/test_multimodal.py +++ b/libs/agno/tests/integration/models/cohere/test_multimodal.py @@ -32,11 +32,10 @@ def test_image_input(): def test_image_input_bytes(): agent = Agent(model=Cohere(id="c4ai-aya-vision-8b"), telemetry=False, monitoring=False) - image_path = Path(__file__).parent.joinpath("../sample_image.jpg") + image_path = Path(__file__).parent.parent.joinpath("sample_image.jpg") # Read the image file content as bytes - with open(image_path, "rb") as img_file: - image_bytes = img_file.read() + image_bytes = image_path.read_bytes() response = agent.run( "Tell me about this image.", @@ -50,7 +49,7 @@ def test_image_input_bytes(): def test_image_input_local_file(): agent = Agent(model=Cohere(id="c4ai-aya-vision-8b"), telemetry=False, monitoring=False) - image_path = Path(__file__).parent.joinpath("../sample_image.jpg") + image_path = Path(__file__).parent.parent.joinpath("sample_image.jpg") response = agent.run( "Tell me about this image.", diff --git a/libs/agno/tests/integration/models/ibm/watsonx/test_multimodal.py b/libs/agno/tests/integration/models/ibm/watsonx/test_multimodal.py index 8b8de35d31..052dba6896 100644 --- a/libs/agno/tests/integration/models/ibm/watsonx/test_multimodal.py +++ b/libs/agno/tests/integration/models/ibm/watsonx/test_multimodal.py @@ -20,11 +20,10 @@ def test_image_input(): def test_image_input_bytes(): agent = Agent(model=WatsonX(id="meta-llama/llama-3-2-11b-vision-instruct"), telemetry=False, monitoring=False) - image_path = Path(__file__).parent.joinpath("../../sample_image.jpg") + image_path = Path(__file__).parent.parent.parent.joinpath("sample_image.jpg") # Read the image file content as bytes - with open(image_path, "rb") as img_file: - image_bytes = img_file.read() + image_bytes = image_path.read_bytes() response = agent.run( "Tell me about this image.", From 046bd8d2a7c257a3ada07bcee5edc02bc876326c Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 11 Mar 2025 10:24:30 +0200 Subject: [PATCH 4/4] Update --- libs/agno/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/agno/pyproject.toml b/libs/agno/pyproject.toml index 23422d00b5..ce08959fc1 100644 --- a/libs/agno/pyproject.toml +++ b/libs/agno/pyproject.toml @@ -75,7 +75,7 @@ postgres = ["psycopg-binary", "psycopg"] # Dependencies for Vector databases pgvector = ["pgvector"] chromadb = ["chromadb"] -lancedb = ["lancedb", "tantivy"] +lancedb = ["lancedb==0.20.0", "tantivy"] qdrant = ["qdrant-client"] 
cassandra = ["cassio"]
mongodb = ["pymongo[srv]"]