Skip to content

Commit 27f9226

Browse files
authored
Add cohere vision (#2342)
## Description --- ## Type of change Please check the options that are relevant: - [ ] Bug fix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Model update (Addition or modification of models) - [ ] Other (please describe): --- ## Checklist - [ ] Adherence to standards: Code complies with Agno’s style guidelines and best practices. - [ ] Formatting and validation: You have run `./scripts/format.sh` and `./scripts/validate.sh` to ensure code is formatted and linted. - [ ] Self-review completed: A thorough review has been performed by the contributor(s). - [ ] Documentation: Docstrings and comments have been added or updated for any complex logic. - [ ] Examples and guides: Relevant cookbook examples have been included or updated (if applicable). - [ ] Tested in a clean environment: Changes have been tested in a clean environment to confirm expected behavior. - [ ] Tests (optional): Tests have been added or updated to cover any new or changed functionality. --- ## Additional Notes Include any deployment notes, performance implications, security considerations, or other relevant information (e.g., screenshots or logs if applicable).
1 parent d4002c6 commit 27f9226

File tree

17 files changed

+248
-49
lines changed

17 files changed

+248
-49
lines changed
File renamed without changes.

cookbook/models/aws/bedrock/image_agent_bytes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from agno.media import Image
55
from agno.models.aws import AwsBedrock
66
from agno.tools.duckduckgo import DuckDuckGoTools
7+
from agno.utils.media import download_image
78

89
agent = Agent(
910
model=AwsBedrock(id="amazon.nova-pro-v1:0"),
@@ -13,9 +14,10 @@
1314

1415
image_path = Path(__file__).parent.joinpath("sample.jpg")
1516

17+
download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path))
18+
1619
# Read the image file content as bytes
17-
with open(image_path, "rb") as img_file:
18-
image_bytes = img_file.read()
20+
image_bytes = image_path.read_bytes()
1921

2022
agent.print_response(
2123
"Tell me about this image and give me the latest news about it.",

cookbook/models/azure/ai_foundry/image_agent_bytes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from agno.agent import Agent
44
from agno.media import Image
55
from agno.models.azure import AzureAIFoundry
6+
from agno.utils.media import download_image
67

78
agent = Agent(
89
model=AzureAIFoundry(id="Llama-3.2-11B-Vision-Instruct"),
@@ -11,9 +12,10 @@
1112

1213
image_path = Path(__file__).parent.joinpath("sample.jpg")
1314

15+
download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path))
16+
1417
# Read the image file content as bytes
15-
with open(image_path, "rb") as img_file:
16-
image_bytes = img_file.read()
18+
image_bytes = image_path.read_bytes()
1719

1820
agent.print_response(
1921
"Tell me about this image.",

cookbook/models/cohere/image_agent.py

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""Ask a Cohere model to describe an image that is referenced by URL."""

from agno.agent import Agent
from agno.media import Image
from agno.models.cohere import Cohere

# Publicly hosted sample picture handed to the model by reference
IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg"

cohere_model = Cohere(id="c4ai-aya-vision-8b")
agent = Agent(model=cohere_model, markdown=True)

# Stream the answer; the image is passed as a URL, not as local data
agent.print_response(
    "Tell me about this image.",
    images=[Image(url=IMAGE_URL)],
    stream=True,
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
"""Ask a Cohere model to describe an image that is passed as raw bytes."""

from pathlib import Path

from agno.agent import Agent
from agno.media import Image
from agno.models.cohere.chat import Cohere
from agno.utils.media import download_image

# Publicly hosted sample picture that is saved locally before use
IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg"

agent = Agent(model=Cohere(id="c4ai-aya-vision-8b"), markdown=True)

# Keep the sample image next to this script
image_path = Path(__file__).parent / "sample.jpg"
download_image(url=IMAGE_URL, save_path=str(image_path))

# Load the saved file into memory so it can be sent as bytes
image_bytes = image_path.read_bytes()

agent.print_response(
    "Tell me about this image.",
    images=[Image(content=image_bytes)],
    stream=True,
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Ask a Cohere model to describe an image that is passed as a local file path."""

from pathlib import Path

from agno.agent import Agent
from agno.media import Image
from agno.models.cohere.chat import Cohere
from agno.utils.media import download_image

# Publicly hosted sample picture that is saved locally before use
IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg"

agent = Agent(model=Cohere(id="c4ai-aya-vision-8b"), markdown=True)

# Keep the sample image next to this script
image_path = Path(__file__).parent / "sample.jpg"
download_image(url=IMAGE_URL, save_path=str(image_path))

# The image is handed over by path; its bytes are not read in this script
agent.print_response(
    "Tell me about this image.",
    images=[Image(filepath=image_path)],
    stream=True,
)

cookbook/models/ibm/watsonx/image_agent_bytes.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from agno.agent import Agent
44
from agno.media import Image
55
from agno.models.ibm import WatsonX
6-
from agno.tools.duckduckgo import DuckDuckGoTools
76

87
agent = Agent(
98
model=WatsonX(id="meta-llama/llama-3-2-11b-vision-instruct"),
@@ -13,8 +12,7 @@
1312
image_path = Path(__file__).parent.joinpath("sample.jpg")
1413

1514
# Read the image file content as bytes
16-
with open(image_path, "rb") as img_file:
17-
image_bytes = img_file.read()
15+
image_bytes = image_path.read_bytes()
1816

1917
agent.print_response(
2018
"Tell me about this image and and give me the latest news about it.",

cookbook/models/openai/image_agent_bytes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from agno.media import Image
55
from agno.models.openai import OpenAIChat
66
from agno.tools.duckduckgo import DuckDuckGoTools
7+
from agno.utils.media import download_image
78

89
agent = Agent(
910
model=OpenAIChat(id="gpt-4o"),
@@ -13,9 +14,10 @@
1314

1415
image_path = Path(__file__).parent.joinpath("sample.jpg")
1516

17+
download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path))
18+
1619
# Read the image file content as bytes
17-
with open(image_path, "rb") as img_file:
18-
image_bytes = img_file.read()
20+
image_bytes = image_path.read_bytes()
1921

2022
agent.print_response(
2123
"Tell me about this image and give me the latest news about it.",

cookbook/models/together/image_agent_bytes.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212
image_path = Path(__file__).parent.joinpath("sample.jpg")
1313

1414
# Read the image file content as bytes
15-
with open(image_path, "rb") as img_file:
16-
image_bytes = img_file.read()
15+
image_bytes = image_path.read_bytes()
1716

1817
agent.print_response(
1918
"Tell me about this image",

cookbook/models/xai/image_agent_bytes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from agno.media import Image
55
from agno.models.xai import xAI
66
from agno.tools.duckduckgo import DuckDuckGoTools
7+
from agno.utils.media import download_image
78

89
agent = Agent(
910
model=xAI(id="grok-2-vision-latest"),
@@ -13,9 +14,10 @@
1314

1415
image_path = Path(__file__).parent.joinpath("sample.jpg")
1516

17+
download_image(url="https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg", save_path=str(image_path))
18+
1619
# Read the image file content as bytes
17-
with open(image_path, "rb") as img_file:
18-
image_bytes = img_file.read()
20+
image_bytes = image_path.read_bytes()
1921

2022
agent.print_response(
2123
"Tell me about this image and give me the latest news about it.",

libs/agno/agno/models/cohere/chat.py

+79-28
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from dataclasses import dataclass
22
from os import getenv
3-
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Tuple
3+
from pathlib import Path
4+
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Sequence, Tuple
45

56
from agno.exceptions import ModelProviderError
7+
from agno.media import Image
68
from agno.models.base import MessageData, Model
79
from agno.models.message import Message
810
from agno.models.response import ModelResponse
@@ -17,6 +19,78 @@
1719
raise ImportError("`cohere` not installed. Please install using `pip install cohere`")
1820

1921

22+
def _detect_image_mime_type(image_bytes: bytes) -> str:
    """
    Guess the MIME type of raw image bytes from their leading file signature.

    Args:
        image_bytes (bytes): The raw image data.

    Returns:
        str: "image/png", "image/gif" or "image/webp" when the signature matches,
            otherwise "image/jpeg" (the type previously assumed for every image).
    """
    if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if image_bytes.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    # WebP is a RIFF container whose form type sits at byte offset 8
    if image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
        return "image/webp"
    return "image/jpeg"


def _format_images_for_message(message: Message, images: Sequence[Image]) -> List[Dict[str, Any]]:
    """
    Build the multi-part content (text followed by images) for a Cohere chat message.

    Each image is base64-encoded and embedded as a data URL. Images that cannot be
    loaded or encoded are logged and skipped, so the result may contain only the text part.

    Args:
        message (Message): The message whose `content` becomes the leading text part.
        images (Sequence[Image]): The images to attach to the message.

    Returns:
        List[Dict[str, Any]]: A `{"type": "text", ...}` part followed by one
            `{"type": "image_url", ...}` part per successfully encoded image.
    """
    import base64

    # Create a default message content with text
    message_content_with_image: List[Dict[str, Any]] = [{"type": "text", "text": message.content}]

    # Add images to the message content
    for image in images:
        # Best effort: a single bad image must not prevent the others from being sent
        try:
            if image.content is not None:
                image_content = image.content
            elif image.url is not None:
                image_content = image.image_url_content
            elif image.filepath is not None:
                # Path() accepts both str and Path, so one call covers both cases
                image_content = Path(image.filepath).read_bytes()
            else:
                logger.warning(f"Unsupported image format: {image}")
                continue

            if image_content is None:
                continue

            base64_image = base64.b64encode(image_content).decode("utf-8")
            # Label the data URL with the detected type instead of always claiming JPEG
            mime_type = _detect_image_mime_type(image_content)
            image_url = f"data:{mime_type};base64,{base64_image}"
            message_content_with_image.append({"type": "image_url", "image_url": {"url": image_url}})

        except Exception as e:
            logger.error(f"Failed to process image: {str(e)}")

    return message_content_with_image
60+
61+
62+
def _format_messages(messages: List[Message]) -> List[Dict[str, Any]]:
    """
    Convert messages into the list of dictionaries sent to the Cohere API.

    Args:
        messages (List[Message]): The messages to convert.

    Returns:
        List[Dict[str, Any]]: One dictionary per message, with `None`-valued fields removed.
    """
    payload: List[Dict[str, Any]] = []
    for msg in messages:
        content: Any = msg.content

        # Images are only attached to string content; any other content is passed through untouched
        if msg.images and isinstance(msg.content, str):
            parts = _format_images_for_message(message=msg, images=msg.images)
            # More than one part means at least one image was added after the text part
            if len(parts) > 1:
                content = parts

        fields = {
            "role": msg.role,
            "content": content,
            "name": msg.name,
            "tool_call_id": msg.tool_call_id,
            "tool_calls": msg.tool_calls,
        }
        payload.append({key: value for key, value in fields.items() if value is not None})
    return payload
92+
93+
2094
@dataclass
2195
class Cohere(Model):
2296
id: str = "command-r-plus"
@@ -116,29 +190,6 @@ def request_kwargs(self) -> Dict[str, Any]:
116190
_request_params.update(self.request_params)
117191
return _request_params
118192

119-
def _format_messages(self, messages: List[Message]) -> List[Dict[str, Any]]:
120-
"""
121-
Format messages for the Cohere API.
122-
123-
Args:
124-
messages (List[Message]): The list of messages.
125-
126-
Returns:
127-
List[Dict[str, Any]]: The formatted messages.
128-
"""
129-
formatted_messages = []
130-
for message in messages:
131-
message_dict = {
132-
"role": message.role,
133-
"content": message.content,
134-
"name": message.name,
135-
"tool_call_id": message.tool_call_id,
136-
"tool_calls": message.tool_calls,
137-
}
138-
message_dict = {k: v for k, v in message_dict.items() if v is not None}
139-
formatted_messages.append(message_dict)
140-
return formatted_messages
141-
142193
def invoke(self, messages: List[Message]) -> ChatResponse:
143194
"""
144195
Invoke a non-streamed chat response from the Cohere API.
@@ -153,7 +204,7 @@ def invoke(self, messages: List[Message]) -> ChatResponse:
153204
request_kwargs = self.request_kwargs
154205

155206
try:
156-
return self.get_client().chat(model=self.id, messages=self._format_messages(messages), **request_kwargs) # type: ignore
207+
return self.get_client().chat(model=self.id, messages=_format_messages(messages), **request_kwargs) # type: ignore
157208
except Exception as e:
158209
logger.error(f"Unexpected error calling Cohere API: {str(e)}")
159210
raise ModelProviderError(message=str(e), model_name=self.name, model_id=self.id) from e
@@ -173,7 +224,7 @@ def invoke_stream(self, messages: List[Message]) -> Iterator[StreamedChatRespons
173224
try:
174225
return self.get_client().chat_stream(
175226
model=self.id,
176-
messages=self._format_messages(messages), # type: ignore
227+
messages=_format_messages(messages), # type: ignore
177228
**request_kwargs,
178229
)
179230
except Exception as e:
@@ -195,7 +246,7 @@ async def ainvoke(self, messages: List[Message]) -> ChatResponse:
195246
try:
196247
return await self.get_async_client().chat(
197248
model=self.id,
198-
messages=self._format_messages(messages), # type: ignore
249+
messages=_format_messages(messages), # type: ignore
199250
**request_kwargs,
200251
)
201252
except Exception as e:
@@ -217,7 +268,7 @@ async def ainvoke_stream(self, messages: List[Message]) -> AsyncIterator[Streame
217268
try:
218269
async for response in self.get_async_client().chat_stream(
219270
model=self.id,
220-
messages=self._format_messages(messages), # type: ignore
271+
messages=_format_messages(messages), # type: ignore
221272
**request_kwargs,
222273
):
223274
yield response

libs/agno/agno/utils/media.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import requests
44

55

6-
def download_image(url, save_path):
6+
def download_image(url: str, save_path: str) -> bool:
77
"""
88
Downloads an image from the specified URL and saves it to the given local path.
99
Parameters:

libs/agno/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ postgres = ["psycopg-binary", "psycopg"]
7575
# Dependencies for Vector databases
7676
pgvector = ["pgvector"]
7777
chromadb = ["chromadb"]
78-
lancedb = ["lancedb", "tantivy"]
78+
lancedb = ["lancedb==0.20.0", "tantivy"]
7979
qdrant = ["qdrant-client"]
8080
cassandra = ["cassio"]
8181
mongodb = ["pymongo[srv]"]

libs/agno/tests/integration/models/aws/bedrock/test_multimodal.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@ def test_image_input_bytes():
1111
"""
1212
agent = Agent(model=AwsBedrock(id="amazon.nova-pro-v1:0"), markdown=True, telemetry=False, monitoring=False)
1313

14-
image_path = Path(__file__).parent.joinpath("../../sample_image.jpg")
14+
image_path = Path(__file__).parent.parent.parent.joinpath("sample_image.jpg")
1515

1616
# Read the image file content as bytes
17-
with open(image_path, "rb") as img_file:
18-
image_bytes = img_file.read()
17+
image_bytes = image_path.read_bytes()
1918

2019
response = agent.run(
2120
"Tell me about this image.",

0 commit comments

Comments
 (0)