Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

document-upload-ag-2793 #2325

Merged
merged 9 commits into from
Mar 12, 2025
25 changes: 25 additions & 0 deletions cookbook/models/anthropic/pdf_input_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from pathlib import Path

from agno.agent import Agent
from agno.media import File
from agno.models.anthropic import Claude

# Download the sample PDF before running this script:
#   wget https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf
# The script looks for it three directories above the folder holding this file.

# Resolve to an absolute path first: when __file__ is relative (for example a
# bare "pdf_input_local.py"), it has too few ancestors and parents[3] would
# raise IndexError.
pdf_path = Path(__file__).resolve().parents[3].joinpath("ThaiRecipes.pdf")

# Agent backed by Claude, with markdown enabled.
agent = Agent(
    model=Claude(id="claude-3-5-sonnet-20241022"),
    markdown=True,
)

# Attach the local PDF to the prompt and stream the response.
agent.print_response(
    "Summarize the contents of the attached file.",
    files=[
        File(
            filepath=pdf_path,
        ),
    ],
    stream=True,
)
16 changes: 16 additions & 0 deletions cookbook/models/anthropic/pdf_input_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from agno.agent import Agent
from agno.media import File
from agno.models.anthropic import Claude

# Agent backed by Claude, with markdown enabled.
claude_model = Claude(id="claude-3-5-sonnet-20241022")
agent = Agent(model=claude_model, markdown=True)

# Attach the PDF by URL instead of a local path.
recipes_pdf = File(url="https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf")

# Ask for a summary of the attachment and stream the response.
agent.print_response(
    "Summarize the contents of the attached file.",
    files=[recipes_pdf],
    stream=True,
)
20 changes: 20 additions & 0 deletions cookbook/models/google/gemini/pdf_input_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from pathlib import Path

from agno.agent import Agent
from agno.media import File
from agno.models.google import Gemini

# Download the sample PDF before running this script:
#   wget https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf
# The script looks for it four directories above the folder holding this file.

# Resolve to an absolute path first: when __file__ is relative (for example a
# bare "pdf_input_local.py"), it has too few ancestors and parents[4] would
# raise IndexError.
pdf_path = Path(__file__).resolve().parents[4].joinpath("ThaiRecipes.pdf")

# Agent backed by Gemini, with markdown enabled.
agent = Agent(
    model=Gemini(id="gemini-2.0-flash-exp"),
    markdown=True,
)

# Attach the local PDF to the prompt.
agent.print_response(
    "Summarize the contents of the attached file.",
    files=[File(filepath=pdf_path)],
)
13 changes: 13 additions & 0 deletions cookbook/models/google/gemini/pdf_input_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from agno.agent import Agent
from agno.media import File
from agno.models.google import Gemini

# Agent backed by Gemini, with markdown enabled.
gemini_model = Gemini(id="gemini-2.0-flash-exp")
agent = Agent(model=gemini_model, markdown=True)

# Attach the PDF by URL instead of a local path.
recipes_pdf = File(url="https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf")

# Ask for a summary of the attachment.
agent.print_response(
    "Summarize the contents of the attached file.",
    files=[recipes_pdf],
)
57 changes: 49 additions & 8 deletions libs/agno/agno/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from agno.agent.metrics import SessionMetrics
from agno.exceptions import ModelProviderError, StopAgentRun
from agno.knowledge.agent import AgentKnowledge
from agno.media import Audio, AudioArtifact, AudioResponse, Image, ImageArtifact, Video, VideoArtifact
from agno.media import Audio, AudioArtifact, AudioResponse, File, Image, ImageArtifact, Video, VideoArtifact
from agno.memory.agent import AgentMemory, AgentRun
from agno.models.base import Model
from agno.models.message import Message, MessageReferences
Expand Down Expand Up @@ -463,6 +463,7 @@ def _run(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Video]] = None,
files: Optional[Sequence[File]] = None,
messages: Optional[Sequence[Union[Dict, Message]]] = None,
stream_intermediate_steps: bool = False,
**kwargs: Any,
Expand Down Expand Up @@ -507,7 +508,7 @@ def _run(

# 4. Prepare run messages
run_messages: RunMessages = self.get_run_messages(
message=message, audio=audio, images=images, videos=videos, messages=messages, **kwargs
message=message, audio=audio, images=images, videos=videos, files=files, messages=messages, **kwargs
)
if len(run_messages.messages) == 0:
logger.error("No messages to be sent to the model.")
Expand Down Expand Up @@ -801,6 +802,7 @@ def run(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Video]] = None,
files: Optional[Sequence[File]] = None,
messages: Optional[Sequence[Union[Dict, Message]]] = None,
stream_intermediate_steps: bool = False,
retries: Optional[int] = None,
Expand All @@ -816,6 +818,7 @@ def run(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Video]] = None,
files: Optional[Sequence[File]] = None,
messages: Optional[Sequence[Union[Dict, Message]]] = None,
stream_intermediate_steps: bool = False,
retries: Optional[int] = None,
Expand All @@ -830,6 +833,7 @@ def run(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Any]] = None,
files: Optional[Sequence[File]] = None,
messages: Optional[Sequence[Union[Dict, Message]]] = None,
stream_intermediate_steps: bool = False,
retries: Optional[int] = None,
Expand Down Expand Up @@ -861,6 +865,7 @@ def run(
audio=audio,
images=images,
videos=videos,
files=files,
messages=messages,
stream_intermediate_steps=stream_intermediate_steps,
**kwargs,
Expand Down Expand Up @@ -900,6 +905,7 @@ def run(
audio=audio,
images=images,
videos=videos,
files=files,
messages=messages,
stream_intermediate_steps=stream_intermediate_steps,
**kwargs,
Expand All @@ -912,6 +918,7 @@ def run(
audio=audio,
images=images,
videos=videos,
files=files,
messages=messages,
stream_intermediate_steps=stream_intermediate_steps,
**kwargs,
Expand Down Expand Up @@ -948,6 +955,7 @@ async def _arun(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Video]] = None,
files: Optional[Sequence[File]] = None,
messages: Optional[Sequence[Union[Dict, Message]]] = None,
stream_intermediate_steps: bool = False,
**kwargs: Any,
Expand Down Expand Up @@ -993,7 +1001,7 @@ async def _arun(

# 4. Prepare run messages
run_messages: RunMessages = self.get_run_messages(
message=message, audio=audio, images=images, videos=videos, messages=messages, **kwargs
message=message, audio=audio, images=images, videos=videos, files=files, messages=messages, **kwargs
)
if len(run_messages.messages) == 0:
logger.error("No messages to be sent to the model.")
Expand Down Expand Up @@ -1285,6 +1293,7 @@ async def arun(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Video]] = None,
files: Optional[Sequence[File]] = None,
messages: Optional[Sequence[Union[Dict, Message]]] = None,
stream_intermediate_steps: bool = False,
retries: Optional[int] = None,
Expand Down Expand Up @@ -1315,6 +1324,7 @@ async def arun(
audio=audio,
images=images,
videos=videos,
files=files,
messages=messages,
stream_intermediate_steps=stream_intermediate_steps,
**kwargs,
Expand Down Expand Up @@ -1353,6 +1363,7 @@ async def arun(
audio=audio,
images=images,
videos=videos,
files=files,
messages=messages,
stream_intermediate_steps=stream_intermediate_steps,
**kwargs,
Expand All @@ -1365,6 +1376,7 @@ async def arun(
audio=audio,
images=images,
videos=videos,
files=files,
messages=messages,
stream_intermediate_steps=stream_intermediate_steps,
**kwargs,
Expand Down Expand Up @@ -2104,6 +2116,7 @@ def get_user_message(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Video]] = None,
files: Optional[Sequence[File]] = None,
**kwargs: Any,
) -> Optional[Message]:
"""Return the user message for the Agent.
Expand Down Expand Up @@ -2161,13 +2174,20 @@ def get_user_message(
audio=audio,
images=images,
videos=videos,
files=files,
**kwargs,
)

# 2. If create_default_user_message is False or message is a list, return the message as is.
if not self.create_default_user_message or isinstance(message, list):
return Message(
role=self.user_message_role, content=message, images=images, audio=audio, videos=videos, **kwargs
role=self.user_message_role,
content=message,
images=images,
audio=audio,
videos=videos,
files=files,
**kwargs,
)

# 3. Build the default user message for the Agent
Expand Down Expand Up @@ -2203,6 +2223,7 @@ def get_user_message(
audio=audio,
images=images,
videos=videos,
files=files,
**kwargs,
)

Expand All @@ -2213,6 +2234,7 @@ def get_run_messages(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Video]] = None,
files: Optional[Sequence[File]] = None,
messages: Optional[Sequence[Union[Dict, Message]]] = None,
**kwargs: Any,
) -> RunMessages:
Expand All @@ -2236,7 +2258,7 @@ def get_run_messages(

Typical usage:
run_messages = self.get_run_messages(
message=message, audio=audio, images=images, videos=videos, messages=messages, **kwargs
message=message, audio=audio, images=images, videos=videos, files=files, messages=messages, **kwargs
)
"""
# Initialize the RunMessages object
Expand Down Expand Up @@ -2310,7 +2332,9 @@ def get_run_messages(
user_message: Optional[Message] = None
# 4.1 Build user message if message is None, str or list
if message is None or isinstance(message, str) or isinstance(message, list):
user_message = self.get_user_message(message=message, audio=audio, images=images, videos=videos, **kwargs)
user_message = self.get_user_message(
message=message, audio=audio, images=images, videos=videos, files=files, **kwargs
)
# 4.2 If message is provided as a Message, use it directly
elif isinstance(message, Message):
user_message = message
Expand Down Expand Up @@ -3473,6 +3497,7 @@ def print_response(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Video]] = None,
files: Optional[Sequence[File]] = None,
stream: bool = False,
markdown: bool = False,
show_message: bool = True,
Expand Down Expand Up @@ -3529,7 +3554,14 @@ def print_response(
live_log.update(Group(*panels))

for resp in self.run(
message=message, messages=messages, audio=audio, images=images, videos=videos, stream=True, **kwargs
message=message,
messages=messages,
audio=audio,
images=images,
videos=videos,
files=files,
stream=True,
**kwargs,
):
if isinstance(resp, RunResponse):
if resp.event == RunEvent.run_response:
Expand Down Expand Up @@ -3646,6 +3678,7 @@ def print_response(
audio=audio,
images=images,
videos=videos,
files=files,
stream=False,
**kwargs,
)
Expand Down Expand Up @@ -3740,6 +3773,7 @@ async def aprint_response(
audio: Optional[Sequence[Audio]] = None,
images: Optional[Sequence[Image]] = None,
videos: Optional[Sequence[Video]] = None,
files: Optional[Sequence[File]] = None,
stream: bool = False,
markdown: bool = False,
show_message: bool = True,
Expand Down Expand Up @@ -3796,7 +3830,14 @@ async def aprint_response(
live_log.update(Group(*panels))

_arun_generator = await self.arun(
message=message, messages=messages, audio=audio, images=images, videos=videos, stream=True, **kwargs
message=message,
messages=messages,
audio=audio,
images=images,
videos=videos,
files=files,
stream=True,
**kwargs,
)
async for resp in _arun_generator:
if isinstance(resp, RunResponse):
Expand Down
54 changes: 52 additions & 2 deletions libs/agno/agno/media.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
from typing import Any, Dict, Optional, Union
from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union

from pydantic import BaseModel, model_validator
from pydantic import BaseModel, field_validator, model_validator


class Media(BaseModel):
Expand Down Expand Up @@ -255,3 +255,53 @@ def to_dict(self) -> Dict[str, Any]:
}

return {k: v for k, v in response_dict.items() if v is not None}


class File(BaseModel):
    """A file input described by a URL, a local path, or in-memory content.

    At least one of ``url``, ``filepath`` or ``content`` must be provided.
    ``mime_type`` is optional; when given it must be one of
    ``VALID_MIME_TYPES``.
    """

    url: Optional[str] = None
    filepath: Optional[Union[Path, str]] = None
    content: Optional[Any] = None
    mime_type: Optional[str] = None

    # ClassVar keeps this a class-level constant instead of a pydantic model
    # field. The mime_type validator reads it as ``cls.VALID_MIME_TYPES``, and
    # pydantic does not expose field defaults as class attributes.
    VALID_MIME_TYPES: ClassVar[List[str]] = [
        "application/pdf",
        "application/x-javascript",
        "text/javascript",
        "application/x-python",
        "text/x-python",
        "text/plain",
        "text/html",
        "text/css",
        "text/md",
        "text/csv",
        "text/xml",
        "text/rtf",
    ]

    @model_validator(mode="before")
    @classmethod
    def check_at_least_one_source(cls, data):
        """Ensure at least one of url, filepath, or content is provided.

        Only dict input is checked; any other input is passed through
        unchanged. A source counts as provided when its value is truthy.

        Raises:
            ValueError: If none of the three sources is set.
        """
        if isinstance(data, dict) and not any(data.get(field) for field in ["url", "filepath", "content"]):
            raise ValueError("At least one of url, filepath, or content must be provided")
        return data

    @field_validator("mime_type")
    @classmethod
    def validate_mime_type(cls, v):
        """Validate that the mime_type is one of the allowed types.

        ``None`` is accepted and returned as-is.

        Raises:
            ValueError: If ``v`` is not None and not in ``VALID_MIME_TYPES``.
        """
        if v is not None and v not in cls.VALID_MIME_TYPES:
            raise ValueError(f"Invalid MIME type: {v}. Must be one of: {cls.VALID_MIME_TYPES}")
        return v

    @property
    def file_url_content(self) -> Optional[Tuple[bytes, str]]:
        """Fetch ``url`` and return ``(body, mime_type)``, or None if no url is set.

        The MIME type is the response's ``Content-Type`` header with any
        parameters after ``;`` dropped; it is an empty string when the header
        is missing. Each access performs a new HTTP GET.
        """
        # Imported here so httpx is only needed when a URL is actually fetched.
        import httpx

        if self.url:
            # NOTE(review): the HTTP status is not checked, so an error
            # response body is returned like any other content.
            response = httpx.get(self.url)
            content = response.content
            mime_type = response.headers.get("Content-Type", "").split(";")[0]
            return content, mime_type
        else:
            return None
Loading