Skip to content

Commit ed90113

Browse files
Ayush0054, pritipsingh, dirkbrnd
authored
add image support in file upload (#2007)
## Description

Update:

- Added image support in file upload.
- Added multiple-image support in file upload.

---

## Type of change

Please check the options that are relevant:

- [ ] Bug fix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] Model update (Addition or modification of models)
- [ ] Other (please describe):

---

## Checklist

- [x] Adherence to standards: Code complies with Agno’s style guidelines and best practices.
- [x] Formatting and validation: You have run `./scripts/format.sh` and `./scripts/validate.sh` to ensure code is formatted and linted.
- [x] Self-review completed: A thorough review has been performed by the contributor(s).
- [x] Documentation: Docstrings and comments have been added or updated for any complex logic.
- [ ] Examples and guides: Relevant cookbook examples have been included or updated (if applicable).
- [x] Tested in a clean environment: Changes have been tested in a clean environment to confirm expected behavior.
- [x] Tests (optional): Tests have been added or updated to cover any new or changed functionality.

---

## Additional Notes

Include any deployment notes, performance implications, security considerations, or other relevant information (e.g., screenshots or logs if applicable).

---------

Co-authored-by: Priti <[email protected]>
Co-authored-by: Dirk Brand <[email protected]>
Co-authored-by: Dirk Brand <[email protected]>
1 parent 9b82e9e commit ed90113

File tree

7 files changed

+430
-119
lines changed

7 files changed

+430
-119
lines changed

cookbook/models/perplexity/basic_stream.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from typing import Iterator # noqa
22
from agno.agent import Agent, RunResponse # noqa
33
from agno.models.perplexity import Perplexity
4+
45
agent = Agent(model=Perplexity(id="sonar"), markdown=True)
56

67
# Get the response in a variable

libs/agno/agno/models/perplexity/perplexity.py

-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ class Perplexity(OpenAILike):
1919
max_tokens (int): The maximum number of tokens. Defaults to 1024.
2020
"""
2121

22-
2322
id: str = "sonar"
2423
name: str = "Perplexity"
2524
provider: str = "Perplexity: " + id

libs/agno/agno/playground/async_router.py

+64-58
Original file line numberDiff line numberDiff line change
@@ -125,24 +125,20 @@ async def create_agent_run(
125125
session_id: Optional[str] = Form(None),
126126
user_id: Optional[str] = Form(None),
127127
files: Optional[List[UploadFile]] = File(None),
128-
image: Optional[UploadFile] = File(None),
129128
):
130129
logger.debug(f"AgentRunRequest: {message} {session_id} {user_id} {agent_id}")
131130
agent = get_agent_by_id(agent_id, agents)
132131
if agent is None:
133132
raise HTTPException(status_code=404, detail="Agent not found")
134133

135-
if files:
136-
if agent.knowledge is None:
137-
raise HTTPException(status_code=404, detail="KnowledgeBase not found")
138-
139134
if session_id is not None:
140135
logger.debug(f"Continuing session: {session_id}")
141136
else:
142137
logger.debug("Creating new session")
143138

144139
# Create a new instance of this agent
145140
new_agent_instance = agent.deep_copy(update={"session_id": session_id})
141+
new_agent_instance.session_name = None
146142
if user_id is not None:
147143
new_agent_instance.user_id = user_id
148144

@@ -151,72 +147,82 @@ async def create_agent_run(
151147
else:
152148
new_agent_instance.monitoring = False
153149

154-
base64_image: Optional[Image] = None
155-
if image:
156-
base64_image = await process_image(image)
150+
base64_images: List[Image] = []
157151

158152
if files:
159153
for file in files:
160-
if file.content_type == "application/pdf":
161-
from agno.document.reader.pdf_reader import PDFReader
162-
163-
contents = await file.read()
164-
pdf_file = BytesIO(contents)
165-
pdf_file.name = file.filename
166-
file_content = PDFReader().read(pdf_file)
167-
if agent.knowledge is not None:
168-
agent.knowledge.load_documents(file_content)
169-
elif file.content_type == "text/csv":
170-
from agno.document.reader.csv_reader import CSVReader
171-
172-
contents = await file.read()
173-
csv_file = BytesIO(contents)
174-
csv_file.name = file.filename
175-
file_content = CSVReader().read(csv_file)
176-
if agent.knowledge is not None:
177-
agent.knowledge.load_documents(file_content)
178-
elif file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
179-
from agno.document.reader.docx_reader import DocxReader
180-
181-
contents = await file.read()
182-
docx_file = BytesIO(contents)
183-
docx_file.name = file.filename
184-
file_content = DocxReader().read(docx_file)
185-
if agent.knowledge is not None:
186-
agent.knowledge.load_documents(file_content)
187-
elif file.content_type == "text/plain":
188-
from agno.document.reader.text_reader import TextReader
189-
190-
contents = await file.read()
191-
text_file = BytesIO(contents)
192-
text_file.name = file.filename
193-
file_content = TextReader().read(text_file)
194-
if agent.knowledge is not None:
195-
agent.knowledge.load_documents(file_content)
196-
197-
elif file.content_type == "application/json":
198-
from agno.document.reader.json_reader import JSONReader
199-
200-
contents = await file.read()
201-
json_file = BytesIO(contents)
202-
json_file.name = file.filename
203-
file_content = JSONReader().read(json_file)
204-
if agent.knowledge is not None:
205-
agent.knowledge.load_documents(file_content)
154+
if file.content_type in ["image/png", "image/jpeg", "image/jpg", "image/webp"]:
155+
try:
156+
base64_image = await process_image(file)
157+
base64_images.append(base64_image)
158+
except Exception as e:
159+
logger.error(f"Error processing image {file.filename}: {e}")
160+
continue
206161
else:
207-
raise HTTPException(status_code=400, detail="Unsupported file type")
162+
# Check for knowledge base before processing documents
163+
if new_agent_instance.knowledge is None:
164+
raise HTTPException(status_code=404, detail="KnowledgeBase not found")
165+
166+
if file.content_type == "application/pdf":
167+
from agno.document.reader.pdf_reader import PDFReader
168+
169+
contents = await file.read()
170+
pdf_file = BytesIO(contents)
171+
pdf_file.name = file.filename
172+
file_content = PDFReader().read(pdf_file)
173+
if new_agent_instance.knowledge is not None:
174+
new_agent_instance.knowledge.load_documents(file_content)
175+
elif file.content_type == "text/csv":
176+
from agno.document.reader.csv_reader import CSVReader
177+
178+
contents = await file.read()
179+
csv_file = BytesIO(contents)
180+
csv_file.name = file.filename
181+
file_content = CSVReader().read(csv_file)
182+
if new_agent_instance.knowledge is not None:
183+
new_agent_instance.knowledge.load_documents(file_content)
184+
elif file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
185+
from agno.document.reader.docx_reader import DocxReader
186+
187+
contents = await file.read()
188+
docx_file = BytesIO(contents)
189+
docx_file.name = file.filename
190+
file_content = DocxReader().read(docx_file)
191+
if new_agent_instance.knowledge is not None:
192+
new_agent_instance.knowledge.load_documents(file_content)
193+
elif file.content_type == "text/plain":
194+
from agno.document.reader.text_reader import TextReader
195+
196+
contents = await file.read()
197+
text_file = BytesIO(contents)
198+
text_file.name = file.filename
199+
file_content = TextReader().read(text_file)
200+
if new_agent_instance.knowledge is not None:
201+
new_agent_instance.knowledge.load_documents(file_content)
202+
203+
elif file.content_type == "application/json":
204+
from agno.document.reader.json_reader import JSONReader
205+
206+
contents = await file.read()
207+
json_file = BytesIO(contents)
208+
json_file.name = file.filename
209+
file_content = JSONReader().read(json_file)
210+
if new_agent_instance.knowledge is not None:
211+
new_agent_instance.knowledge.load_documents(file_content)
212+
else:
213+
raise HTTPException(status_code=400, detail="Unsupported file type")
208214

209215
if stream:
210216
return StreamingResponse(
211-
chat_response_streamer(new_agent_instance, message, images=[base64_image] if base64_image else None),
217+
chat_response_streamer(new_agent_instance, message, images=base64_images if base64_images else None),
212218
media_type="text/event-stream",
213219
)
214220
else:
215221
run_response = cast(
216222
RunResponse,
217223
await new_agent_instance.arun(
218-
message,
219-
images=[base64_image] if base64_image else None,
224+
message=message,
225+
images=base64_images if base64_images else None,
220226
stream=False,
221227
),
222228
)

libs/agno/agno/playground/sync_router.py

+64-60
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ def chat_response_streamer(agent: Agent, message: str, images: Optional[List[Ima
9595

9696
def process_image(file: UploadFile) -> Image:
9797
content = file.file.read()
98-
98+
if not content:
99+
raise HTTPException(status_code=400, detail="Empty file")
99100
return Image(content=content)
100101

101102
@playground_router.post("/agents/{agent_id}/runs")
@@ -107,17 +108,12 @@ def create_agent_run(
107108
session_id: Optional[str] = Form(None),
108109
user_id: Optional[str] = Form(None),
109110
files: Optional[List[UploadFile]] = File(None),
110-
image: Optional[UploadFile] = File(None),
111111
):
112112
logger.debug(f"AgentRunRequest: {message} {agent_id} {stream} {monitor} {session_id} {user_id} {files}")
113113
agent = get_agent_by_id(agent_id, agents)
114114
if agent is None:
115115
raise HTTPException(status_code=404, detail="Agent not found")
116116

117-
if files:
118-
if agent.knowledge is None:
119-
raise HTTPException(status_code=404, detail="KnowledgeBase not found")
120-
121117
if session_id is not None:
122118
logger.debug(f"Continuing session: {session_id}")
123119
else:
@@ -135,73 +131,81 @@ def create_agent_run(
135131
else:
136132
new_agent_instance.monitoring = False
137133

138-
base64_image: Optional[Image] = None
139-
if image:
140-
base64_image = process_image(image)
134+
base64_images: List[Image] = []
141135

142136
if files:
143137
for file in files:
144-
if file.content_type == "application/pdf":
145-
from agno.document.reader.pdf_reader import PDFReader
146-
147-
contents = file.file.read()
148-
pdf_file = BytesIO(contents)
149-
pdf_file.name = file.filename
150-
file_content = PDFReader().read(pdf_file)
151-
if agent.knowledge is not None:
152-
agent.knowledge.load_documents(file_content)
153-
elif file.content_type == "text/csv":
154-
from agno.document.reader.csv_reader import CSVReader
155-
156-
contents = file.file.read()
157-
csv_file = BytesIO(contents)
158-
csv_file.name = file.filename
159-
file_content = CSVReader().read(csv_file)
160-
if agent.knowledge is not None:
161-
agent.knowledge.load_documents(file_content)
162-
elif file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
163-
from agno.document.reader.docx_reader import DocxReader
164-
165-
contents = file.file.read()
166-
docx_file = BytesIO(contents)
167-
docx_file.name = file.filename
168-
file_content = DocxReader().read(docx_file)
169-
if agent.knowledge is not None:
170-
agent.knowledge.load_documents(file_content)
171-
elif file.content_type == "text/plain":
172-
from agno.document.reader.text_reader import TextReader
173-
174-
contents = file.file.read()
175-
text_file = BytesIO(contents)
176-
text_file.name = file.filename
177-
file_content = TextReader().read(text_file)
178-
if agent.knowledge is not None:
179-
agent.knowledge.load_documents(file_content)
180-
181-
elif file.content_type == "application/json":
182-
from agno.document.reader.json_reader import JSONReader
183-
184-
content = file.read()
185-
json_file = BytesIO(content)
186-
json_file.name = file.filename
187-
file_content = JSONReader().read(json_file)
188-
if agent.knowledge is not None:
189-
agent.knowledge.load_documents(file_content)
190-
138+
if file.content_type in ["image/png", "image/jpeg", "image/jpg", "image/webp"]:
139+
try:
140+
base64_image = process_image(file)
141+
base64_images.append(base64_image)
142+
except Exception as e:
143+
logger.error(f"Error processing image {file.filename}: {e}")
144+
continue
191145
else:
192-
raise HTTPException(status_code=400, detail="Unsupported file type")
146+
# Check for knowledge base before processing documents
147+
if new_agent_instance.knowledge is None:
148+
raise HTTPException(status_code=404, detail="KnowledgeBase not found")
149+
150+
if file.content_type == "application/pdf":
151+
from agno.document.reader.pdf_reader import PDFReader
152+
153+
contents = file.file.read()
154+
pdf_file = BytesIO(contents)
155+
pdf_file.name = file.filename
156+
file_content = PDFReader().read(pdf_file)
157+
if new_agent_instance.knowledge is not None:
158+
new_agent_instance.knowledge.load_documents(file_content)
159+
elif file.content_type == "text/csv":
160+
from agno.document.reader.csv_reader import CSVReader
161+
162+
contents = file.file.read()
163+
csv_file = BytesIO(contents)
164+
csv_file.name = file.filename
165+
file_content = CSVReader().read(csv_file)
166+
if new_agent_instance.knowledge is not None:
167+
new_agent_instance.knowledge.load_documents(file_content)
168+
elif file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
169+
from agno.document.reader.docx_reader import DocxReader
170+
171+
contents = file.file.read()
172+
docx_file = BytesIO(contents)
173+
docx_file.name = file.filename
174+
file_content = DocxReader().read(docx_file)
175+
if new_agent_instance.knowledge is not None:
176+
new_agent_instance.knowledge.load_documents(file_content)
177+
elif file.content_type == "text/plain":
178+
from agno.document.reader.text_reader import TextReader
179+
180+
contents = file.file.read()
181+
text_file = BytesIO(contents)
182+
text_file.name = file.filename
183+
file_content = TextReader().read(text_file)
184+
if new_agent_instance.knowledge is not None:
185+
new_agent_instance.knowledge.load_documents(file_content)
186+
elif file.content_type == "application/json":
187+
from agno.document.reader.json_reader import JSONReader
188+
189+
contents = file.file.read()
190+
json_file = BytesIO(contents)
191+
json_file.name = file.filename
192+
file_content = JSONReader().read(json_file)
193+
if new_agent_instance.knowledge is not None:
194+
new_agent_instance.knowledge.load_documents(file_content)
195+
else:
196+
raise HTTPException(status_code=400, detail="Unsupported file type")
193197

194198
if stream:
195199
return StreamingResponse(
196-
chat_response_streamer(new_agent_instance, message, images=[base64_image] if base64_image else None),
200+
chat_response_streamer(new_agent_instance, message, images=base64_images if base64_images else None),
197201
media_type="text/event-stream",
198202
)
199203
else:
200204
run_response = cast(
201205
RunResponse,
202206
new_agent_instance.run(
203-
message,
204-
images=[base64_image] if base64_image else None,
207+
message=message,
208+
images=base64_images if base64_images else None,
205209
stream=False,
206210
),
207211
)

libs/agno/tests/unit/playground/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)