Skip to content

Commit 229c9b7

Browse files
Whanod and claude
authored and committed
fix: wire image data through to Claude for screenshot/photo support (RichardAtCT#168)
The bot was downloading and base64-encoding images but only passing a text prompt to Claude, never the actual image data. This sends images as multimodal content blocks via the SDK AsyncIterable path so Claude can actually see uploaded screenshots and photos.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d36bc1d commit 229c9b7

File tree

4 files changed

+56
-3
lines changed

4 files changed

+56
-3
lines changed

src/bot/features/image_handler.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ async def process_image(
5757
else:
5858
prompt = self._create_generic_prompt(caption)
5959

60-
# Convert to base64 for Claude (if supported in future)
6160
base64_image = base64.b64encode(image_bytes).decode("utf-8")
6261

6362
return ProcessedImage(

src/bot/orchestrator.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,13 @@ def _format_size(size_bytes: int) -> str:
5454
return f"{size_bytes / (1024 * 1024):.1f} MB"
5555

5656

57+
_MEDIA_TYPE_MAP = {
58+
"png": "image/png",
59+
"jpeg": "image/jpeg",
60+
"gif": "image/gif",
61+
"webp": "image/webp",
62+
}
63+
5764
# Patterns that look like secrets/credentials in CLI arguments
5865
_SECRET_PATTERNS: List[re.Pattern[str]] = [
5966
# API keys / tokens (sk-ant-..., sk-..., ghp_..., gho_..., github_pat_..., xoxb-...)
@@ -1751,13 +1758,22 @@ async def agentic_photo(
17511758
processed_image = await image_handler.process_image(
17521759
photo, update.message.caption
17531760
)
1761+
fmt = processed_image.metadata.get("format", "png")
1762+
images = [
1763+
{
1764+
"data": processed_image.base64_data,
1765+
"media_type": _MEDIA_TYPE_MAP.get(fmt, "image/png"),
1766+
}
1767+
]
1768+
17541769
await self._handle_agentic_media_message(
17551770
update=update,
17561771
context=context,
17571772
prompt=processed_image.prompt,
17581773
progress_msg=progress_msg,
17591774
user_id=user_id,
17601775
chat=chat,
1776+
images=images,
17611777
)
17621778

17631779
except Exception as e:
@@ -1820,6 +1836,7 @@ async def _handle_agentic_media_message(
18201836
progress_msg: Any,
18211837
user_id: int,
18221838
chat: Any,
1839+
images: Optional[List[Dict[str, str]]] = None,
18231840
) -> None:
18241841
"""Run a media-derived prompt through Claude and send responses."""
18251842
assert update.message is not None
@@ -1866,6 +1883,7 @@ async def _handle_agentic_media_message(
18661883
session_id=session_id,
18671884
on_stream=on_stream,
18681885
force_new=force_new,
1886+
images=images,
18691887
)
18701888
finally:
18711889
heartbeat.cancel()

src/claude/facade.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ async def run_command(
4242
interrupt_event: Optional["asyncio.Event"] = None,
4343
chat_id: Optional[int] = None,
4444
message_thread_id: Optional[int] = None,
45+
images: Optional[List[Dict[str, str]]] = None,
4546
) -> ClaudeResponse:
4647
"""Run Claude Code command with full integration."""
4748
logger.info(
@@ -124,6 +125,7 @@ async def run_command(
124125
continue_session=should_continue,
125126
stream_callback=on_stream,
126127
interrupt_event=interrupt_event,
128+
images=images,
127129
)
128130
execution_completed = True
129131
except Exception as resume_error:
@@ -156,6 +158,7 @@ async def run_command(
156158
continue_session=False,
157159
stream_callback=on_stream,
158160
interrupt_event=interrupt_event,
161+
images=images,
159162
)
160163
execution_completed = True
161164
else:
@@ -219,6 +222,7 @@ async def _execute(
219222
continue_session: bool = False,
220223
stream_callback: Optional[Callable] = None,
221224
interrupt_event: Optional[asyncio.Event] = None,
225+
images: Optional[List[Dict[str, str]]] = None,
222226
) -> ClaudeResponse:
223227
"""Execute command via SDK."""
224228
return await self.sdk_manager.execute_command(
@@ -228,6 +232,7 @@ async def _execute(
228232
continue_session=continue_session,
229233
stream_callback=stream_callback,
230234
interrupt_event=interrupt_event,
235+
images=images,
231236
)
232237

233238
async def _find_resumable_session(

src/claude/sdk_integration.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import os
55
from dataclasses import dataclass, field
66
from pathlib import Path
7-
from typing import Any, Callable, Dict, List, Optional
7+
from typing import Any, AsyncIterator, Callable, Dict, List, Optional
88

99
import structlog
1010
from claude_agent_sdk import (
@@ -277,6 +277,7 @@ async def execute_command(
277277
continue_session: bool = False,
278278
stream_callback: Optional[Callable[[StreamUpdate], None]] = None,
279279
interrupt_event: Optional[asyncio.Event] = None,
280+
images: Optional[List[Dict[str, str]]] = None,
280281
) -> ClaudeResponse:
281282
"""Execute Claude Code command via SDK."""
282283
start_time = asyncio.get_event_loop().time()
@@ -370,7 +371,37 @@ async def _run_client() -> None:
370371
client = ClaudeSDKClient(options)
371372
try:
372373
await client.connect()
373-
await client.query(prompt)
374+
375+
if images:
376+
content_blocks: List[Dict[str, Any]] = []
377+
for img in images:
378+
media_type = img.get("media_type", "image/png")
379+
content_blocks.append(
380+
{
381+
"type": "image",
382+
"source": {
383+
"type": "base64",
384+
"media_type": media_type,
385+
"data": img["data"],
386+
},
387+
}
388+
)
389+
content_blocks.append({"type": "text", "text": prompt})
390+
391+
multimodal_msg = {
392+
"type": "user",
393+
"message": {
394+
"role": "user",
395+
"content": content_blocks,
396+
},
397+
}
398+
399+
async def _multimodal_prompt() -> AsyncIterator[Dict[str, Any]]:
400+
yield multimodal_msg
401+
402+
await client.query(_multimodal_prompt())
403+
else:
404+
await client.query(prompt)
374405

375406
assert client._query is not None
376407
async for raw_data in client._query.receive_messages():

0 commit comments

Comments
 (0)