diff --git a/connectonion/core/agent.py b/connectonion/core/agent.py index 1b8bbe4..df63eb1 100644 --- a/connectonion/core/agent.py +++ b/connectonion/core/agent.py @@ -12,6 +12,7 @@ import os import sys import time +import base64 from typing import List, Optional, Dict, Any, Callable, Union from pathlib import Path from .llm import LLM, create_llm, TokenUsage @@ -218,7 +219,8 @@ def _register_event(self, event_func: EventHandler): self.events[event_type].append(event_func) def input(self, prompt: str, max_iterations: Optional[int] = None, - session: Optional[Dict] = None, images: list[str] | None = None) -> str: + session: Optional[Dict] = None, images: list[str] | None = None, + files: list[dict] | None = None) -> str: """Provide input to the agent and get response. Args: @@ -226,6 +228,9 @@ def input(self, prompt: str, max_iterations: Optional[int] = None, max_iterations: Override agent's max_iterations for this request session: Optional session to continue a conversation. images: Optional list of base64 data URLs for multimodal input + files: Optional list of file dicts with keys: + - name: filename (e.g. "report.pdf") + - data: base64-encoded data URL (e.g. 
"data:application/pdf;base64,...") Returns: The agent's response after processing the input @@ -254,11 +259,34 @@ def input(self, prompt: str, max_iterations: Optional[int] = None, # Start YAML session logging self.logger.start_session(self.system_prompt) - # Add user message to conversation (multimodal if images provided) - if images: + # Save uploaded files to .co/uploads/ and build file path references + saved_files = [] + if files: + uploads_dir = self.logger.co_dir / "uploads" + uploads_dir.mkdir(parents=True, exist_ok=True) + for f in files: + safe_name = Path(f["name"]).name + file_path = uploads_dir / safe_name + # Decode base64 data URL and write to disk + data_url = f["data"] + if "," in data_url: + raw_data = base64.b64decode(data_url.split(",", 1)[1]) + else: + raw_data = base64.b64decode(data_url) + file_path.write_bytes(raw_data) + saved_files.append(str(file_path)) + + # Add user message to conversation (multimodal if images or files provided) + if images or saved_files: content = [{"type": "text", "text": prompt}] - for img in images: + for img in (images or []): content.append({"type": "image_url", "image_url": {"url": img}}) + if saved_files: + file_list = "\n".join(f"- {p}" for p in saved_files) + content.append({ + "type": "text", + "text": f"The user uploaded the following files:\n{file_list}\nUse your read_file tool or other available tools to read the file contents before responding. Do not assume or guess the contents." 
+ }) self.current_session['messages'].append({"role": "user", "content": content}) else: self.current_session['messages'].append({"role": "user", "content": prompt}) diff --git a/connectonion/network/asgi/http.py b/connectonion/network/asgi/http.py index a7802e3..6771dc8 100644 --- a/connectonion/network/asgi/http.py +++ b/connectonion/network/asgi/http.py @@ -281,7 +281,13 @@ async def handle_http( # Extract session for conversation continuation session = data.get("session") - result = route_handlers["input"](storage, prompt, session) + images = data.get("images") + files = data.get("files") + try: + result = route_handlers["input"](storage, prompt, session, images=images, files=files) + except ValueError as e: + await send_json(send, {"error": str(e)}, 400) + return await send_json(send, result) elif method == "GET" and path.startswith("/sessions/"): diff --git a/connectonion/network/asgi/websocket.py b/connectonion/network/asgi/websocket.py index 1d1ef6e..75d4915 100644 --- a/connectonion/network/asgi/websocket.py +++ b/connectonion/network/asgi/websocket.py @@ -135,6 +135,7 @@ async def handle_websocket( # Extract session for conversation continuation (same as HTTP) session = data.get("session") images = data.get("images") + files = data.get("files") # Create IO for bidirectional communication io = WebSocketIO() @@ -144,7 +145,7 @@ async def handle_websocket( def run_agent(): try: - result_holder[0] = route_handlers["ws_input"](storage, prompt, io, session, images) + result_holder[0] = route_handlers["ws_input"](storage, prompt, io, session, images, files) except Exception as e: error_holder[0] = str(e) agent_done.set() diff --git a/connectonion/network/connect.py b/connectonion/network/connect.py index 7eec0ce..0a88f19 100644 --- a/connectonion/network/connect.py +++ b/connectonion/network/connect.py @@ -189,7 +189,9 @@ def input( self, prompt: str, timeout: float = 60.0, - on_onboard: Optional[Callable[[List[str], Optional[float]], Dict[str, Any]]] = None + 
on_onboard: Optional[Callable[[List[str], Optional[float]], Dict[str, Any]]] = None, + images: Optional[List[str]] = None, + files: Optional[List[Dict[str, Any]]] = None, ) -> Response: """ Send prompt to remote agent and get response. @@ -205,6 +207,8 @@ def input( Called with (methods: list[str], payment_amount: float | None). Should return {"invite_code": "..."} or {"payment": amount}. If None, prompts interactively in terminal. + images: Optional list of base64 data URLs for multimodal input + files: Optional list of file dicts with name and base64 data Returns: Response with text and done flag @@ -229,16 +233,18 @@ def input( except RuntimeError as e: if "input() cannot be used" in str(e): raise - return asyncio.run(self._stream_input(prompt, timeout, on_onboard)) + return asyncio.run(self._stream_input(prompt, timeout, on_onboard, images, files)) async def input_async( self, prompt: str, timeout: float = 60.0, - on_onboard: Optional[Callable[[List[str], Optional[float]], Dict[str, Any]]] = None + on_onboard: Optional[Callable[[List[str], Optional[float]], Dict[str, Any]]] = None, + images: Optional[List[str]] = None, + files: Optional[List[Dict[str, Any]]] = None, ) -> Response: """Async version of input().""" - return await self._stream_input(prompt, timeout, on_onboard) + return await self._stream_input(prompt, timeout, on_onboard, images, files) def reset(self) -> None: """Clear conversation and start fresh.""" @@ -257,7 +263,9 @@ async def _stream_input( self, prompt: str, timeout: float, - on_onboard: Optional[Callable[[List[str], Optional[float]], Dict[str, Any]]] = None + on_onboard: Optional[Callable[[List[str], Optional[float]], Dict[str, Any]]] = None, + images: Optional[List[str]] = None, + files: Optional[List[Dict[str, Any]]] = None, ) -> Response: """Send prompt via WebSocket and stream events.""" import websockets @@ -285,7 +293,7 @@ async def _stream_input( input_id = str(uuid.uuid4()) # Build the INPUT message - input_msg = 
self._build_input_message(prompt, input_id, is_direct) + input_msg = self._build_input_message(prompt, input_id, is_direct, images, files) try: async with websockets.connect(ws_url) as ws: @@ -379,7 +387,14 @@ async def _stream_input( self._status = "idle" raise TimeoutError(f"Request timed out after {timeout}s") - def _build_input_message(self, prompt: str, input_id: str, is_direct: bool = False) -> Dict[str, Any]: + def _build_input_message( + self, + prompt: str, + input_id: str, + is_direct: bool = False, + images: Optional[List[str]] = None, + files: Optional[List[Dict[str, Any]]] = None, + ) -> Dict[str, Any]: """Build INPUT message with optional signing.""" input_msg: Dict[str, Any] = { "type": "INPUT", @@ -396,6 +411,12 @@ def _build_input_message(self, prompt: str, input_id: str, is_direct: bool = Fal if self._current_session: input_msg["session"] = self._current_session + # Add multimodal attachments + if images: + input_msg["images"] = images + if files: + input_msg["files"] = files + # Sign if keys provided if self._keys: payload: Dict[str, Any] = {"prompt": prompt, "timestamp": input_msg["timestamp"]} diff --git a/connectonion/network/host/routes.py b/connectonion/network/host/routes.py index c9209a1..7d3574a 100644 --- a/connectonion/network/host/routes.py +++ b/connectonion/network/host/routes.py @@ -19,7 +19,8 @@ def input_handler(create_agent: Callable, storage: SessionStorage, prompt: str, result_ttl: int, - session: dict | None = None, connection=None, images: list[str] | None = None) -> dict: + session: dict | None = None, connection=None, images: list[str] | None = None, + files: list[dict] | None = None) -> dict: """POST /input (and WebSocket /ws) Args: @@ -30,6 +31,7 @@ def input_handler(create_agent: Callable, storage: SessionStorage, prompt: str, session: Optional conversation session for continuation connection: WebSocket connection for bidirectional I/O (None for HTTP) images: Optional list of base64 data URLs for multimodal input + 
files: Optional list of file dicts with name and base64 data """ agent = create_agent() # Fresh instance per request agent.io = connection # WebSocket connection or None for HTTP @@ -55,7 +57,7 @@ def input_handler(create_agent: Callable, storage: SessionStorage, prompt: str, # TODO: If agent.input() throws, record stays "running" until TTL expires. # This is acceptable: client gets 500 error, record expires naturally. start = time.time() - result = agent.input(prompt, session=session, images=images) + result = agent.input(prompt, session=session, images=images, files=files) duration_ms = int((time.time() - start) * 1000) record.status = "done" @@ -89,17 +91,22 @@ def health_handler(agent_name: str, start_time: float) -> dict: return {"status": "healthy", "agent": agent_name, "uptime": int(time.time() - start_time)} -def info_handler(agent_metadata: dict, trust, trust_config: dict | None = None) -> dict: +def info_handler(agent_metadata: dict, trust, trust_config: dict | None = None, + host_config: dict | None = None) -> dict: """GET /info - Returns pre-extracted metadata including onboard requirements. + Returns pre-extracted metadata including onboard requirements and accepted inputs. Args: agent_metadata: Agent name, address, tools trust: TrustAgent instance (trust level extracted via .trust attribute) trust_config: Parsed YAML config from trust policy (optional) + host_config: Host config dict with file upload limits (optional) """ from ... 
import __version__ + from .config import DEFAULT_FILE_LIMITS + + file_config = host_config or DEFAULT_FILE_LIMITS result = { "name": agent_metadata["name"], @@ -108,6 +115,14 @@ def info_handler(agent_metadata: dict, trust, trust_config: dict | None = None) "model": agent_metadata.get("model", "unknown"), # Add model info "trust": trust.trust, # Extract level string from TrustAgent "version": __version__, + "accepted_inputs": { + "text": True, + "images": True, + "files": { + "max_file_size_mb": file_config.get("max_file_size", DEFAULT_FILE_LIMITS["max_file_size"]), + "max_files_per_request": file_config.get("max_files_per_request", DEFAULT_FILE_LIMITS["max_files_per_request"]), + }, + }, } # Add onboard info if available diff --git a/connectonion/network/host/server.py b/connectonion/network/host/server.py index 5d8e3c7..8c82ff4 100644 --- a/connectonion/network/host/server.py +++ b/connectonion/network/host/server.py @@ -111,7 +111,7 @@ def _extract_agent_metadata(create_agent: Callable) -> tuple[dict, object]: return metadata, sample -def _create_route_handlers(create_agent: Callable, agent_metadata: dict, result_ttl: int, trust_agent): +def _create_route_handlers(create_agent: Callable, agent_metadata: dict, result_ttl: int, trust_agent, config: dict): """Create route handler dict for ASGI app. Args: @@ -121,20 +121,25 @@ def _create_route_handlers(create_agent: Callable, agent_metadata: dict, result_ creating agents for health/info endpoints. 
result_ttl: How long to keep results on server in seconds trust_agent: TrustAgent instance for trust operations + config: Host config dict (includes file upload limits) """ + from .config import validate_files + agent_name = agent_metadata["name"] - def handle_input(storage, prompt, session=None, connection=None, images=None): - return input_handler(create_agent, storage, prompt, result_ttl, session, connection, images) + def handle_input(storage, prompt, session=None, connection=None, images=None, files=None): + validate_files(files, config) + return input_handler(create_agent, storage, prompt, result_ttl, session, connection, images, files) - def handle_ws_input(storage, prompt, connection, session=None, images=None): - return input_handler(create_agent, storage, prompt, result_ttl, session, connection, images) + def handle_ws_input(storage, prompt, connection, session=None, images=None, files=None): + validate_files(files, config) + return input_handler(create_agent, storage, prompt, result_ttl, session, connection, images, files) def handle_health(start_time): return health_handler(agent_name, start_time) def handle_info(trust, trust_config=None): - return info_handler(agent_metadata, trust, trust_config) + return info_handler(agent_metadata, trust, trust_config, config) def handle_admin_logs(): return admin_logs_handler(agent_name) @@ -417,7 +422,7 @@ def create_agent(): else: trust_agent = TrustAgent(trust if isinstance(trust, str) else "careful") - route_handlers = _create_route_handlers(create_agent, agent_metadata, result_ttl, trust_agent) + route_handlers = _create_route_handlers(create_agent, agent_metadata, result_ttl, trust_agent, config) # Parse trust config for /info onboard info trust_config = _parse_trust_config(trust) @@ -482,7 +487,8 @@ def create_agent(): else: trust_agent = TrustAgent(trust if isinstance(trust, str) else "careful") - route_handlers = _create_route_handlers(create_agent, agent_metadata, result_ttl, trust_agent) + from .config 
import DEFAULT_FILE_LIMITS + route_handlers = _create_route_handlers(create_agent, agent_metadata, result_ttl, trust_agent, DEFAULT_FILE_LIMITS) return asgi_create_app( route_handlers=route_handlers, storage=storage, diff --git a/connectonion/network/static/docs.html b/connectonion/network/static/docs.html index 5f33482..41c2168 100644 --- a/connectonion/network/static/docs.html +++ b/connectonion/network/static/docs.html @@ -182,6 +182,10 @@ Tools - +
+ Accepted Inputs + - +
@@ -201,6 +205,17 @@

Request Body

+
+ ▶ Images & Files (optional) +
+ + + + +
+
+
+
▶ Signed Request (optional)
@@ -506,6 +521,15 @@

Trust Levels

$('agent-trust').textContent = info.trust; $('agent-address').textContent = info.address || '-'; $('agent-tools').textContent = (info.tools||[]).join(', ') || '-'; + // Accepted inputs + const ai = info.accepted_inputs; + if (ai) { + const parts = []; + if (ai.text) parts.push('text'); + if (ai.images) parts.push('images'); + if (ai.files) parts.push('files (max ' + ai.files.max_file_size_mb + 'MB, ' + ai.files.max_files_per_request + ' per request)'); + $('agent-inputs').textContent = parts.join(', ') || '-'; + } // Header $('hdr-name').textContent = info.name; $('hdr-version').textContent = 'v' + info.version; @@ -589,15 +613,37 @@

Trust Levels

$('ws-curl').textContent = `websocat ${wsBase}/ws`; } + function readFileAsDataUrl(file){ + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = () => resolve({ name: file.name, data: reader.result }); + reader.onerror = reject; + reader.readAsDataURL(file); + }); + } + + async function getAttachments(){ + const images = $('image-input').value.trim().split('\n').map(s=>s.trim()).filter(Boolean); + const fileEl = $('file-input'); + const files = []; + for (const f of fileEl.files) { + files.push(await readFileAsDataUrl(f)); + } + return { images: images.length ? images : undefined, files: files.length ? files : undefined }; + } + async function submitHttp(){ const from = $('from').value.trim(); const signature = $('signature').value.trim(); + const { images, files } = await getAttachments(); let body; if (from && signature){ body = { payload: JSON.parse($('payload-preview').textContent), from, signature }; } else { body = { prompt: $('prompt').value }; } + if (images) body.images = images; + if (files) body.files = files; $('http-result').textContent = 'Loading...'; const res = await fetch(buildUrl('/input'), { method:'POST', headers:{'Content-Type':'application/json'}, body: JSON.stringify(body)}); const data = await res.json(); @@ -622,7 +668,7 @@

Trust Levels

ws.onmessage = (ev)=>{ log(ev.data); }; } function closeWs(){ if(ws){ ws.close(); } } - function sendWs(){ if(!ws) return; const msg = { type:'INPUT', prompt: $('ws-prompt').value }; ws.send(JSON.stringify(msg)); log('→ ' + JSON.stringify(msg)); } + async function sendWs(){ if(!ws) return; const { images, files } = await getAttachments(); const msg = { type:'INPUT', prompt: $('ws-prompt').value }; if(images) msg.images=images; if(files) msg.files=files; ws.send(JSON.stringify(msg)); log('→ ' + JSON.stringify(msg)); } async function refreshSessions(){ const res = await fetch(buildUrl('/sessions')); @@ -677,6 +723,7 @@

Trust Levels

$('admin-sessions-result').textContent = JSON.stringify(data, null, 2); } + $('file-input').addEventListener('change', function(){ const names=[]; for(const f of this.files) names.push(f.name+' ('+Math.round(f.size/1024)+'KB)'); $('file-list').textContent=names.length?names.join(', '):''; }); $('prompt').addEventListener('input', updatePayloadPreview); $('from').addEventListener('input', updateCurlCommands); $('signature').addEventListener('input', updateCurlCommands); diff --git a/docs/design-decisions/file-transfer-api.md b/docs/design-decisions/file-transfer-api.md new file mode 100644 index 0000000..fc056ad --- /dev/null +++ b/docs/design-decisions/file-transfer-api.md @@ -0,0 +1,86 @@ +# Design: File Transfer API + +> **Status**: Proposal +> **Related**: File input support (base64-in-JSON via `/input`) + +## Background + +Currently, file uploads are handled inline via base64-encoded data URLs in the `/input` JSON payload (both HTTP POST and WebSocket). This works well for small files (<10MB) but has limitations for larger files and doesn't provide a way to retrieve uploaded files. + +Files are saved to `.co/uploads/{filename}` on the server and referenced via system reminders that prompt the agent to use `read_file` or other tools. + +## Problems to Solve + +### 1. Large file support + +Base64 encoding adds ~33% overhead, and embedding files in JSON payloads means the entire message must be buffered in memory. For files >10MB, a dedicated upload endpoint with streaming/multipart support would be more efficient. + +### 2. File retrieval + +There's currently no way to download or list files that were uploaded to an agent. Use cases: + +- **Admin inspection**: Check what files an agent received (debugging) +- **Agent-generated files**: Agents may create output files that users want to download +- **Audit trail**: List all files associated with a session + +### 3. File lifecycle management + +No cleanup or expiration policy exists for uploaded files. 
Over time, `.co/uploads/` can grow unbounded. + +## Proposed Options + +### Option A: Admin-only endpoints (minimal, recommended first step) + +``` +GET /admin/files → list uploaded files (name, size, mtime) +GET /admin/files/{filename} → download a specific file +``` + +- Protected by `OPENONION_API_KEY` header (same as existing admin endpoints) +- Read-only, no upload via these endpoints (continue using `/input` for uploads) +- Simple to implement, covers debugging and inspection use cases + +### Option B: Full file transfer API + +``` +POST /files/upload → multipart file upload, returns file ID/path +GET /files/{id} → download file by ID +GET /files → list files (with optional session filter) +DELETE /files/{id} → delete a file (admin only) +``` + +- Supports streaming/multipart uploads for large files +- File IDs instead of filenames (avoids collisions) +- Session-scoped file listing +- Separate auth: upload could be user-level, delete is admin-only + +### Option C: Hybrid approach + +Keep base64-in-JSON for small files (<5MB), add multipart endpoint for large files: + +``` +POST /input → existing flow, base64 files in JSON (small files) +POST /files/upload → multipart upload for large files, returns file reference +GET /admin/files → list/download (admin only) +``` + +The `/input` payload would accept either inline base64 data or a file reference from a prior upload. + +## Considerations + +- **Auth model**: Should file download require admin auth or can regular users download their own uploads? +- **File scoping**: Per-session vs per-agent file storage +- **Size limits**: Current default is 10MB max per file, 50MB total. Should dedicated endpoint support larger? +- **Cleanup policy**: TTL-based expiration? Manual cleanup? Session-scoped lifecycle? 
+- **Security**: Path traversal is already handled (`Path(name).name`), but file IDs would be safer than filenames + +## Current Implementation Reference + +- File handling: `agent.py` saves to `.co/uploads/`, adds system reminder +- Validation: `host/config.py` `validate_files()` checks count, size, total size +- Config: `max_file_size`, `max_files_per_request`, `max_total_file_size` in host server config +- Tests: `tests/unit/test_agent.py`, `tests/unit/test_host_routes.py`, `tests/unit/test_asgi_http.py` + +## Recommendation + +Start with **Option A** (admin-only read endpoints) — it's the simplest starting point that covers debugging needs. Evolve to **Option C** if large file support becomes necessary. This follows the project philosophy: *"keep simple things simple, make complicated things possible."* diff --git a/docs/network/host.md b/docs/network/host.md index 5b8f27c..d37312a 100644 --- a/docs/network/host.md +++ b/docs/network/host.md @@ -299,12 +299,23 @@ curl -X POST http://localhost:8000/input \ "messages": [...], "trace": [...], "turn": 1 - } + }, + "images": [ // Optional - base64 data URLs + "data:image/png;base64,iVBOR..." + ], + "files": [ // Optional - base64 encoded files + { + "name": "document.pdf", + "data": "data:application/pdf;base64,JVBERi..." + } + ] } ``` **Note:** `session_id` is always generated by the server. For new conversations, omit `session`. For continuations, pass the entire `session` object from the previous response. +See [Multimodal Input](#multimodal-input-images--files) for details on sending images and files. + **Response format:** ```json { @@ -384,7 +395,7 @@ curl http://localhost:8000/health ### GET /info -Agent capabilities and metadata. +Agent capabilities and metadata, including accepted input types and file limits. 
```bash curl http://localhost:8000/info @@ -396,11 +407,22 @@ curl http://localhost:8000/info "name": "translator", "address": "0x3d4017c3...", "tools": ["translate", "detect_language"], + "model": "co/gemini-2.5-pro", "trust": "careful", - "version": "0.4.1" + "version": "0.4.1", + "accepted_inputs": { + "text": true, + "images": true, + "files": { + "max_file_size_mb": 10, + "max_files_per_request": 10 + } + } } ``` +The `accepted_inputs` field tells clients what input types the agent supports and any file size limits. File limits are configured in `host.yaml` (see [host.yaml Configuration](host-config.md#file-upload-limits)). + ### GET /docs Interactive UI to test your agent in the browser. @@ -475,7 +497,9 @@ ws.send(JSON.stringify({ prompt: "Translate hello to Spanish", session: { session_id: "550e8400-e29b-41d4-a716-446655440000" // Optional: for session continuity - } + }, + images: ["data:image/png;base64,..."], // Optional: base64 data URLs + files: [{ name: "doc.pdf", data: "data:application/pdf;base64,..." }] // Optional })); ``` @@ -659,6 +683,96 @@ We chose client-managed state like Anthropic/OpenAI's Messages API, but with the --- +## Multimodal Input (Images & Files) + +Both HTTP and WebSocket endpoints accept images and files alongside text prompts. 
+ +### Images + +Pass base64 data URLs in the `images` array: + +```bash +curl -X POST http://localhost:8000/input \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "What do you see in this image?", + "images": ["data:image/png;base64,iVBORw0KGgo..."] + }' +``` + +### Files + +Pass files as objects with `name` and base64 `data`: + +```bash +curl -X POST http://localhost:8000/input \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "Summarize this document", + "files": [ + {"name": "report.pdf", "data": "data:application/pdf;base64,JVBERi..."} + ] + }' +``` + +**How files are handled internally:** Unlike images (which are passed directly to the LLM as visual content), file data is **not** inserted into the LLM messages. Instead: + +1. The file is decoded from base64 and saved to `.co/uploads/{filename}` +2. The agent receives a system reminder with the file path, prompting it to use `read_file` or other available tools to read the contents +3. The agent's tools read the file from disk + +This means your agent needs tools that can read files (e.g. `read_file`, bash, or other file-reading tools) to process uploaded files. + +### File Size Limits + +File uploads are validated against configurable limits (default: 10MB per file, 10 files per request). Configure in `host.yaml`: + +```yaml +max_file_size: 10 # MB per file +max_files_per_request: 10 # Max files in one request +``` + +Or in code: + +```python +host(create_agent, max_file_size=50, max_files_per_request=5) +``` + +When limits are exceeded, the server returns a 400 error: + +```json +{"error": "File too large: video.mp4 (150.2MB, max: 10MB). 
Increase max_file_size in host.yaml"} +``` + +### Client-Side (connect) + +```python +from connectonion import connect + +agent = connect("0xaddress...") + +# Send with images +result = agent.input("Describe this", images=["data:image/png;base64,..."]) + +# Send with files +result = agent.input("Summarize", files=[ + {"name": "report.pdf", "data": "data:application/pdf;base64,..."} +]) +``` + +### Feature Parity + +Images and files work identically across all connection types: + +| Feature | HTTP POST /input | WebSocket /ws | Relay | +|---------|-----------------|---------------|-------| +| Text | `prompt` field | `prompt` field | `prompt` field | +| Images | `images` array | `images` array | `images` array | +| Files | `files` array | `files` array | `files` array | +| File validation | 400 error | ERROR message | ERROR message | + +--- + ## Project Structure When you run `host(agent)`, these files are used: diff --git a/tests/unit/test_agent.py b/tests/unit/test_agent.py index bbd67c4..6a75766 100644 --- a/tests/unit/test_agent.py +++ b/tests/unit/test_agent.py @@ -592,3 +592,136 @@ def test_agent_input_without_images_unchanged(): # Content should be a simple string when no images assert isinstance(user_message['content'], str) assert user_message['content'] == "Hello" + + +def test_agent_input_with_files(tmp_path): + """Test that agent.input() saves files to .co/uploads/ and adds system reminder.""" + mock_llm = MockLLM(responses=[ + LLMResponse( + content="I received the PDF file.", + tool_calls=[], + raw_response={}, + usage=TokenUsage(), + ) + ]) + + agent = Agent(name="file_agent", llm=mock_llm, log=False, co_dir=tmp_path / ".co") + + test_file = {"name": "report.pdf", "data": "data:application/pdf;base64,JVBERi0xLjQK"} + + result = agent.input("Analyze this document", files=[test_file]) + + assert "pdf" in result.lower() + + # Verify file was saved to disk + saved_path = tmp_path / ".co" / "uploads" / "report.pdf" + assert saved_path.exists() + assert 
saved_path.read_bytes() == b"%PDF-1.4\n" + + # Verify message format: text + system reminder (no raw file data) + messages = mock_llm.last_call["messages"] + user_message = [msg for msg in messages if msg['role'] == 'user'][-1] + + content = user_message['content'] + assert isinstance(content, list) + assert len(content) == 2 # 1 text + 1 system reminder + + assert content[0]['type'] == 'text' + assert content[0]['text'] == "Analyze this document" + + assert content[1]['type'] == 'text' + assert "report.pdf" in content[1]['text'] + assert "" in content[1]['text'] + assert str(saved_path) in content[1]['text'] + + +def test_agent_input_with_images_and_files(tmp_path): + """Test that agent.input() handles both images and files together.""" + mock_llm = MockLLM(responses=[ + LLMResponse( + content="I see an image and a file.", + tool_calls=[], + raw_response={}, + usage=TokenUsage(), + ) + ]) + + agent = Agent(name="multi_agent", llm=mock_llm, log=False, co_dir=tmp_path / ".co") + + test_image = "data:image/png;base64,iVBORw0KGgo" + test_file = {"name": "data.csv", "data": "data:text/csv;base64,bmFtZSxhZ2U="} + + agent.input("Analyze these", images=[test_image], files=[test_file]) + + messages = mock_llm.last_call["messages"] + user_message = [msg for msg in messages if msg['role'] == 'user'][-1] + + content = user_message['content'] + assert isinstance(content, list) + assert len(content) == 3 # 1 text + 1 image + 1 system reminder + + assert content[0]['type'] == 'text' + assert content[1]['type'] == 'image_url' + assert content[2]['type'] == 'text' + assert "data.csv" in content[2]['text'] + + # Verify file saved + assert (tmp_path / ".co" / "uploads" / "data.csv").exists() + + +def test_agent_input_with_multiple_files(tmp_path): + """Test that agent.input() saves multiple files and lists all paths in reminder.""" + mock_llm = MockLLM(responses=[ + LLMResponse( + content="I received both files.", + tool_calls=[], + raw_response={}, + usage=TokenUsage(), + ) + ]) + 
+ agent = Agent(name="multi_file_agent", llm=mock_llm, log=False, co_dir=tmp_path / ".co") + + test_files = [ + {"name": "report.pdf", "data": "data:application/pdf;base64,JVBERi0xLjQK"}, + {"name": "data.csv", "data": "data:text/csv;base64,bmFtZSxhZ2U="}, + ] + + agent.input("Compare these documents", files=test_files) + + # Verify both files saved + assert (tmp_path / ".co" / "uploads" / "report.pdf").exists() + assert (tmp_path / ".co" / "uploads" / "data.csv").exists() + + messages = mock_llm.last_call["messages"] + user_message = [msg for msg in messages if msg['role'] == 'user'][-1] + + content = user_message['content'] + assert isinstance(content, list) + assert len(content) == 2 # 1 text + 1 system reminder with both file paths + + reminder_text = content[1]['text'] + assert "report.pdf" in reminder_text + assert "data.csv" in reminder_text + + +def test_agent_input_file_path_traversal(tmp_path): + """Test that malicious filenames with path traversal are sanitized.""" + mock_llm = MockLLM(responses=[ + LLMResponse( + content="Done.", + tool_calls=[], + raw_response={}, + usage=TokenUsage(), + ) + ]) + + agent = Agent(name="safe_agent", llm=mock_llm, log=False, co_dir=tmp_path / ".co") + + test_file = {"name": "../../etc/passwd", "data": "data:text/plain;base64,cm9vdA=="} + + agent.input("Read this", files=[test_file]) + + # File should be saved as just "passwd" inside .co/uploads/, not outside + assert (tmp_path / ".co" / "uploads" / "passwd").exists() + assert not (tmp_path / "etc").exists() diff --git a/tests/unit/test_asgi_http.py b/tests/unit/test_asgi_http.py index 5e74702..0ce1533 100644 --- a/tests/unit/test_asgi_http.py +++ b/tests/unit/test_asgi_http.py @@ -406,7 +406,7 @@ async def send(msg): handlers = { "auth": lambda data, trust, **kw: ("Hello", "0xtest", True, None), - "input": lambda storage, prompt, session: {"result": "World", "session_id": "x"}, + "input": lambda storage, prompt, session, **kw: {"result": "World", "session_id": "x"}, } 
await handle_http( @@ -421,6 +421,88 @@ async def send(msg): body = json.loads(sent[1]["body"]) assert body["result"] == "World" + async def test_input_endpoint_passes_images_and_files(self): + """POST /input passes images and files to handler.""" + scope = {"method": "POST", "path": "/input", "headers": []} + sent = [] + captured = {} + + async def receive(): + return { + "body": json.dumps({ + "payload": {"prompt": "Analyze", "timestamp": 123}, + "from": "0xtest", + "signature": "0xsig", + "images": ["data:image/png;base64,abc"], + "files": [{"name": "doc.pdf", "data": "data:application/pdf;base64,xyz"}], + }).encode(), + "more_body": False + } + + async def send(msg): + sent.append(msg) + + def mock_input(storage, prompt, session, **kw): + captured["images"] = kw.get("images") + captured["files"] = kw.get("files") + return {"result": "OK", "session_id": "x"} + + handlers = { + "auth": lambda data, trust, **kw: ("Analyze", "0xtest", True, None), + "input": mock_input, + } + + await handle_http( + scope, receive, send, + route_handlers=handlers, + storage=Mock(), + trust="open", + start_time=0 + ) + + assert sent[0]["status"] == 200 + assert captured["images"] == ["data:image/png;base64,abc"] + assert captured["files"] == [{"name": "doc.pdf", "data": "data:application/pdf;base64,xyz"}] + + async def test_input_endpoint_rejects_invalid_files(self): + """POST /input returns 400 when file validation fails.""" + scope = {"method": "POST", "path": "/input", "headers": []} + sent = [] + + async def receive(): + return { + "body": json.dumps({ + "payload": {"prompt": "Analyze", "timestamp": 123}, + "from": "0xtest", + "signature": "0xsig", + "files": [{"name": "big.pdf", "data": "x" * 100}], + }).encode(), + "more_body": False + } + + async def send(msg): + sent.append(msg) + + def mock_input(storage, prompt, session, **kw): + raise ValueError("File too large: big.pdf (50.0MB, max: 10MB)") + + handlers = { + "auth": lambda data, trust, **kw: ("Analyze", "0xtest", True, 
None), + "input": mock_input, + } + + await handle_http( + scope, receive, send, + route_handlers=handlers, + storage=Mock(), + trust="open", + start_time=0 + ) + + assert sent[0]["status"] == 400 + body = json.loads(sent[1]["body"]) + assert "File too large" in body["error"] + async def test_input_endpoint_auth_error(self): """POST /input returns 401 on auth error.""" scope = {"method": "POST", "path": "/input", "headers": []} diff --git a/tests/unit/test_host_routes.py b/tests/unit/test_host_routes.py index e299537..d632197 100644 --- a/tests/unit/test_host_routes.py +++ b/tests/unit/test_host_routes.py @@ -306,6 +306,40 @@ def test_no_onboard_without_onboard_key(self): assert "onboard" not in result + def test_returns_accepted_inputs_with_file_limits(self): + """info_handler includes accepted_inputs with file size limits.""" + metadata = { + "name": "agent", + "tools": [], + "address": "0x123", + } + mock_trust = Mock() + mock_trust.trust = "open" + + result = info_handler(metadata, mock_trust) + + accepted = result["accepted_inputs"] + assert accepted["text"] is True + assert accepted["images"] is True + assert accepted["files"]["max_file_size_mb"] == 10 + assert accepted["files"]["max_files_per_request"] == 10 + + def test_accepted_inputs_uses_custom_config(self): + """info_handler uses host_config for file limits.""" + metadata = { + "name": "agent", + "tools": [], + "address": "0x123", + } + mock_trust = Mock() + mock_trust.trust = "open" + custom_config = {"max_file_size": 50, "max_files_per_request": 5} + + result = info_handler(metadata, mock_trust, host_config=custom_config) + + assert result["accepted_inputs"]["files"]["max_file_size_mb"] == 50 + assert result["accepted_inputs"]["files"]["max_files_per_request"] == 5 + class TestAdminLogsHandler: """Test admin_logs_handler route.""" @@ -505,5 +539,50 @@ def test_admin_remove_handler(self): assert result["success"] is True +class TestValidateFiles: + """Test file upload validation from config.py.""" + + 
def test_accepts_valid_files(self): + """validate_files passes for files within limits.""" + from connectonion.network.host.config import validate_files + + files = [{"name": "doc.pdf", "data": "x" * 1000}] + config = {"max_file_size": 10, "max_files_per_request": 10} + validate_files(files, config) # Should not raise + + def test_rejects_oversized_file(self): + """validate_files raises ValueError for file exceeding max_file_size.""" + from connectonion.network.host.config import validate_files + + # 2MB of data, but limit is 1MB + files = [{"name": "big.pdf", "data": "x" * (2 * 1024 * 1024)}] + config = {"max_file_size": 1, "max_files_per_request": 10} + with pytest.raises(ValueError, match="File too large"): + validate_files(files, config) + + def test_rejects_too_many_files(self): + """validate_files raises ValueError when file count exceeds limit.""" + from connectonion.network.host.config import validate_files + + files = [{"name": f"file{i}.txt", "data": "x"} for i in range(5)] + config = {"max_file_size": 10, "max_files_per_request": 3} + with pytest.raises(ValueError, match="Too many files"): + validate_files(files, config) + + def test_none_files_passes(self): + """validate_files accepts None without error.""" + from connectonion.network.host.config import validate_files + + config = {"max_file_size": 10, "max_files_per_request": 10} + validate_files(None, config) # Should not raise + + def test_empty_files_passes(self): + """validate_files accepts empty list without error.""" + from connectonion.network.host.config import validate_files + + config = {"max_file_size": 10, "max_files_per_request": 10} + validate_files([], config) # Should not raise + + if __name__ == "__main__": pytest.main([__file__, "-v"])