From b1b06f2ecc3025ba76319edc9bbec8555508ec32 Mon Sep 17 00:00:00 2001 From: ianleely Date: Thu, 5 Feb 2026 08:31:44 +0000 Subject: [PATCH 1/3] feat: add AgentCore Runtime backend and deployment infrastructure Backend (new): - FastAPI server with Claude Agent SDK integration - Session management with slide detection - Unified /invocations endpoint for AgentCore routing - Permission callback system for tool execution Deployment (new): - Dockerfile for ARM64 AgentCore container - Shell scripts for ECR build/push and AgentCore deployment - Config template for AWS environment variables Frontend updates: - AgentCoreClient with JWT token authentication - Expose Cognito tokens (accessToken/idToken) in NextAuth session - Route agent chat through AgentCore when configured Infrastructure (new CDK constructs): - AgentCoreConstruct for runtime deployment - FargateNextJsService for frontend (simplified) - Updated main stack with AgentCore integration Co-Authored-By: Claude Opus 4.5 --- backend/__init__.py | 1 + backend/api/__init__.py | 13 + backend/api/invocations.py | 300 ++++ backend/api/messages.py | 195 +++ backend/api/permissions.py | 42 + backend/api/sessions.py | 239 ++++ backend/claude_system_prompt.md | 118 ++ backend/core/__init__.py | 7 + backend/core/session.py | 578 ++++++++ backend/core/session_manager.py | 457 ++++++ backend/core/slide_detector.py | 196 +++ backend/models/__init__.py | 33 + backend/models/schemas.py | 124 ++ backend/pyproject.toml | 19 + backend/server.py | 156 +++ deploy/01_build_and_push.sh | 137 ++ deploy/02_deploy_agentcore.sh | 468 +++++++ deploy/Dockerfile | 61 + deploy/config.env.template | 119 ++ deploy/serve | 35 + frontend/package.json | 2 + frontend/pnpm-lock.yaml | 221 +++ frontend/src/app/api/agent/chat/route.ts | 1231 +++++++++++------ frontend/src/env.js | 4 + frontend/src/lib/agent/agentcore-client.ts | 478 +++++++ frontend/src/server/auth.ts | 35 +- .../lib/compute/agentcore-construct.ts | 593 ++++++++ 
.../lib/compute/fargate-nextjs-service.ts | 456 ++++++ infrastructure/lib/slide-forge-stack.ts | 90 +- 29 files changed, 5976 insertions(+), 432 deletions(-) create mode 100644 backend/__init__.py create mode 100644 backend/api/__init__.py create mode 100644 backend/api/invocations.py create mode 100644 backend/api/messages.py create mode 100644 backend/api/permissions.py create mode 100644 backend/api/sessions.py create mode 100644 backend/claude_system_prompt.md create mode 100644 backend/core/__init__.py create mode 100644 backend/core/session.py create mode 100644 backend/core/session_manager.py create mode 100644 backend/core/slide_detector.py create mode 100644 backend/models/__init__.py create mode 100644 backend/models/schemas.py create mode 100644 backend/pyproject.toml create mode 100644 backend/server.py create mode 100755 deploy/01_build_and_push.sh create mode 100755 deploy/02_deploy_agentcore.sh create mode 100644 deploy/Dockerfile create mode 100644 deploy/config.env.template create mode 100755 deploy/serve create mode 100644 frontend/src/lib/agent/agentcore-client.ts create mode 100644 infrastructure/lib/compute/agentcore-construct.ts create mode 100644 infrastructure/lib/compute/fargate-nextjs-service.ts diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000..5e426d1 --- /dev/null +++ b/backend/__init__.py @@ -0,0 +1 @@ +"""Slide Forge Backend - FastAPI server for presentation generation with AgentCore.""" diff --git a/backend/api/__init__.py b/backend/api/__init__.py new file mode 100644 index 0000000..8383313 --- /dev/null +++ b/backend/api/__init__.py @@ -0,0 +1,13 @@ +"""API endpoint routers.""" + +from .invocations import router as invocations_router +from .messages import router as messages_router +from .permissions import router as permissions_router +from .sessions import router as sessions_router + +__all__ = [ + "sessions_router", + "messages_router", + "permissions_router", + "invocations_router", +] 
diff --git a/backend/api/invocations.py b/backend/api/invocations.py new file mode 100644 index 0000000..29e996e --- /dev/null +++ b/backend/api/invocations.py @@ -0,0 +1,300 @@ +""" +Unified Invocations Endpoint for AgentCore. + +This module provides a single /invocations endpoint that routes requests +to the appropriate internal handlers based on the path and method in the +request payload. This is the pattern required by AWS Bedrock AgentCore. + +Request format: +{ + "path": "/sessions", + "method": "POST", + "payload": {...}, + "path_params": {}, + "query_params": {} +} +""" + +import json +import re +from typing import Any, Dict, Optional + +from fastapi import APIRouter, HTTPException, Request +from fastapi.responses import StreamingResponse +from pydantic import BaseModel + +from ..core import SessionManager +from ..models import ( + CreateSessionRequest, + SendMessageRequest, + SetPermissionModeRequest, +) + + +router = APIRouter() + + +class InvocationRequest(BaseModel): + """Request format for the unified /invocations endpoint.""" + path: str + method: str = "GET" + payload: Optional[Dict[str, Any]] = None + path_params: Optional[Dict[str, str]] = None + query_params: Optional[Dict[str, str]] = None + + +def get_session_manager() -> SessionManager: + """Get the global session manager instance.""" + from ..server import session_manager + return session_manager + + +def safe_json_dumps(obj: Any) -> str: + """ + Safely serialize objects to JSON, handling non-serializable objects. + + Args: + obj: Object to serialize + + Returns: + JSON string + """ + def default_handler(o): + if hasattr(o, "__dict__"): + return o.__dict__ + return str(o) + + return json.dumps(obj, default=default_handler) + + +def extract_session_id(path: str, path_params: Optional[Dict[str, str]] = None) -> Optional[str]: + """ + Extract session_id from path or path_params. 
+ + Args: + path: The request path + path_params: Optional path parameters + + Returns: + Session ID if found + """ + # Try path_params first + if path_params and "session_id" in path_params: + return path_params["session_id"] + + # Try extracting from path + match = re.search(r"/sessions/([^/]+)", path) + if match: + return match.group(1) + + return None + + +@router.post("/invocations") +async def handle_invocation(request: InvocationRequest): + """ + Unified invocations endpoint for AgentCore. + + Routes requests to the appropriate handler based on path and method. + + Supported routes: + - POST /sessions -> Create session + - GET /sessions -> List sessions + - GET /sessions/available -> List available sessions + - GET /sessions/{session_id}/status -> Get session status + - GET /sessions/{session_id}/history -> Get session history + - POST /sessions/{session_id}/messages -> Send message (non-streaming) + - POST /sessions/{session_id}/messages/stream -> Send message (streaming) + - POST /sessions/{session_id}/interrupt -> Interrupt session + - POST /sessions/{session_id}/permission_mode -> Set permission mode + - POST /sessions/{session_id}/permissions/respond -> Respond to permission + - DELETE /sessions/{session_id} -> Close session + + Args: + request: Invocation request with path, method, and payload + + Returns: + Response from the appropriate handler + """ + path = request.path + method = request.method.upper() + payload = request.payload or {} + path_params = request.path_params or {} + query_params = request.query_params or {} + + manager = get_session_manager() + + print(f"[Invocations] Routing: {method} {path}") + print(f"[Invocations] Payload: {json.dumps(payload)[:200]}...") + + try: + # ======================================== + # Session Management Routes + # ======================================== + + # POST /sessions - Create session + if path == "/sessions" and method == "POST": + from datetime import datetime, timezone + + session_request = 
CreateSessionRequest(**payload) + session_id = await manager.create_session( + user_id=session_request.user_id, + resume_session_id=session_request.resume_session_id, + model=session_request.model, + cwd=session_request.cwd, + ) + return { + "session_id": session_id, + "created_at": datetime.now(timezone.utc).isoformat(), + "status": "connected", + } + + # GET /sessions - List sessions + if path == "/sessions" and method == "GET": + cwd = query_params.get("cwd") + sessions = manager.list_sessions(cwd=cwd) + return {"sessions": [s.dict() for s in sessions]} + + # GET /sessions/available - List available sessions + if path == "/sessions/available" and method == "GET": + cwd = query_params.get("cwd") + limit = int(query_params.get("limit", "20")) + offset = int(query_params.get("offset", "0")) + return manager.list_available_sessions(cwd=cwd, limit=limit, offset=offset) + + # ======================================== + # Session-specific Routes + # ======================================== + + session_id = extract_session_id(path, path_params) + + # GET /sessions/{session_id}/status - Get session status + if re.match(r"^/sessions/[^/]+/status$", path) and method == "GET": + if not session_id: + raise HTTPException(status_code=400, detail="Session ID required") + session = await manager.get_session(session_id) + status = session.get_status() + return status.dict() if hasattr(status, "dict") else status + + # GET /sessions/{session_id}/history - Get session history + if re.match(r"^/sessions/[^/]+/history$", path) and method == "GET": + if not session_id: + raise HTTPException(status_code=400, detail="Session ID required") + # Import and call the history function + from .sessions import get_session_history + cwd = query_params.get("cwd") + return await get_session_history(session_id, cwd) + + # POST /sessions/{session_id}/messages - Send message (non-streaming) + if re.match(r"^/sessions/[^/]+/messages$", path) and method == "POST": + if not session_id: + raise 
HTTPException(status_code=400, detail="Session ID required") + session = await manager.get_session(session_id) + message_request = SendMessageRequest(**payload) + return await session.send_message(message_request.message) + + # POST /sessions/{session_id}/messages/stream - Send message (streaming) + if re.match(r"^/sessions/[^/]+/messages/stream$", path) and method == "POST": + if not session_id: + raise HTTPException(status_code=400, detail="Session ID required") + + print(f"[Invocations] Streaming message to session {session_id}") + session = await manager.get_session(session_id) + message_request = SendMessageRequest(**payload) + + async def event_generator(): + """Generate SSE events from the agent response.""" + event_count = 0 + try: + async for event in session.send_message_stream(message_request.message): + event_count += 1 + event_type = event.get("type", "unknown") + print(f"[Invocations] Event #{event_count}: type={event_type}") + yield f"data: {safe_json_dumps(event)}\n\n" + + print(f"[Invocations] Stream complete (total events: {event_count})") + except Exception as e: + print(f"[Invocations] Stream error: {type(e).__name__}: {str(e)}") + import traceback + print(f"[Invocations] Traceback:\n{traceback.format_exc()}") + error_event = {"type": "error", "error": str(e)} + yield f"data: {safe_json_dumps(error_event)}\n\n" + + return StreamingResponse( + event_generator(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + + # POST /sessions/{session_id}/interrupt - Interrupt session + if re.match(r"^/sessions/[^/]+/interrupt$", path) and method == "POST": + if not session_id: + raise HTTPException(status_code=400, detail="Session ID required") + session = await manager.get_session(session_id) + await session.interrupt() + return {"status": "interrupted"} + + # POST /sessions/{session_id}/permission_mode - Set permission mode + if 
re.match(r"^/sessions/[^/]+/permission_mode$", path) and method == "POST": + if not session_id: + raise HTTPException(status_code=400, detail="Session ID required") + session = await manager.get_session(session_id) + mode_request = SetPermissionModeRequest(**payload) + await session.set_permission_mode(mode_request.mode) + return {"status": "ok", "mode": mode_request.mode} + + # POST /sessions/{session_id}/permissions/respond - Respond to permission + if re.match(r"^/sessions/[^/]+/permissions/respond$", path) and method == "POST": + if not session_id: + raise HTTPException(status_code=400, detail="Session ID required") + session = await manager.get_session(session_id) + request_id = payload.get("request_id") + allowed = payload.get("allowed", False) + + if allowed: + await session.grant_permission(request_id) + else: + await session.deny_permission(request_id) + + return {"status": "ok", "allowed": allowed} + + # DELETE /sessions/{session_id} - Close session + if re.match(r"^/sessions/[^/]+$", path) and method == "DELETE": + if not session_id: + raise HTTPException(status_code=400, detail="Session ID required") + await manager.close_session(session_id) + return {"status": "closed"} + + # POST /sessions/close_all - Close all sessions + if path == "/sessions/close_all" and method == "POST": + cwd = query_params.get("cwd") + sessions = manager.list_sessions(cwd=cwd) + closed_count = 0 + for session_info in sessions: + try: + await manager.close_session(session_info.session_id) + closed_count += 1 + except Exception as e: + print(f"Failed to close session {session_info.session_id}: {e}") + return {"status": "success", "closed_count": closed_count} + + # ======================================== + # Route Not Found + # ======================================== + + raise HTTPException( + status_code=404, + detail=f"Route not found: {method} {path}" + ) + + except HTTPException: + raise + except Exception as e: + print(f"[Invocations] Error handling {method} {path}: 
{type(e).__name__}: {str(e)}") + import traceback + print(f"[Invocations] Traceback:\n{traceback.format_exc()}") + raise HTTPException(status_code=500, detail=str(e)) diff --git a/backend/api/messages.py b/backend/api/messages.py new file mode 100644 index 0000000..03cb080 --- /dev/null +++ b/backend/api/messages.py @@ -0,0 +1,195 @@ +""" +Message and Status Endpoints. + +Provides REST API endpoints for sending messages to sessions, +checking session status, and streaming responses with slide detection. +""" + +import json +from fastapi import APIRouter +from fastapi.responses import StreamingResponse + +from ..core import SessionManager +from ..models import ( + SendMessageRequest, + SendMessageResponse, + SessionStatus, + SetPermissionModeRequest, +) + +router = APIRouter() + + +def get_session_manager() -> SessionManager: + """Get the global session manager instance.""" + from ..server import session_manager + + return session_manager + + +def safe_json_dumps(obj): + """ + Safely serialize objects to JSON, handling non-serializable objects. + + Args: + obj: Object to serialize + + Returns: + JSON string + """ + + def default_handler(o): + if hasattr(o, "__dict__"): + return o.__dict__ + return str(o) + + return json.dumps(obj, default=default_handler) + + +@router.get("/sessions/{session_id}/status", response_model=SessionStatus) +async def get_session_status(session_id: str): + """ + Get the status of a session. + + Args: + session_id: The session ID + + Returns: + Session status including pending permissions + """ + manager = get_session_manager() + session = await manager.get_session(session_id) + return session.get_status() + + +@router.post("/sessions/{session_id}/messages", response_model=SendMessageResponse) +async def send_message(session_id: str, request: SendMessageRequest): + """ + Send a message in a session (non-streaming). 
+ + Args: + session_id: The session ID + request: Message request + + Returns: + Assistant's response + """ + manager = get_session_manager() + session = await manager.get_session(session_id) + return await session.send_message(request.message) + + +@router.post("/sessions/{session_id}/messages/stream") +async def send_message_stream(session_id: str, request: SendMessageRequest): + """ + Send a message in a session with streaming response (SSE). + + Includes slide detection - emits 'slide_complete' events when + slides are fully generated with HTML content. + + Event types: + - start: Stream started + - status: Status update (connecting, ready, etc.) + - text: Text content from assistant + - tool_use: Tool being used by assistant + - slide_complete: A slide was fully generated (includes HTML) + - permission: Permission request from agent + - result: Final result with cost/turn info + - done: Stream completed + - error: Error occurred + + Args: + session_id: The session ID + request: Message request + + Returns: + Server-Sent Events stream with real-time updates + """ + print(f"\n[API] send_message_stream START") + print(f"[API] session_id: {session_id}") + print(f"[API] message: {request.message[:100] if isinstance(request.message, str) else request.message}") + + manager = get_session_manager() + session = await manager.get_session(session_id) + + async def event_generator(): + """Generate SSE events from the agent response.""" + event_count = 0 + try: + async for event in session.send_message_stream(request.message): + event_count += 1 + event_type = event.get("type", "unknown") + print(f"[API] Event #{event_count}: type={event_type}") + + if event_type == "text": + content_preview = event.get("content", "")[:100] + print(f"[API] text preview: {content_preview}...") + elif event_type == "slide_complete": + print(f"[API] slide_index: {event.get('slide_index')}") + print(f"[API] html length: {len(event.get('html', ''))}") + elif event_type == "tool_use": + 
print(f"[API] tool: {event.get('tool_name')}") + elif event_type == "result": + print(f"[API] cost_usd: {event.get('cost_usd')}") + elif event_type == "done": + print(f"[API] slides_detected: {event.get('slides_detected')}") + + # Format as SSE + yield f"data: {safe_json_dumps(event)}\n\n" + + print(f"[API] send_message_stream END (total events: {event_count})") + except Exception as e: + print(f"[API] send_message_stream ERROR: {type(e).__name__}: {str(e)}") + import traceback + + print(f"[API] Traceback:\n{traceback.format_exc()}") + error_event = { + "type": "error", + "error": str(e), + } + yield f"data: {safe_json_dumps(error_event)}\n\n" + + return StreamingResponse( + event_generator(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + + +@router.post("/sessions/{session_id}/interrupt") +async def interrupt_session(session_id: str): + """ + Interrupt the current operation in a session. + + Args: + session_id: The session ID + + Returns: + Success message + """ + manager = get_session_manager() + session = await manager.get_session(session_id) + await session.interrupt() + return {"status": "interrupted"} + + +@router.post("/sessions/{session_id}/permission_mode") +async def set_permission_mode(session_id: str, request: SetPermissionModeRequest): + """ + Change the permission mode for a session. + + Args: + session_id: The session ID + request: Permission mode change request + + Returns: + Success message with new mode + """ + manager = get_session_manager() + session = await manager.get_session(session_id) + await session.set_permission_mode(request.mode) + return {"status": "ok", "mode": request.mode} diff --git a/backend/api/permissions.py b/backend/api/permissions.py new file mode 100644 index 0000000..d9a4855 --- /dev/null +++ b/backend/api/permissions.py @@ -0,0 +1,42 @@ +""" +Permission Management Endpoints. 
+ +Provides REST API endpoints for responding to permission requests +from the agent sessions. +""" + +from fastapi import APIRouter + +from ..core import SessionManager +from ..models import PermissionResponse + +router = APIRouter() + + +def get_session_manager() -> SessionManager: + """Get the global session manager instance.""" + from ..server import session_manager + + return session_manager + + +@router.post("/sessions/{session_id}/permissions/respond") +async def respond_to_permission(session_id: str, response: PermissionResponse): + """ + Respond to a pending permission request. + + Args: + session_id: The session ID + response: Permission response (allowed, denied, with suggestions) + + Returns: + Success message + """ + manager = get_session_manager() + session = await manager.get_session(session_id) + session.respond_to_permission( + request_id=response.request_id, + allowed=response.allowed, + apply_suggestions=response.apply_suggestions, + ) + return {"status": "ok"} diff --git a/backend/api/sessions.py b/backend/api/sessions.py new file mode 100644 index 0000000..4f83d90 --- /dev/null +++ b/backend/api/sessions.py @@ -0,0 +1,239 @@ +""" +Session Management Endpoints. + +Provides REST API endpoints for session CRUD operations including +creating, listing, and closing sessions. +""" + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + +from fastapi import APIRouter, HTTPException + +from ..core import SessionManager +from ..models import ( + CreateSessionRequest, + CreateSessionResponse, + ListSessionsResponse, +) + +router = APIRouter() + + +def get_session_manager() -> SessionManager: + """Get the global session manager instance.""" + from ..server import session_manager + + return session_manager + + +@router.post("/sessions", response_model=CreateSessionResponse) +async def create_session(request: CreateSessionRequest): + """ + Create a new session or resume an existing one. 
+ + Args: + request: Session creation request + + Returns: + Session information + """ + manager = get_session_manager() + internal_session_id = await manager.create_session( + user_id=request.user_id, + resume_session_id=request.resume_session_id, + model=request.model, + cwd=request.cwd, + ) + + return CreateSessionResponse( + session_id=internal_session_id, + created_at=datetime.now(timezone.utc).isoformat(), + status="connected", + ) + + +@router.get("/sessions", response_model=ListSessionsResponse) +async def list_sessions(cwd: Optional[str] = None): + """ + List all active sessions, optionally filtered by cwd. + + Args: + cwd: Optional working directory to filter by + + Returns: + List of active sessions + """ + manager = get_session_manager() + sessions = manager.list_sessions(cwd=cwd) + return ListSessionsResponse(sessions=sessions) + + +@router.get("/sessions/available") +async def list_available_sessions( + cwd: Optional[str] = None, + limit: int = 20, + offset: int = 0, +): + """ + List all available sessions from disk, optionally filtered by cwd. + + Args: + cwd: Optional working directory to filter by + limit: Maximum number of sessions to return + offset: Number of sessions to skip + + Returns: + Dict with sessions and pagination info + """ + manager = get_session_manager() + return manager.list_available_sessions( + cwd=cwd, + limit=limit, + offset=offset, + ) + + +@router.get("/sessions/{session_id}/history") +async def get_session_history(session_id: str, cwd: Optional[str] = None): + """ + Get the conversation history for a session from disk. 
+ + Args: + session_id: The session ID + cwd: Optional current working directory + + Returns: + Session history with messages and metadata + """ + base_dir = Path.home() / ".claude" / "projects" + + session_file = None + + # If cwd is provided, try direct lookup + if cwd: + path_key = cwd.replace("/", "-").replace("_", "-") + potential_file = base_dir / path_key / f"{session_id}.jsonl" + if potential_file.exists(): + session_file = potential_file + + # Search all project directories + if not session_file: + for project_dir in base_dir.iterdir(): + if not project_dir.is_dir(): + continue + potential_file = project_dir / f"{session_id}.jsonl" + if potential_file.exists(): + session_file = potential_file + break + + if not session_file: + raise HTTPException(status_code=404, detail="Session history not found") + + try: + messages = [] + metadata = { + "session_id": session_id, + "cwd": None, + } + + with open(session_file, encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + + try: + entry = json.loads(line) + entry_type = entry.get("type") + + # Extract metadata + if not metadata["cwd"]: + metadata["cwd"] = entry.get("cwd") + + # Process messages + if entry_type in ["user", "assistant"]: + msg_data = entry.get("message", {}) + role = msg_data.get("role") + content = msg_data.get("content") + + if isinstance(content, str): + messages.append({ + "role": role, + "content": content, + "timestamp": entry.get("timestamp"), + }) + elif isinstance(content, list): + for block in content: + if isinstance(block, dict): + block_type = block.get("type") + if block_type == "text": + messages.append({ + "role": role, + "content": block.get("text", ""), + "timestamp": entry.get("timestamp"), + }) + elif block_type == "tool_use": + messages.append({ + "type": "tool_use", + "role": role, + "tool_name": block.get("name"), + "tool_input": block.get("input"), + "timestamp": entry.get("timestamp"), + }) + except json.JSONDecodeError: + continue + + 
return { + "metadata": metadata, + "messages": messages, + "message_count": len(messages), + } + + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed to read session history: {str(e)}" + ) + + +@router.delete("/sessions/{session_id}") +async def close_session(session_id: str): + """ + Close a session. + + Args: + session_id: The session ID + + Returns: + Success message + """ + manager = get_session_manager() + await manager.close_session(session_id) + return {"status": "closed"} + + +@router.post("/sessions/close_all") +async def close_all_sessions(cwd: Optional[str] = None): + """ + Close all active sessions, optionally filtered by cwd. + + Args: + cwd: Optional working directory to filter + + Returns: + Number of sessions closed + """ + manager = get_session_manager() + sessions = manager.list_sessions(cwd=cwd) + + closed_count = 0 + for session_info in sessions: + try: + await manager.close_session(session_info.session_id) + closed_count += 1 + except Exception as e: + print(f"Failed to close session {session_info.session_id}: {e}") + + return {"status": "success", "closed_count": closed_count} diff --git a/backend/claude_system_prompt.md b/backend/claude_system_prompt.md new file mode 100644 index 0000000..0aa50c2 --- /dev/null +++ b/backend/claude_system_prompt.md @@ -0,0 +1,118 @@ +# Slide Forge - Presentation Generation Agent + +You are an expert presentation designer and content creator. Your role is to help users create professional, visually appealing presentation slides. + +## Core Capabilities + +1. **Outline Generation**: Create structured presentation outlines based on user topics +2. **Slide Generation**: Generate individual slide HTML with proper styling +3. **Content Enhancement**: Improve existing slide content +4. 
**Web Research**: Use WebSearch and WebFetch to gather relevant information when needed + +## Slide Output Format + +When generating slides, you MUST use the following format: + +``` +🎯SLIDE_START:{slide_number}🎯 + +```html-slide + + + + + + + + + +``` + +🎯SLIDE_END:{slide_number}🎯 +``` + +**Important**: +- `{slide_number}` must be a number starting from 0 +- Each slide MUST be wrapped with the emoji markers (🎯) +- HTML must be inside ```html-slide code blocks +- Generate complete, self-contained HTML for each slide + +## Slide Design Guidelines + +### Layout Principles +- Use clean, professional layouts +- Maintain consistent spacing and alignment +- Limit text per slide (6-8 lines maximum) +- Use visual hierarchy with headings, subheadings, and body text + +### Typography +- Use web-safe fonts or Google Fonts +- Title: 32-48px, bold +- Subtitle: 24-32px +- Body: 18-24px +- Maintain good contrast + +### Color Usage +- Use cohesive color schemes +- Ensure readability (dark text on light backgrounds or vice versa) +- Accent colors for emphasis +- Consider accessibility + +### Visual Elements +- Use SVG icons when appropriate +- Include simple diagrams or charts when data is present +- Add visual separators between sections + +## Workflow + +1. **Understand Requirements**: Ask clarifying questions about topic, audience, and style +2. **Create Outline**: Generate a structured outline for user approval +3. **Generate Slides**: Create each slide one at a time, in order +4. **Iterate**: Refine slides based on user feedback + +## Example Slide HTML + +```html-slide + + + + + + +

Welcome to Our Presentation

+

A brief introduction to the topic at hand

+ + +``` + +## Response Guidelines + +- Be concise and focused +- Generate slides sequentially (one at a time) +- Wait for user confirmation before proceeding to the next slide +- Offer to modify slides if the user has feedback +- Use web search when factual information is needed diff --git a/backend/core/__init__.py b/backend/core/__init__.py new file mode 100644 index 0000000..5e4241d --- /dev/null +++ b/backend/core/__init__.py @@ -0,0 +1,7 @@ +"""Core session management components.""" + +from .session import AgentSession +from .session_manager import SessionManager +from .slide_detector import SlideDetector + +__all__ = ["AgentSession", "SessionManager", "SlideDetector"] diff --git a/backend/core/session.py b/backend/core/session.py new file mode 100644 index 0000000..b1121be --- /dev/null +++ b/backend/core/session.py @@ -0,0 +1,578 @@ +""" +Agent Session Management. + +This module contains the AgentSession class which represents a single +interactive session with the Claude Agent SDK, managing the client +connection, permission callbacks, and conversation state. + +Adapted from sample-claude-code-web-agent-on-bedrock-agentcore for slide generation. +""" + +import asyncio +import os +import time +import uuid +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Optional + +from fastapi import HTTPException + +from claude_agent_sdk import ( + AssistantMessage, + ClaudeAgentOptions, + ClaudeSDKClient, + CLIConnectionError, + CLINotFoundError, + PermissionResultAllow, + PermissionResultDeny, + ResultMessage, + SystemMessage, + TextBlock, + ToolPermissionContext, + ToolUseBlock, + UserMessage, +) + +from ..models import MessageBlock, PermissionRequest, SendMessageResponse, SessionStatus +from .slide_detector import SlideDetector + + +def load_custom_system_prompt() -> Optional[str]: + """ + Load custom system prompt from backend/claude_system_prompt.md. + + Returns: + The content of the file if it exists, None otherwise. 
+ """ + try: + backend_dir = Path(__file__).parent.parent + prompt_file = backend_dir / "claude_system_prompt.md" + + if prompt_file.exists(): + with open(prompt_file, encoding="utf-8") as f: + content = f.read().strip() + if content: + return content + except Exception as e: + import logging + + logging.warning(f"Failed to load custom system prompt: {e}") + + return None + + +class AgentSession: + """ + Represents a single Claude Agent session for slide generation. + + Manages the SDK client, permission callbacks, and conversation state + for one interactive session. Includes slide detection for streaming + slide HTML to clients in real-time. + """ + + def __init__( + self, + session_id: str, + user_id: Optional[str] = None, + model: Optional[str] = None, + cwd: Optional[str] = None, + ): + """ + Initialize an agent session. + + Args: + session_id: Unique session identifier + user_id: User ID for tracking + model: Optional model name (defaults to ANTHROPIC_MODEL env var) + cwd: Working directory for the session + """ + self.session_id = session_id + self.user_id = user_id + self.client: Optional[ClaudeSDKClient] = None + self.created_at = datetime.now(timezone.utc) + self.last_activity = datetime.now(timezone.utc) + self.status = "initializing" + self.message_count = 0 + + # Permission management + self.pending_permission: Optional[dict[str, Any]] = None + self.permission_event: Optional[asyncio.Event] = None + self.permission_result: Optional[Any] = None + self.permission_queue: asyncio.Queue = asyncio.Queue() + + # Session configuration + self.cwd = cwd + self.model = model or os.environ.get("ANTHROPIC_MODEL") + self.current_model = self.model + + # Slide detection + self.slide_detector = SlideDetector() + + async def connect(self, resume_session_id: Optional[str] = None): + """ + Connect the SDK client and initialize the session. 
+ + Args: + resume_session_id: Optional session ID to resume from + """ + print(f"\n[Session] {'Resuming' if resume_session_id else 'Creating'} session {self.session_id}") + print(f"[Session] user_id: {self.user_id}") + print(f"[Session] cwd: {self.cwd}") + print(f"[Session] model: {self.model}") + + # Load custom system prompt for slide generation + custom_prompt = load_custom_system_prompt() + + # Build system prompt configuration + if custom_prompt: + system_prompt_config = { + "type": "preset", + "preset": "claude_code", + "append": custom_prompt, + } + else: + system_prompt_config = { + "type": "preset", + "preset": "claude_code", + } + + # Configure allowed tools for slide generation + allowed_tools = [ + "Read", + "Write", + "Edit", + "Glob", + "Grep", + "Bash", + "WebFetch", + "WebSearch", + ] + + options_dict = { + "allowed_tools": allowed_tools, + "system_prompt": system_prompt_config, + "max_turns": 0, + "can_use_tool": self.permission_callback, + "permission_mode": "default", + } + + if resume_session_id: + options_dict["resume"] = resume_session_id + + if self.model: + options_dict["model"] = self.model + + if self.cwd: + options_dict["cwd"] = self.cwd + + print(f"[Session] SDK options: {list(options_dict.keys())}") + + options = ClaudeAgentOptions(**options_dict) + + try: + print(f"[Session] Connecting to Claude SDK...") + self.client = ClaudeSDKClient(options=options) + await self.client.connect() + self.status = "connected" + print(f"[Session] Connected successfully") + except (CLINotFoundError, CLIConnectionError) as e: + self.status = "error" + raise HTTPException(status_code=500, detail=f"Failed to connect: {str(e)}") + + async def disconnect(self): + """Disconnect the SDK client and cleanup.""" + if self.client: + try: + await self.client.disconnect() + except RuntimeError as e: + if "cancel scope" in str(e) or "different task" in str(e): + import logging + + logging.warning( + f"Session {self.session_id}: Disconnect cleanup error (non-fatal): 
{e}" + ) + else: + raise + finally: + self.status = "disconnected" + + async def permission_callback( + self, tool_name: str, input_data: dict, context: ToolPermissionContext + ) -> PermissionResultAllow | PermissionResultDeny: + """ + Permission callback for tool usage. + + Auto-allows most tools for slide generation workflow. + + Args: + tool_name: Name of the tool requesting permission + input_data: Tool input parameters + context: Permission context with suggestions + + Returns: + Permission result (allow or deny) + """ + print(f"[Permission] Tool: {tool_name}") + + # Auto-allow common tools for slide generation + auto_allow_tools = [ + "Read", + "Write", + "Edit", + "Glob", + "Grep", + "Bash", + "WebSearch", + "WebFetch", + "Task", + "TodoWrite", + ] + + if tool_name in auto_allow_tools: + print(f"[Permission] Auto-allow: {tool_name}") + return PermissionResultAllow() + + # For other tools, create permission request + print(f"[Permission] Requesting user approval for: {tool_name}") + request_id = str(uuid.uuid4()) + self.pending_permission = { + "request_id": request_id, + "tool_name": tool_name, + "tool_input": input_data, + "suggestions": [ + s.__dict__ if hasattr(s, "__dict__") else s for s in context.suggestions + ], + } + + self.permission_event = asyncio.Event() + self.permission_result = None + + try: + self.permission_queue.put_nowait(self.pending_permission) + except Exception as e: + print(f"[Permission] Warning: Failed to queue permission: {e}") + + # Wait for response with timeout + try: + await asyncio.wait_for(self.permission_event.wait(), timeout=300) + except asyncio.TimeoutError: + print(f"[Permission] Timeout for: {tool_name}") + self.pending_permission = None + return PermissionResultDeny(message="Permission request timed out") + + result = self.permission_result + self.pending_permission = None + self.permission_event = None + self.permission_result = None + + return result + + def respond_to_permission( + self, request_id: str, allowed: 
bool, apply_suggestions: bool = False + ): + """ + Respond to a pending permission request. + + Args: + request_id: The permission request ID + allowed: Whether to allow the operation + apply_suggestions: Whether to apply permission suggestions + + Raises: + HTTPException: If no matching pending permission + """ + if ( + not self.pending_permission + or self.pending_permission["request_id"] != request_id + ): + raise HTTPException( + status_code=404, detail="No matching permission request" + ) + + if allowed: + if apply_suggestions and self.pending_permission["suggestions"]: + from claude_agent_sdk import PermissionUpdate + + suggestions = [] + for s in self.pending_permission["suggestions"]: + suggestions.append(PermissionUpdate(**s)) + + self.permission_result = PermissionResultAllow( + updated_permissions=suggestions + ) + else: + self.permission_result = PermissionResultAllow() + else: + self.permission_result = PermissionResultDeny(message="User denied") + + if self.permission_event: + self.permission_event.set() + + async def send_message(self, message: str | dict) -> SendMessageResponse: + """ + Send a message and get the response. 
+ + Args: + message: The user's message + + Returns: + SendMessageResponse with assistant's reply + + Raises: + HTTPException: If session not connected + """ + if not self.client or self.status != "connected": + raise HTTPException(status_code=400, detail="Session not connected") + + self.last_activity = datetime.now(timezone.utc) + self.message_count += 1 + + if isinstance(message, dict): + wire_message = { + "type": "user", + "message": message, + "parent_tool_use_id": None, + "session_id": "default", + } + + async def message_stream(): + yield wire_message + + await self.client.query(message_stream()) + else: + await self.client.query(message) + + messages = [] + cost_usd = None + num_turns = None + + async for msg in self.client.receive_response(): + if isinstance(msg, AssistantMessage): + for block in msg.content: + if isinstance(block, TextBlock): + messages.append(MessageBlock(type="text", content=block.text)) + elif isinstance(block, ToolUseBlock): + messages.append( + MessageBlock( + type="tool_use", + tool_name=block.name, + tool_input=block.input, + ) + ) + elif isinstance(msg, ResultMessage): + cost_usd = msg.total_cost_usd + num_turns = msg.num_turns + + return SendMessageResponse( + messages=messages, + session_id=self.session_id, + cost_usd=cost_usd, + num_turns=num_turns, + ) + + async def send_message_stream(self, message: str | dict): + """ + Send a message and stream the response in real-time. + + Includes slide detection - emits 'slide_complete' events when + slides are fully generated. 
+ + Args: + message: The user's message + + Yields: + Dictionary events with type and data for each step + + Raises: + HTTPException: If session not connected + """ + print(f"\n[Session] send_message_stream START") + print(f"[Session] session_id: {self.session_id}") + + if not self.client or self.status != "connected": + raise HTTPException(status_code=400, detail="Session not connected") + + self.last_activity = datetime.now(timezone.utc) + self.message_count += 1 + + # Reset slide detector for new message + self.slide_detector.reset() + + # Send initial event + yield { + "type": "start", + "session_id": self.session_id, + "message": message if isinstance(message, str) else str(message), + } + + # Send message to SDK + if isinstance(message, dict): + wire_message = { + "type": "user", + "message": message, + "parent_tool_use_id": None, + "session_id": "default", + } + + async def message_stream(): + yield wire_message + + await self.client.query(message_stream()) + else: + await self.client.query(message) + + # Track last reported permission + last_permission_id = None + response_iterator = self.client.receive_response() + sdk_done = False + real_session_id = self.session_id + + while not sdk_done: + # Check permission queue + try: + permission = self.permission_queue.get_nowait() + permission_id = permission.get("request_id") + if permission_id != last_permission_id: + yield { + "type": "permission", + "permission": permission, + } + last_permission_id = permission_id + except asyncio.QueueEmpty: + pass + + # Get next SDK message + try: + msg = await anext(response_iterator) + except StopAsyncIteration: + sdk_done = True + break + + if isinstance(msg, SystemMessage): + continue + elif isinstance(msg, UserMessage): + yield { + "type": "user_message", + "content": msg.content, + } + elif isinstance(msg, AssistantMessage): + for block in msg.content: + if isinstance(block, TextBlock): + # Feed to slide detector + new_slides = self.slide_detector.feed(block.text) + + 
# Emit slide_complete events for detected slides + for slide in new_slides: + yield { + "type": "slide_complete", + "slide_index": slide.index, + "html": slide.html, + "timestamp": int(time.time() * 1000), + } + print(f"[Session] Slide {slide.index} detected and streamed") + + # Also emit the text content + yield { + "type": "text", + "content": block.text, + } + elif isinstance(block, ToolUseBlock): + yield { + "type": "tool_use", + "tool_name": block.name, + "tool_input": block.input, + "tool_use_id": block.id, + } + elif isinstance(msg, ResultMessage): + real_session_id = msg.session_id if hasattr(msg, "session_id") else self.session_id + + # Update session ID if changed + if real_session_id != self.session_id: + print(f"[Session] Session ID changed: {self.session_id} -> {real_session_id}") + + yield { + "type": "result", + "cost_usd": msg.total_cost_usd, + "num_turns": msg.num_turns, + "session_id": real_session_id, + } + + # Check remaining permissions + while True: + try: + permission = self.permission_queue.get_nowait() + permission_id = permission.get("request_id") + if permission_id != last_permission_id: + yield { + "type": "permission", + "permission": permission, + } + last_permission_id = permission_id + except asyncio.QueueEmpty: + break + + # Send completion event + yield { + "type": "done", + "session_id": real_session_id, + "slides_detected": len(self.slide_detector.get_all_slides()), + } + print(f"[Session] send_message_stream END") + + async def interrupt(self): + """ + Interrupt the current operation. 
+ + Raises: + HTTPException: If session not connected or SDK call fails + """ + if not self.client or self.status != "connected": + raise HTTPException(status_code=400, detail="Session not connected") + + try: + await self.client.interrupt() + self.last_activity = datetime.now(timezone.utc) + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed to interrupt: {str(e)}" + ) + + async def set_permission_mode(self, mode: str): + """ + Change the permission mode for this session. + + Args: + mode: Permission mode ("default", "acceptEdits", "plan", "bypassPermissions") + + Raises: + HTTPException: If session not connected or SDK call fails + """ + if not self.client or self.status != "connected": + raise HTTPException(status_code=400, detail="Session not connected") + + try: + await self.client.set_permission_mode(mode) + self.last_activity = datetime.now(timezone.utc) + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed to set permission mode: {str(e)}" + ) + + def get_status(self) -> SessionStatus: + """ + Get current session status. + + Returns: + SessionStatus object + """ + pending_perm = None + if self.pending_permission: + pending_perm = PermissionRequest(**self.pending_permission) + + return SessionStatus( + session_id=self.session_id, + status=self.status, + pending_permission=pending_perm, + current_model=self.current_model, + ) diff --git a/backend/core/session_manager.py b/backend/core/session_manager.py new file mode 100644 index 0000000..5e3cd8f --- /dev/null +++ b/backend/core/session_manager.py @@ -0,0 +1,457 @@ +""" +Session Manager. + +This module contains the SessionManager class which manages multiple +concurrent Claude Agent sessions, handling creation, restoration, +and cleanup operations. + +Adapted from sample-claude-code-web-agent-on-bedrock-agentcore for slide generation. 
+""" + +import json +import os +import re +import uuid +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Optional + +from fastapi import HTTPException + +from ..models import SessionInfo +from .session import AgentSession + + +# System message patterns to filter out from previews +SYSTEM_MESSAGE_PATTERNS = [ + r"^", + r"^", + r"^", + r"^Caveat:", + r"^This session is being continued from a previous", +] + +SYSTEM_MESSAGE_REGEX = re.compile("|".join(SYSTEM_MESSAGE_PATTERNS)) + + +def _is_system_message(content: str) -> bool: + """Check if a message content is a system message that should be filtered.""" + if not content: + return False + return bool(SYSTEM_MESSAGE_REGEX.search(content)) + + +def _extract_text_content(content: Any) -> Optional[str]: + """Extract text content from various message content formats.""" + if isinstance(content, str): + return content + if isinstance(content, list) and len(content) > 0: + first_block = content[0] + if isinstance(first_block, dict): + return first_block.get("text", "") + if isinstance(first_block, str): + return first_block + return None + + +def _parse_jsonl_sessions(file_path: Path) -> dict[str, Any]: + """ + Parse a JSONL session file and extract session metadata. + + Returns a dict with session information including messages and metadata. 
+ """ + sessions: dict[str, dict] = {} + + try: + with open(file_path, encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + + try: + entry = json.loads(line) + + # Handle summary entries + if entry.get("type") == "summary" and entry.get("summary"): + session_id = entry.get("sessionId") + if session_id and session_id in sessions: + sessions[session_id]["summary"] = entry["summary"] + continue + + session_id = entry.get("sessionId") + if not session_id: + continue + + if session_id not in sessions: + sessions[session_id] = { + "id": session_id, + "summary": "New Session", + "message_count": 0, + "last_activity": datetime.now(timezone.utc), + "cwd": entry.get("cwd", ""), + "last_user_message": None, + "last_assistant_message": None, + } + + session = sessions[session_id] + + # Track messages + msg = entry.get("message", {}) + role = msg.get("role") + content = msg.get("content") + + if role == "user" and content: + text_content = _extract_text_content(content) + if text_content and not _is_system_message(text_content): + session["last_user_message"] = text_content + + elif role == "assistant" and content: + if entry.get("isApiErrorMessage"): + continue + text_content = _extract_text_content(content) + if text_content and not _is_system_message(text_content): + session["last_assistant_message"] = text_content + + session["message_count"] += 1 + + if entry.get("timestamp"): + try: + session["last_activity"] = datetime.fromisoformat( + entry["timestamp"].replace("Z", "+00:00") + ) + except (ValueError, AttributeError): + pass + + except json.JSONDecodeError: + continue + + # Set final summary based on messages if no summary exists + for session in sessions.values(): + if session["summary"] == "New Session": + last_msg = session["last_user_message"] or session["last_assistant_message"] + if last_msg: + session["summary"] = last_msg[:50] + "..." 
if len(last_msg) > 50 else last_msg + + return {"sessions": list(sessions.values())} + + except Exception: + return {"sessions": []} + + +class SessionManager: + """ + Manages multiple concurrent Claude Agent sessions. + + Each session maintains its own SDK client, conversation history, + and permission state. Supports session creation, restoration, + and cleanup. + """ + + def __init__(self): + """Initialize the session manager.""" + self.sessions: dict[str, AgentSession] = {} + self.session_dir = Path.home() / ".claude" / "projects" + + async def create_session( + self, + user_id: Optional[str] = None, + resume_session_id: Optional[str] = None, + model: Optional[str] = None, + cwd: Optional[str] = None, + ) -> str: + """ + Create a new session or resume an existing one. + + Args: + user_id: User ID for tracking + resume_session_id: Optional session ID to resume + model: Optional model name override + cwd: Working directory for the session + + Returns: + The session ID (new or resumed) + """ + session_id = resume_session_id or str(uuid.uuid4()) + + if session_id in self.sessions: + raise HTTPException(status_code=400, detail="Session already active") + + session = AgentSession( + session_id, + user_id, + model, + cwd, + ) + await session.connect(resume_session_id) + + self.sessions[session_id] = session + + return session_id + + async def get_session( + self, + session_id: str, + auto_resume: bool = True, + user_id: Optional[str] = None, + cwd: Optional[str] = None, + ) -> AgentSession: + """ + Get an active session by ID, optionally auto-resuming if not in memory. 
+ + Args: + session_id: The session ID + auto_resume: Whether to automatically resume session if not active + user_id: User ID for session creation + cwd: Working directory + + Returns: + The AgentSession instance + + Raises: + HTTPException: If session not found and auto_resume is disabled + """ + if session_id in self.sessions: + return self.sessions[session_id] + + if not auto_resume: + raise HTTPException(status_code=404, detail="Session not found") + + # Try to find session file on disk for resumption + print(f"[SessionManager] Session {session_id} not in memory, checking for session file...") + + session_file = None + session_cwd = None + + if self.session_dir.exists(): + for project_dir in self.session_dir.iterdir(): + if not project_dir.is_dir(): + continue + + potential_file = project_dir / f"{session_id}.jsonl" + if potential_file.exists(): + session_file = potential_file + + # Extract cwd from session file + try: + parsed = _parse_jsonl_sessions(potential_file) + if parsed["sessions"]: + session_cwd = parsed["sessions"][0].get("cwd", "") + except Exception: + pass + + break + + # Resume if session file found + if session_file: + print(f"[SessionManager] Found session file: {session_file}") + resume_cwd = session_cwd if session_cwd else cwd + + resumed_session_id = await self.create_session( + user_id=user_id, + resume_session_id=session_id, + model=os.environ.get("ANTHROPIC_MODEL"), + cwd=resume_cwd, + ) + + print(f"[SessionManager] Auto-resumed session: {resumed_session_id}") + return self.sessions[resumed_session_id] + + # Create new session + print(f"[SessionManager] No session file found, creating new session: {session_id}") + + session = AgentSession( + session_id, + user_id, + os.environ.get("ANTHROPIC_MODEL"), + cwd, + ) + + await session.connect(resume_session_id=None) + self.sessions[session_id] = session + + print(f"[SessionManager] Created new session: {session_id}") + return self.sessions[session_id] + + def update_session_id(self, 
old_session_id: str, new_session_id: str): + """ + Update session ID after SDK provides real session_id. + + Args: + old_session_id: Old/temporary session ID + new_session_id: New/real session ID from SDK + """ + if old_session_id not in self.sessions: + raise HTTPException(status_code=404, detail=f"Session {old_session_id} not found") + + if new_session_id in self.sessions: + return + + session = self.sessions.pop(old_session_id) + session.session_id = new_session_id + self.sessions[new_session_id] = session + print(f"[SessionManager] Updated session ID: {old_session_id} -> {new_session_id}") + + async def close_session(self, session_id: str): + """ + Close and cleanup a session. + + Args: + session_id: The session ID to close + """ + if session_id in self.sessions: + session = self.sessions[session_id] + await session.disconnect() + del self.sessions[session_id] + + def list_sessions(self, cwd: Optional[str] = None) -> list[SessionInfo]: + """ + List all active sessions, optionally filtered by cwd. + + Args: + cwd: Optional working directory to filter by + + Returns: + List of SessionInfo objects + """ + result = [] + for session_id, session in self.sessions.items(): + if cwd and session.cwd != cwd: + continue + + result.append( + SessionInfo( + session_id=session_id, + created_at=session.created_at.isoformat(), + last_activity=session.last_activity.isoformat(), + status=session.status, + message_count=session.message_count, + cwd=session.cwd, + ) + ) + return result + + def list_available_sessions( + self, + cwd: Optional[str] = None, + limit: int = 20, + offset: int = 0, + ) -> dict[str, Any]: + """ + List all available sessions (both active and persisted on disk). 
+ + Args: + cwd: Optional working directory to filter by + limit: Maximum number of sessions to return + offset: Number of sessions to skip + + Returns: + Dict with sessions list and pagination info + """ + all_sessions: dict[str, dict] = {} + session_ids_seen: set[str] = set() + + # Add active sessions + for session_id, session in self.sessions.items(): + if cwd and session.cwd != cwd: + continue + + path_key = session.cwd.replace("/", "-").replace("_", "-") if session.cwd else "default" + + session_data = { + "id": session_id, + "summary": "Active session", + "message_count": session.message_count, + "last_activity": session.last_activity, + "cwd": session.cwd or "", + "project": path_key, + "active": True, + } + + # Try to get metadata from session file + session_file_path = self.session_dir / path_key / f"{session_id}.jsonl" + if session_file_path.exists(): + parsed = _parse_jsonl_sessions(session_file_path) + for s in parsed["sessions"]: + if s["id"] == session_id: + session_data.update({ + "summary": s["summary"], + "message_count": s["message_count"], + "last_activity": s["last_activity"], + }) + break + + all_sessions[session_id] = session_data + session_ids_seen.add(session_id) + + # Scan persisted sessions from disk + if self.session_dir.exists(): + if cwd: + path_key = cwd.replace("/", "-").replace("_", "-") + project_dirs = [self.session_dir / path_key] + else: + project_dirs = list(self.session_dir.iterdir()) + + for project_dir in project_dirs: + if not project_dir.exists() or not project_dir.is_dir(): + continue + + for session_file in project_dir.glob("*.jsonl"): + session_id = session_file.stem + + if session_id in session_ids_seen: + continue + + parsed = _parse_jsonl_sessions(session_file) + + for s in parsed["sessions"]: + if s["id"] not in all_sessions: + all_sessions[s["id"]] = { + **s, + "project": project_dir.name, + "active": False, + } + + session_ids_seen.add(session_id) + + # Sort by last activity (newest first) + visible_sessions = 
list(all_sessions.values()) + visible_sessions.sort( + key=lambda x: x["last_activity"] + if isinstance(x["last_activity"], datetime) + else datetime.fromisoformat(str(x["last_activity"]).replace("Z", "+00:00")), + reverse=True, + ) + + total = len(visible_sessions) + paginated = visible_sessions[offset : offset + limit] + has_more = offset + limit < total + + # Format output + result_sessions = [] + for s in paginated: + last_activity = s["last_activity"] + if isinstance(last_activity, datetime): + modified = last_activity.isoformat() + else: + modified = str(last_activity) + + result = { + "session_id": s["id"], + "modified": modified, + "preview": s.get("summary", "No preview")[:100], + "project": s.get("project", ""), + "message_count": s.get("message_count", 0), + "active": s.get("active", False), + "cwd": s.get("cwd", ""), + } + + result_sessions.append(result) + + return { + "sessions": result_sessions, + "has_more": has_more, + "total": total, + "offset": offset, + "limit": limit, + } diff --git a/backend/core/slide_detector.py b/backend/core/slide_detector.py new file mode 100644 index 0000000..e9aa64d --- /dev/null +++ b/backend/core/slide_detector.py @@ -0,0 +1,196 @@ +""" +Slide Detection Module. + +Extracts slide HTML content from agent responses using regex patterns. +Ported from the Next.js frontend implementation. +""" + +import re +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class DetectedSlide: + """Represents a detected slide from agent output.""" + + index: int + html: str + raw_content: str + + +class SlideDetector: + """ + Detects and extracts slides from agent message content. + + Uses emoji markers to identify slide boundaries and extracts + HTML content from code blocks within those boundaries. + + Patterns: + - Slide markers: SLIDE_START:N ... SLIDE_END:N (with emoji markers) + - HTML extraction: ```html-slide ... ``` or ```html ... 
``` code blocks + """ + + # Pattern to match slide boundaries with emoji markers + # Format: SLIDE_START:N ... SLIDE_END:N + SLIDE_PATTERN = re.compile( + r"\U0001F3AF" # Target emoji + r"SLIDE_START:(\d+)" + r"\U0001F3AF" # Target emoji + r"([\s\S]*?)" + r"\U0001F3AF" # Target emoji + r"SLIDE_END:\1" + r"\U0001F3AF" # Target emoji + ) + + # Pattern to extract HTML from html-slide code blocks (preferred) + HTML_SLIDE_PATTERN = re.compile(r"```html-slide\s*([\s\S]*?)\s*```") + + # Fallback pattern for regular html code blocks + HTML_PATTERN = re.compile(r"```html\s*([\s\S]*?)\s*```") + + def __init__(self): + """Initialize the slide detector.""" + self._buffer = "" + self._detected_slides: dict[int, DetectedSlide] = {} + + def reset(self): + """Reset the detector state for a new message stream.""" + self._buffer = "" + self._detected_slides.clear() + + def feed(self, content: str) -> list[DetectedSlide]: + """ + Feed content to the detector and return any newly detected slides. + + Args: + content: New content to process (can be partial) + + Returns: + List of newly detected slides (may be empty) + """ + self._buffer += content + new_slides = [] + + # Find all complete slide matches + matches = list(self.SLIDE_PATTERN.finditer(self._buffer)) + + for match in matches: + slide_index = int(match.group(1)) + slide_content = match.group(2) + + # Skip if already detected + if slide_index in self._detected_slides: + continue + + # Extract HTML from the slide content + html = self._extract_html(slide_content) + + if html: + slide = DetectedSlide( + index=slide_index, + html=html, + raw_content=slide_content, + ) + self._detected_slides[slide_index] = slide + new_slides.append(slide) + + # Trim buffer to remove processed content + if matches: + last_match = matches[-1] + self._buffer = self._buffer[last_match.end() :] + + return new_slides + + def _extract_html(self, content: str) -> Optional[str]: + """ + Extract HTML from slide content. 
+ + Tries html-slide code blocks first, then falls back to regular html blocks. + + Args: + content: The slide content to extract HTML from + + Returns: + Extracted HTML string or None if not found + """ + # Try html-slide first (preferred format) + match = self.HTML_SLIDE_PATTERN.search(content) + if match: + return match.group(1).strip() + + # Fallback to regular html blocks + match = self.HTML_PATTERN.search(content) + if match: + html = match.group(1).strip() + # Only accept if it looks like a complete HTML document or slide + if html.startswith(" list[DetectedSlide]: + """ + Get all detected slides sorted by index. + + Returns: + List of all detected slides in order + """ + return sorted(self._detected_slides.values(), key=lambda s: s.index) + + def get_slide(self, index: int) -> Optional[DetectedSlide]: + """ + Get a specific slide by index. + + Args: + index: The slide index to retrieve + + Returns: + The detected slide or None if not found + """ + return self._detected_slides.get(index) + + @classmethod + def extract_slides_from_text(cls, text: str) -> list[DetectedSlide]: + """ + One-shot extraction of all slides from complete text. + + Useful for processing complete messages rather than streams. + + Args: + text: Complete text to extract slides from + + Returns: + List of detected slides + """ + detector = cls() + detector.feed(text) + return detector.get_all_slides() + + @classmethod + def extract_html_from_message(cls, content: str) -> Optional[str]: + """ + Extract HTML from a single message content. + + Tries html-slide code blocks first, then falls back to regular html blocks. 
+ + Args: + content: The message content to extract HTML from + + Returns: + Extracted HTML string or None if not found + """ + # Try html-slide first (preferred format) + match = cls.HTML_SLIDE_PATTERN.search(content) + if match: + return match.group(1).strip() + + # Fallback to regular html blocks + match = cls.HTML_PATTERN.search(content) + if match: + html = match.group(1).strip() + # Only accept if it looks like a complete HTML document or slide + if html.startswith("=3.12" +dependencies = [ + "boto3>=1.42.24", + "claude-agent-sdk>=0.1.19", + "fastapi>=0.120.0", + "httpx>=0.28.1", + "uvicorn>=0.38.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", + "pytest-asyncio>=0.23.0", +] diff --git a/backend/server.py b/backend/server.py new file mode 100644 index 0000000..492843c --- /dev/null +++ b/backend/server.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +""" +Slide Forge API Server + +A stateful API server for presentation generation using Claude Agent SDK. +Manages multiple concurrent sessions with slide detection and streaming support. 
+ +Key Features: +- Session-based state management +- Permission callback system +- Real-time slide detection and streaming +- Multi-turn conversation support +""" + +import logging +import os +import sys +from contextlib import asynccontextmanager +from datetime import datetime, timezone + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +# ============================================================================ +# Logging Configuration +# ============================================================================ + +LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper() + +logging.basicConfig( + level=LOG_LEVEL, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], +) + +# Quiet noisy libraries +logging.getLogger("urllib3").setLevel(logging.WARNING) +logging.getLogger("botocore").setLevel(logging.WARNING) +logging.getLogger("boto3").setLevel(logging.WARNING) + +logger = logging.getLogger(__name__) + +from .api import ( + invocations_router, + messages_router, + permissions_router, + sessions_router, +) +from .core import SessionManager + +# ============================================================================ +# Global Session Manager +# ============================================================================ + +session_manager = SessionManager() + + +# ============================================================================ +# FastAPI Application +# ============================================================================ + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Application lifespan manager.""" + # Startup + print("=" * 80) + print("Slide Forge API Server Starting...") + print(f"Log Level: {LOG_LEVEL}") + print("=" * 80) + + logger.info("Starting Slide Forge API Server") + logger.info(f"Log level set to: {LOG_LEVEL}") + + print("=" * 80) + print("Server startup complete") + print("=" * 80) + 
+ yield + + # Shutdown + print("Shutting down server...") + for session_id in list(session_manager.sessions.keys()): + await session_manager.close_session(session_id) + + print("Server shutdown complete") + + +app = FastAPI( + title="Slide Forge API Server", + description="Stateful API server for presentation generation with Claude Agent SDK", + version="1.0.0", + lifespan=lifespan, +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# ============================================================================ +# Register Routers +# ============================================================================ + +# Unified invocations endpoint for AgentCore +app.include_router(invocations_router, tags=["invocations"]) + +# Session management endpoints +app.include_router(sessions_router, tags=["sessions"]) + +# Message and status endpoints +app.include_router(messages_router, tags=["messages"]) + +# Permission endpoints +app.include_router(permissions_router, tags=["permissions"]) + + +# ============================================================================ +# Health Check +# ============================================================================ + + +@app.get("/health") +async def health_check(): + """Health check endpoint.""" + return { + "status": "healthy", + "active_sessions": len(session_manager.sessions), + "timestamp": datetime.now(timezone.utc).isoformat(), + } + + +@app.get("/ping") +async def ping(): + """Ping endpoint for health monitoring.""" + import time + + return { + "status": "Healthy", + "time_of_last_update": int(time.time()), + } + + +# ============================================================================ +# Main Entry Point +# ============================================================================ + +if __name__ == "__main__": + import uvicorn + + uvicorn.run(app, host="0.0.0.0", port=8080, log_level="info") 
diff --git a/deploy/01_build_and_push.sh b/deploy/01_build_and_push.sh new file mode 100755 index 0000000..7186911 --- /dev/null +++ b/deploy/01_build_and_push.sh @@ -0,0 +1,137 @@ +#!/bin/bash +# ============================================================================= +# Slide Forge - Build and Push Docker Image to ECR +# ============================================================================= +# Step 1: Build ARM64 Docker image and push to Amazon ECR +# +# Prerequisites: +# - AWS CLI configured with appropriate permissions +# - Docker installed and running +# - config.env file with required values +# ============================================================================= + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + +# Load configuration +if [ -f "${SCRIPT_DIR}/config.env" ]; then + source "${SCRIPT_DIR}/config.env" +else + echo "Error: config.env not found. Please copy config.env.template to config.env and fill in the values." 
+ exit 1 +fi + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo -e "${GREEN}=======================================${NC}" +echo -e "${GREEN}Step 1: Build and Push Docker Image${NC}" +echo -e "${GREEN}=======================================${NC}" +echo "" + +# Auto-detect AWS Account ID if not set +if [ -z "$AWS_ACCOUNT_ID" ]; then + echo -e "${YELLOW}AWS_ACCOUNT_ID not set in config, detecting...${NC}" + AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) + echo -e "${GREEN}Detected AWS_ACCOUNT_ID: ${AWS_ACCOUNT_ID}${NC}" +fi + +# Auto-detect AWS Region if not set +if [ -z "$AWS_REGION" ]; then + echo -e "${YELLOW}AWS_REGION not set, using default...${NC}" + AWS_REGION=$(aws configure get region) + AWS_REGION=${AWS_REGION:-us-west-2} + echo -e "${GREEN}Using AWS_REGION: ${AWS_REGION}${NC}" +fi + +# Construct ECR URI and image name +ECR_URI="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com" +FULL_IMAGE_NAME="${ECR_URI}/${ECR_REPOSITORY_NAME}:${DOCKER_IMAGE_VERSION}" + +echo "" +echo -e "${BLUE}Configuration:${NC}" +echo " Project Root: ${PROJECT_ROOT}" +echo " ECR Repository: ${ECR_REPOSITORY_NAME}" +echo " Image Version: ${DOCKER_IMAGE_VERSION}" +echo " Full Image URI: ${FULL_IMAGE_NAME}" +echo "" + +# ============================================================================= +# Create ECR Repository if it doesn't exist +# ============================================================================= +echo -e "${YELLOW}Checking if ECR repository exists...${NC}" +if ! 
aws ecr describe-repositories --region "${AWS_REGION}" --repository-names "${ECR_REPOSITORY_NAME}" > /dev/null 2>&1; then + echo -e "${YELLOW}Creating ECR repository: ${ECR_REPOSITORY_NAME}${NC}" + aws ecr create-repository \ + --region "${AWS_REGION}" \ + --repository-name "${ECR_REPOSITORY_NAME}" \ + --image-scanning-configuration scanOnPush=true \ + --tags \ + Key=Project,Value="${TAG_PROJECT}" \ + Key=Environment,Value="${TAG_ENVIRONMENT}" \ + Key=ManagedBy,Value="${TAG_MANAGED_BY}" \ + > /dev/null + echo -e "${GREEN}[OK]${NC} ECR repository created" +else + echo -e "${GREEN}[OK]${NC} ECR repository already exists" +fi + +# ============================================================================= +# Login to ECR +# ============================================================================= +echo -e "${YELLOW}Logging into ECR...${NC}" +aws ecr get-login-password --region "${AWS_REGION}" | \ + docker login --username AWS --password-stdin "${ECR_URI}" +echo -e "${GREEN}[OK]${NC} Logged into ECR" + +# ============================================================================= +# Build Docker Image for ARM64 +# ============================================================================= +echo "" +echo -e "${YELLOW}Building Docker image for ARM64 architecture...${NC}" +echo " This may take several minutes on first build..." 
+echo "" + +docker build \ + --platform linux/arm64 \ + -t "${FULL_IMAGE_NAME}" \ + -f "${SCRIPT_DIR}/Dockerfile" \ + "${PROJECT_ROOT}" + +echo -e "${GREEN}[OK]${NC} Docker image built (ARM64)" + +# ============================================================================= +# Push Image to ECR +# ============================================================================= +echo "" +echo -e "${YELLOW}Pushing image to ECR...${NC}" +docker push "${FULL_IMAGE_NAME}" +echo -e "${GREEN}[OK]${NC} Image pushed to ECR" + +# ============================================================================= +# Save Output for Next Step +# ============================================================================= +echo "" +echo -e "${GREEN}=======================================${NC}" +echo -e "${GREEN}Step 1 Complete!${NC}" +echo -e "${GREEN}=======================================${NC}" +echo "" +echo "Image URI: ${FULL_IMAGE_NAME}" +echo "" + +# Save output for next step +cat > "${SCRIPT_DIR}/.build_output" </dev/null || echo "") + + if [ -z "$S3_WORKSPACE_BUCKET" ] || [ "$S3_WORKSPACE_BUCKET" == "None" ]; then + echo -e "${RED}Error: Could not find S3 workspace bucket.${NC}" + echo "Please set S3_WORKSPACE_BUCKET in config.env" + exit 1 + fi +fi + +if aws s3 ls "s3://${S3_WORKSPACE_BUCKET}" &>/dev/null; then + echo -e "${GREEN}[OK]${NC} S3 bucket exists: ${S3_WORKSPACE_BUCKET}" +else + echo -e "${RED}Error: S3 bucket does not exist: ${S3_WORKSPACE_BUCKET}${NC}" + exit 1 +fi + +# ============================================================================= +# Lookup Existing Cognito Configuration from CDK Stack +# ============================================================================= +echo "" +echo -e "${YELLOW}Checking Cognito configuration...${NC}" + +STACK_NAME="${SLIDE_FORGE_STACK_NAME:-slide-forge}" + +# Function to lookup CloudFormation export +lookup_cfn_export() { + local export_name="$1" + aws cloudformation list-exports \ + --region "${AWS_REGION}" \ + 
--query "Exports[?Name=='${export_name}'].Value | [0]" \ + --output text 2>/dev/null || echo "" +} + +# Lookup Cognito User Pool ID if not provided +if [ -z "$COGNITO_USER_POOL_ID" ]; then + echo -e "${YELLOW}Looking up Cognito User Pool ID from CDK stack...${NC}" + + # Try CloudFormation export first + COGNITO_USER_POOL_ID=$(lookup_cfn_export "${STACK_NAME}-cognito-user-pool-id") + + # Fallback: try alternative export name + if [ -z "$COGNITO_USER_POOL_ID" ] || [ "$COGNITO_USER_POOL_ID" == "None" ]; then + COGNITO_USER_POOL_ID=$(lookup_cfn_export "${STACK_NAME}-user-pool-id") + fi + + # Fallback: try stack outputs directly + if [ -z "$COGNITO_USER_POOL_ID" ] || [ "$COGNITO_USER_POOL_ID" == "None" ]; then + COGNITO_USER_POOL_ID=$(aws cloudformation describe-stacks \ + --stack-name "${STACK_NAME}" \ + --region "${AWS_REGION}" \ + --query "Stacks[0].Outputs[?contains(OutputKey, 'UserPoolId') || contains(OutputKey, 'CognitoUserPoolId')].OutputValue | [0]" \ + --output text 2>/dev/null || echo "") + fi + + if [ -z "$COGNITO_USER_POOL_ID" ] || [ "$COGNITO_USER_POOL_ID" == "None" ]; then + echo -e "${RED}Error: Could not find Cognito User Pool ID.${NC}" + echo "Please set COGNITO_USER_POOL_ID in config.env" + exit 1 + fi +fi + +# Lookup Cognito Client ID if not provided +if [ -z "$COGNITO_CLIENT_ID" ]; then + echo -e "${YELLOW}Looking up Cognito Client ID from CDK stack...${NC}" + + # Try CloudFormation export first + COGNITO_CLIENT_ID=$(lookup_cfn_export "${STACK_NAME}-cognito-client-id") + + # Fallback: try stack outputs directly + if [ -z "$COGNITO_CLIENT_ID" ] || [ "$COGNITO_CLIENT_ID" == "None" ]; then + COGNITO_CLIENT_ID=$(aws cloudformation describe-stacks \ + --stack-name "${STACK_NAME}" \ + --region "${AWS_REGION}" \ + --query "Stacks[0].Outputs[?contains(OutputKey, 'ClientId') || contains(OutputKey, 'CognitoClientId')].OutputValue | [0]" \ + --output text 2>/dev/null || echo "") + fi + + if [ -z "$COGNITO_CLIENT_ID" ] || [ "$COGNITO_CLIENT_ID" == "None" ]; then 
+ echo -e "${RED}Error: Could not find Cognito Client ID.${NC}" + echo "Please set COGNITO_CLIENT_ID in config.env" + exit 1 + fi +fi + +# Set Cognito region (usually same as AWS_REGION) +COGNITO_REGION="${COGNITO_REGION:-${AWS_REGION}}" + +# Construct discovery URL +COGNITO_DISCOVERY_URL="https://cognito-idp.${COGNITO_REGION}.amazonaws.com/${COGNITO_USER_POOL_ID}/.well-known/openid-configuration" + +echo -e "${GREEN}[OK]${NC} Using existing Cognito from slide-forge stack" +echo " User Pool ID: ${COGNITO_USER_POOL_ID}" +echo " Client ID: ${COGNITO_CLIENT_ID}" +echo " Discovery URL: ${COGNITO_DISCOVERY_URL}" + +# ============================================================================= +# Create IAM Execution Role +# ============================================================================= +echo "" +echo -e "${YELLOW}Checking IAM execution role...${NC}" + +FULL_ROLE_NAME="${IAM_ROLE_NAME}-${AWS_REGION}-${DEPLOYMENT_ENV:-prod}" + +if aws iam get-role --role-name "${FULL_ROLE_NAME}" &>/dev/null; then + ROLE_ARN=$(aws iam get-role --role-name "${FULL_ROLE_NAME}" --query 'Role.Arn' --output text) + echo -e "${GREEN}[OK]${NC} IAM role already exists: ${ROLE_ARN}" +else + echo -e "${YELLOW}Creating IAM role: ${FULL_ROLE_NAME}${NC}" + + # Create trust policy for AgentCore + cat > /tmp/trust-policy.json < /tmp/role-policy.json </dev/null || echo "") + +# Prepare environment variables +ENV_VARS="AWS_DEFAULT_REGION=${AWS_REGION}" +[ -n "${ANTHROPIC_MODEL}" ] && ENV_VARS="${ENV_VARS},ANTHROPIC_MODEL=${ANTHROPIC_MODEL}" +[ -n "${ANTHROPIC_SMALL_FAST_MODEL}" ] && ENV_VARS="${ENV_VARS},ANTHROPIC_SMALL_FAST_MODEL=${ANTHROPIC_SMALL_FAST_MODEL}" +[ -n "${ANTHROPIC_DEFAULT_HAIKU_MODEL}" ] && ENV_VARS="${ENV_VARS},ANTHROPIC_DEFAULT_HAIKU_MODEL=${ANTHROPIC_DEFAULT_HAIKU_MODEL}" +[ -n "${DISABLE_PROMPT_CACHING}" ] && ENV_VARS="${ENV_VARS},DISABLE_PROMPT_CACHING=${DISABLE_PROMPT_CACHING}" +[ -n "${CLAUDE_CODE_USE_BEDROCK}" ] && 
ENV_VARS="${ENV_VARS},CLAUDE_CODE_USE_BEDROCK=${CLAUDE_CODE_USE_BEDROCK}" +ENV_VARS="${ENV_VARS},S3_WORKSPACE_BUCKET=${S3_WORKSPACE_BUCKET}" + +# Prepare authorizer configuration (using existing Cognito) +AUTHORIZER_CONFIG="customJWTAuthorizer={discoveryUrl=${COGNITO_DISCOVERY_URL},allowedClients=[${COGNITO_CLIENT_ID}]}" + +if [ -n "$EXISTING_RUNTIME" ]; then + echo -e "${YELLOW}Updating existing AgentCore Runtime: ${EXISTING_RUNTIME}${NC}" + + aws bedrock-agentcore-control update-agent-runtime \ + --agent-runtime-id "${EXISTING_RUNTIME}" \ + --region "${AWS_REGION}" \ + --agent-runtime-artifact "containerConfiguration={containerUri=${DOCKER_IMAGE_URI}}" \ + --network-configuration "networkMode=PUBLIC" \ + --role-arn "${ROLE_ARN}" \ + --request-header-configuration "requestHeaderAllowlist=[Authorization]" \ + --environment-variables "${ENV_VARS}" \ + --authorizer-configuration "${AUTHORIZER_CONFIG}" \ + --output json > /tmp/runtime-output.json + + RUNTIME_ID="${EXISTING_RUNTIME}" + echo -e "${GREEN}[OK]${NC} AgentCore Runtime updated" +else + echo -e "${YELLOW}Creating new AgentCore Runtime: ${RUNTIME_NAME}${NC}" + + aws bedrock-agentcore-control create-agent-runtime \ + --agent-runtime-name "${RUNTIME_NAME}" \ + --region "${AWS_REGION}" \ + --agent-runtime-artifact "containerConfiguration={containerUri=${DOCKER_IMAGE_URI}}" \ + --network-configuration "networkMode=PUBLIC" \ + --role-arn "${ROLE_ARN}" \ + --request-header-configuration "requestHeaderAllowlist=[Authorization]" \ + --environment-variables "${ENV_VARS}" \ + --authorizer-configuration "${AUTHORIZER_CONFIG}" \ + --output json > /tmp/runtime-output.json + + RUNTIME_ID=$(jq -r '.agentRuntimeId' /tmp/runtime-output.json) + echo -e "${GREEN}[OK]${NC} AgentCore Runtime created: ${RUNTIME_ID}" +fi + +# Extract runtime details +RUNTIME_ARN=$(jq -r '.agentRuntimeArn' /tmp/runtime-output.json) +WORKLOAD_IDENTITY_ARN=$(jq -r '.workloadIdentityDetails.workloadIdentityArn // "N/A"' /tmp/runtime-output.json) 
+STATUS=$(jq -r '.status' /tmp/runtime-output.json) + +# Construct Runtime URL +ENCODED_ARN=$(echo "${RUNTIME_ARN}" | sed 's/:/%3A/g' | sed 's/\//%2F/g') +RUNTIME_URL="https://bedrock-agentcore.${AWS_REGION}.amazonaws.com/runtimes/${ENCODED_ARN}" + +rm /tmp/runtime-output.json + +# ============================================================================= +# Save Outputs +# ============================================================================= +cat > "${SCRIPT_DIR}/.agentcore_output" <> /root/.bashrc && \ + echo "[ -f /app/backend/.venv/bin/activate ] && source /app/backend/.venv/bin/activate" >> /root/.bashrc && \ + # Create workspace directory for agent sessions + mkdir -p /workspace && \ + # Clean up apt cache + rm -rf /var/lib/apt/lists/* + +# Copy backend application code (pyproject.toml is in backend/) +COPY backend/ /app/backend/ + +# Copy serve script +COPY deploy/serve /app/serve +RUN chmod +x /app/serve + +# Set working directory to backend where pyproject.toml lives +WORKDIR /app/backend + +# Install Python dependencies using uv +# Note: pyproject.toml is in backend/, so we run uv sync there +RUN uv sync --frozen || uv sync + +# Set working directory back to /app for runtime +WORKDIR /app + +# Expose port 8080 (AgentCore standard port) +EXPOSE 8080 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8080/health || exit 1 + +# Run the server on port 8080 +CMD ["/app/serve"] diff --git a/deploy/config.env.template b/deploy/config.env.template new file mode 100644 index 0000000..8245e8f --- /dev/null +++ b/deploy/config.env.template @@ -0,0 +1,119 @@ +# ============================================================================= +# Slide Forge - AgentCore Deployment Configuration +# ============================================================================= +# Copy this file to config.env and fill in the required values. 
+# Required fields are marked with [REQUIRED] +# ============================================================================= + +# ============================================================================= +# AWS Configuration +# ============================================================================= +# AWS region for deployment (must support Bedrock AgentCore) +AWS_REGION=us-west-2 + +# AWS Account ID - Leave empty to auto-detect from AWS credentials +# [OPTIONAL] Will be detected automatically if not set +AWS_ACCOUNT_ID= + +# ============================================================================= +# Project Configuration +# ============================================================================= +PROJECT_NAME=slide-forge +DEPLOYMENT_ENV=prod + +# ============================================================================= +# Docker/ECR Configuration +# ============================================================================= +# ECR repository name for the AgentCore container image +ECR_REPOSITORY_NAME=slide-forge/agentcore + +# Docker image version tag +DOCKER_IMAGE_VERSION=latest + +# Note: Docker images are built for ARM64 architecture (required by Bedrock AgentCore) + +# ============================================================================= +# AgentCore Runtime Configuration +# ============================================================================= +# Name for the AgentCore runtime (must be unique per account/region) +AGENT_RUNTIME_NAME=slide_forge + +# IAM role name prefix for AgentCore execution +IAM_ROLE_NAME=SlideForgeAgentCoreRuntime + +# ============================================================================= +# Existing Slide-Forge CDK Stack Resources +# ============================================================================= +# These values should match your deployed slide-forge CDK stack + +# [REQUIRED] CDK Stack name (used to lookup existing Cognito resources) +# This is the stack name used when you 
deployed the slide-forge CDK stack
+# Example: slide-forge-dev, slide-forge-prod
+SLIDE_FORGE_STACK_NAME=slide-forge
+
+# [REQUIRED] Existing S3 bucket for workspace storage
+# This should be the uploads bucket created by the CDK stack
+# Example: slide-forge-uploads-us-west-2-123456789012
+S3_WORKSPACE_BUCKET=
+
+# =============================================================================
+# Cognito/Auth Configuration (for AgentCore JWT authorizer)
+# =============================================================================
+# Use existing Cognito from slide-forge CDK stack
+#
+# Option 1: Auto-lookup from CDK stack (recommended)
+#   Leave the values below empty and the script will look them up from CloudFormation exports
+#
+# Option 2: Manual configuration
+#   Provide the values directly if you know them
+
+# Cognito User Pool ID from existing slide-forge stack
+# Example: us-west-2_aBcDeFgHi
+COGNITO_USER_POOL_ID=
+
+# Cognito App Client ID from existing slide-forge stack
+COGNITO_CLIENT_ID=
+
+# Cognito OIDC Discovery URL (always rebuilt by 02_deploy_agentcore.sh; a value set here is ignored)
+# Format: https://cognito-idp.{region}.amazonaws.com/{userPoolId}/.well-known/openid-configuration
+COGNITO_DISCOVERY_URL=
+
+# Cognito region (usually same as AWS_REGION)
+COGNITO_REGION=
+
+# =============================================================================
+# Bedrock Model Configuration
+# =============================================================================
+# Primary model for slide generation (Sonnet recommended for quality)
+ANTHROPIC_MODEL=us.anthropic.claude-sonnet-4-20250514-v1:0
+
+# Fast model for quick tasks (Haiku recommended for speed; Claude Haiku 4.5 inference profile)
+ANTHROPIC_SMALL_FAST_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
+ANTHROPIC_DEFAULT_HAIKU_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
+
+# Disable prompt caching (0=enabled, 1=disabled)
+DISABLE_PROMPT_CACHING=0
+
+# Use AWS Bedrock for Claude API (required for AgentCore)
+CLAUDE_CODE_USE_BEDROCK=1
+
+# 
=============================================================================
+# Claude Agent SDK Tools Configuration
+# =============================================================================
+# ALLOWED_TOOLS: Comma-separated list of tools to enable in agent sessions
+# Leave empty to enable all tools (default)
+# Available tools: Read, Write, Edit, Glob, Grep, Bash, NotebookEdit, WebFetch,
+#                  Task, TodoWrite, BashOutput, KillShell, AskUserQuestion,
+#                  Skill, SlashCommand, ExitPlanMode, ListMcpResourcesTool, ReadMcpResourceTool
+# ALLOWED_TOOLS=Read,Write,Edit,Glob,Grep,Bash
+
+# AUTO_ALLOW_TOOLS: Comma-separated list of tools to auto-approve without user permission
+# Leave empty to use default (Read, Write, Edit, Bash, Glob, Grep)
+# AUTO_ALLOW_TOOLS=Read,Glob,Grep
+
+# =============================================================================
+# Tags (for resource organization)
+# =============================================================================
+TAG_ENVIRONMENT=production
+TAG_PROJECT=slide-forge
+TAG_MANAGED_BY=deployment-script
diff --git a/deploy/serve b/deploy/serve
new file mode 100755
index 0000000..4dd7ae2
--- /dev/null
+++ b/deploy/serve
@@ -0,0 +1,35 @@
+#!/bin/bash
+# =============================================================================
+# Slide Forge AgentCore Startup Script
+# =============================================================================
+# Starts the FastAPI server using uvicorn on port 8080
+# Logs are written to both stdout and a timestamped log file
+# =============================================================================
+
+set -eo pipefail  # pipefail: the final "uvicorn ... | tee" must exit non-zero when uvicorn fails
+
+# Change to app directory (parent of backend module)
+cd /app/
+
+# Create logs directory if it doesn't exist
+mkdir -p logs
+
+# Add backend to Python path and activate virtual environment
+export PYTHONPATH="/app:$PYTHONPATH"
+source /app/backend/.venv/bin/activate
+
+# Generate log filename with timestamp
+LOG_FILE="logs/server_$(date +%Y%m%d_%H%M%S).log"
+
+echo "========================================" +echo "Slide Forge AgentCore Server Starting..." +echo "========================================" +echo "Working directory: $(pwd)" +echo "Log file: $LOG_FILE" +echo "Port: 8080" +echo "========================================" + +# Run server with output to both stdout and log file +# Using 'tee' to duplicate output stream +# Note: backend.server is the module path from /app/ +python -m uvicorn backend.server:app --host 0.0.0.0 --port 8080 2>&1 | tee "$LOG_FILE" diff --git a/frontend/package.json b/frontend/package.json index 650535f..0058970 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -26,6 +26,7 @@ "@ariakit/react": "^0.4.17", "@aws-sdk/client-bedrock-runtime": "^3.982.0", "@aws-sdk/client-s3": "^3.982.0", + "@aws-sdk/credential-providers": "^3.983.0", "@dnd-kit/core": "^6.3.1", "@dnd-kit/sortable": "^10.0.0", "@dnd-kit/utilities": "^3.2.2", @@ -103,6 +104,7 @@ "@uploadthing/react": "^7.3.1", "@use-gesture/react": "^10.3.1", "ai": "^4.3.19", + "aws4fetch": "^1.0.20", "bcryptjs": "^3.0.3", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index abd130b..b5180ce 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -43,6 +43,9 @@ importers: '@aws-sdk/client-s3': specifier: ^3.982.0 version: 3.982.0 + '@aws-sdk/credential-providers': + specifier: ^3.983.0 + version: 3.983.0 '@dnd-kit/core': specifier: ^6.3.1 version: 6.3.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) @@ -274,6 +277,9 @@ importers: ai: specifier: ^4.3.19 version: 4.3.19(react@19.1.0)(zod@3.25.76) + aws4fetch: + specifier: ^1.0.20 + version: 1.0.20 bcryptjs: specifier: ^3.0.3 version: 3.0.3 @@ -695,6 +701,14 @@ packages: resolution: {integrity: sha512-PCe3TYV/2kAG5F9ZhZL7CKdKHMzAnXtERI0MdANfCr1PhmuMop2xqSN0A16wYzHsINbeJZZV8GNoshx9W53xSA==} engines: {node: '>=20.0.0'} + '@aws-sdk/client-cognito-identity@3.980.0': + resolution: {integrity: 
sha512-nLgMW2drTzv+dTo3ORCcotQPcrUaTQ+xoaDTdSaUXdZO7zbbVyk7ysE5GDTnJdZWcUjHOSB8xfNQhOTTNVPhFw==} + engines: {node: '>=20.0.0'} + + '@aws-sdk/client-cognito-identity@3.983.0': + resolution: {integrity: sha512-ZbDx0koMsnj6wDH1BGKcbsO5DB34XfJB8/u/WNIyqQp04LXqXTcLCV1TgflRIyJ6RwYxsssic2mQ8HfZPGRqEg==} + engines: {node: '>=20.0.0'} + '@aws-sdk/client-s3@3.982.0': resolution: {integrity: sha512-k0ANYAtPiON9BwLXcDgJXkmmCAGEuSk2pZOvrMej2kNhs3xTXoPshIUR5UMCD9apYiWtXJJfXMZSgaME+iWNaQ==} engines: {node: '>=20.0.0'} @@ -711,6 +725,10 @@ packages: resolution: {integrity: sha512-ThlLhTqX68jvoIVv+pryOdb5coP1cX1/MaTbB9xkGDCbWbsqQcLqzPxuSoW1DCnAAIacmXCWpzUNOB9pv+xXQw==} engines: {node: '>=20.0.0'} + '@aws-sdk/credential-provider-cognito-identity@3.972.3': + resolution: {integrity: sha512-dW/DqTk90XW7hIngqntAVtJJyrkS51wcLhGz39lOMe0TlSmZl+5R/UGnAZqNbXmWuJHLzxe+MLgagxH41aTsAQ==} + engines: {node: '>=20.0.0'} + '@aws-sdk/credential-provider-env@3.972.4': resolution: {integrity: sha512-/8dnc7+XNMmViEom2xsNdArQxQPSgy4Z/lm6qaFPTrMFesT1bV3PsBhb19n09nmxHdrtQskYmViddUIjUQElXg==} engines: {node: '>=20.0.0'} @@ -743,6 +761,10 @@ packages: resolution: {integrity: sha512-hIzw2XzrG8jzsUSEatehmpkd5rWzASg5IHUfA+m01k/RtvfAML7ZJVVohuKdhAYx+wV2AThLiQJVzqn7F0khrw==} engines: {node: '>=20.0.0'} + '@aws-sdk/credential-providers@3.983.0': + resolution: {integrity: sha512-G2nmPoHdEhLJMae0Y4CpkR5OlsQKUXAi7LNLUOZfNMFCstPQfI6uEHqTmKT9EyrbQkD3Y+rAbRTxTt3FMm+B4A==} + engines: {node: '>=20.0.0'} + '@aws-sdk/eventstream-handler-node@3.972.4': resolution: {integrity: sha512-LPIN505kUqL3xwtoGYgYkctkUUuVUD4pzZfSo+CahavNft+zty5xWYWhKfnZOKBkYCMUl2Hl/9mkoPeYwxfQvQ==} engines: {node: '>=20.0.0'} @@ -799,6 +821,10 @@ packages: resolution: {integrity: sha512-VVkaH27digrJfdVrT64rjkllvOp4oRiZuuJvrylLXAKl18ujToJR7AqpDldL/LS63RVne3QWIpkygIymxFtliQ==} engines: {node: '>=20.0.0'} + '@aws-sdk/nested-clients@3.983.0': + resolution: {integrity: 
sha512-4bUzDkJlSPwfegO23ZSBrheuTI8UyAgNzptm1K6fZAIOIc1vnFl12TonecbssAfmM0/UdyTn5QDomwEfIdmJkQ==} + engines: {node: '>=20.0.0'} + '@aws-sdk/region-config-resolver@3.972.3': resolution: {integrity: sha512-v4J8qYAWfOMcZ4MJUyatntOicTzEMaU7j3OpkRCGGFSL2NgXQ5VbxauIyORA+pxdKZ0qQG2tCQjQjZDlXEC3Ow==} engines: {node: '>=20.0.0'} @@ -819,10 +845,18 @@ packages: resolution: {integrity: sha512-VkykWbqMjlSgBFDyrY3nOSqupMc6ivXuGmvci6Q3NnLq5kC+mKQe2QBZ4nrWRE/jqOxeFP2uYzLtwncYYcvQDg==} engines: {node: '>=20.0.0'} + '@aws-sdk/util-endpoints@3.980.0': + resolution: {integrity: sha512-AjKBNEc+rjOZQE1HwcD9aCELqg1GmUj1rtICKuY8cgwB73xJ4U/kNyqKKpN2k9emGqlfDY2D8itIp/vDc6OKpw==} + engines: {node: '>=20.0.0'} + '@aws-sdk/util-endpoints@3.982.0': resolution: {integrity: sha512-M27u8FJP7O0Of9hMWX5dipp//8iglmV9jr7R8SR8RveU+Z50/8TqH68Tu6wUWBGMfXjzbVwn1INIAO5lZrlxXQ==} engines: {node: '>=20.0.0'} + '@aws-sdk/util-endpoints@3.983.0': + resolution: {integrity: sha512-t/VbL2X3gvDEjC4gdySOeFFOZGQEBKwa23pRHeB7hBLBZ119BB/2OEFtTFWKyp3bnMQgxpeVeGS7/hxk6wpKJw==} + engines: {node: '>=20.0.0'} + '@aws-sdk/util-format-url@3.972.3': resolution: {integrity: sha512-n7F2ycckcKFXa01vAsT/SJdjFHfKH9s96QHcs5gn8AaaigASICeME8WdUL9uBp8XV/OVwEt8+6gzn6KFUgQa8g==} engines: {node: '>=20.0.0'} @@ -3135,6 +3169,9 @@ packages: resolution: {integrity: sha512-0bDNnY/u6pPwHDMoF0FieU354oBi0a8rD9FcsLwzcGWbc8KS8KPIi7y+s13OlVY+gMWc/9xEMUgNE6Qm8ZllYQ==} engines: {node: '>=4'} + aws4fetch@1.0.20: + resolution: {integrity: sha512-/djoAN709iY65ETD6LKCtyyEI04XIBP5xVvfmNxsEP0uJB5tyaGBztSryRr4HqMStr9R06PisQE7m9zDTXKu6g==} + axios@1.12.2: resolution: {integrity: sha512-vMJzPewAlRyOgxV2dU0Cuz2O8zzzx9VYtbJOaBgXFeLc4IV/Eg50n4LowmehOOR61S8ZMpc2K5Sa7g6A4jfkUw==} @@ -6268,6 +6305,94 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/client-cognito-identity@3.980.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.973.6 + '@aws-sdk/credential-provider-node': 3.972.5 + 
'@aws-sdk/middleware-host-header': 3.972.3 + '@aws-sdk/middleware-logger': 3.972.3 + '@aws-sdk/middleware-recursion-detection': 3.972.3 + '@aws-sdk/middleware-user-agent': 3.972.6 + '@aws-sdk/region-config-resolver': 3.972.3 + '@aws-sdk/types': 3.973.1 + '@aws-sdk/util-endpoints': 3.980.0 + '@aws-sdk/util-user-agent-browser': 3.972.3 + '@aws-sdk/util-user-agent-node': 3.972.4 + '@smithy/config-resolver': 4.4.6 + '@smithy/core': 3.22.1 + '@smithy/fetch-http-handler': 5.3.9 + '@smithy/hash-node': 4.2.8 + '@smithy/invalid-dependency': 4.2.8 + '@smithy/middleware-content-length': 4.2.8 + '@smithy/middleware-endpoint': 4.4.13 + '@smithy/middleware-retry': 4.4.30 + '@smithy/middleware-serde': 4.2.9 + '@smithy/middleware-stack': 4.2.8 + '@smithy/node-config-provider': 4.3.8 + '@smithy/node-http-handler': 4.4.9 + '@smithy/protocol-http': 5.3.8 + '@smithy/smithy-client': 4.11.2 + '@smithy/types': 4.12.0 + '@smithy/url-parser': 4.2.8 + '@smithy/util-base64': 4.3.0 + '@smithy/util-body-length-browser': 4.2.0 + '@smithy/util-body-length-node': 4.2.1 + '@smithy/util-defaults-mode-browser': 4.3.29 + '@smithy/util-defaults-mode-node': 4.2.32 + '@smithy/util-endpoints': 3.2.8 + '@smithy/util-middleware': 4.2.8 + '@smithy/util-retry': 4.2.8 + '@smithy/util-utf8': 4.2.0 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-cognito-identity@3.983.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.973.6 + '@aws-sdk/credential-provider-node': 3.972.5 + '@aws-sdk/middleware-host-header': 3.972.3 + '@aws-sdk/middleware-logger': 3.972.3 + '@aws-sdk/middleware-recursion-detection': 3.972.3 + '@aws-sdk/middleware-user-agent': 3.972.6 + '@aws-sdk/region-config-resolver': 3.972.3 + '@aws-sdk/types': 3.973.1 + '@aws-sdk/util-endpoints': 3.983.0 + '@aws-sdk/util-user-agent-browser': 3.972.3 + '@aws-sdk/util-user-agent-node': 3.972.4 + '@smithy/config-resolver': 4.4.6 + '@smithy/core': 3.22.1 + 
'@smithy/fetch-http-handler': 5.3.9 + '@smithy/hash-node': 4.2.8 + '@smithy/invalid-dependency': 4.2.8 + '@smithy/middleware-content-length': 4.2.8 + '@smithy/middleware-endpoint': 4.4.13 + '@smithy/middleware-retry': 4.4.30 + '@smithy/middleware-serde': 4.2.9 + '@smithy/middleware-stack': 4.2.8 + '@smithy/node-config-provider': 4.3.8 + '@smithy/node-http-handler': 4.4.9 + '@smithy/protocol-http': 5.3.8 + '@smithy/smithy-client': 4.11.2 + '@smithy/types': 4.12.0 + '@smithy/url-parser': 4.2.8 + '@smithy/util-base64': 4.3.0 + '@smithy/util-body-length-browser': 4.2.0 + '@smithy/util-body-length-node': 4.2.1 + '@smithy/util-defaults-mode-browser': 4.3.29 + '@smithy/util-defaults-mode-node': 4.2.32 + '@smithy/util-endpoints': 3.2.8 + '@smithy/util-middleware': 4.2.8 + '@smithy/util-retry': 4.2.8 + '@smithy/util-utf8': 4.2.0 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/client-s3@3.982.0': dependencies: '@aws-crypto/sha1-browser': 5.2.0 @@ -6392,6 +6517,16 @@ snapshots: '@smithy/types': 4.12.0 tslib: 2.8.1 + '@aws-sdk/credential-provider-cognito-identity@3.972.3': + dependencies: + '@aws-sdk/client-cognito-identity': 3.980.0 + '@aws-sdk/types': 3.973.1 + '@smithy/property-provider': 4.2.8 + '@smithy/types': 4.12.0 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/credential-provider-env@3.972.4': dependencies: '@aws-sdk/core': 3.973.6 @@ -6496,6 +6631,31 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/credential-providers@3.983.0': + dependencies: + '@aws-sdk/client-cognito-identity': 3.983.0 + '@aws-sdk/core': 3.973.6 + '@aws-sdk/credential-provider-cognito-identity': 3.972.3 + '@aws-sdk/credential-provider-env': 3.972.4 + '@aws-sdk/credential-provider-http': 3.972.6 + '@aws-sdk/credential-provider-ini': 3.972.4 + '@aws-sdk/credential-provider-login': 3.972.4 + '@aws-sdk/credential-provider-node': 3.972.5 + '@aws-sdk/credential-provider-process': 3.972.4 + '@aws-sdk/credential-provider-sso': 3.972.4 + 
'@aws-sdk/credential-provider-web-identity': 3.972.4 + '@aws-sdk/nested-clients': 3.983.0 + '@aws-sdk/types': 3.973.1 + '@smithy/config-resolver': 4.4.6 + '@smithy/core': 3.22.1 + '@smithy/credential-provider-imds': 4.2.8 + '@smithy/node-config-provider': 4.3.8 + '@smithy/property-provider': 4.2.8 + '@smithy/types': 4.12.0 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/eventstream-handler-node@3.972.4': dependencies: '@aws-sdk/types': 3.973.1 @@ -6660,6 +6820,49 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/nested-clients@3.983.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.973.6 + '@aws-sdk/middleware-host-header': 3.972.3 + '@aws-sdk/middleware-logger': 3.972.3 + '@aws-sdk/middleware-recursion-detection': 3.972.3 + '@aws-sdk/middleware-user-agent': 3.972.6 + '@aws-sdk/region-config-resolver': 3.972.3 + '@aws-sdk/types': 3.973.1 + '@aws-sdk/util-endpoints': 3.983.0 + '@aws-sdk/util-user-agent-browser': 3.972.3 + '@aws-sdk/util-user-agent-node': 3.972.4 + '@smithy/config-resolver': 4.4.6 + '@smithy/core': 3.22.1 + '@smithy/fetch-http-handler': 5.3.9 + '@smithy/hash-node': 4.2.8 + '@smithy/invalid-dependency': 4.2.8 + '@smithy/middleware-content-length': 4.2.8 + '@smithy/middleware-endpoint': 4.4.13 + '@smithy/middleware-retry': 4.4.30 + '@smithy/middleware-serde': 4.2.9 + '@smithy/middleware-stack': 4.2.8 + '@smithy/node-config-provider': 4.3.8 + '@smithy/node-http-handler': 4.4.9 + '@smithy/protocol-http': 5.3.8 + '@smithy/smithy-client': 4.11.2 + '@smithy/types': 4.12.0 + '@smithy/url-parser': 4.2.8 + '@smithy/util-base64': 4.3.0 + '@smithy/util-body-length-browser': 4.2.0 + '@smithy/util-body-length-node': 4.2.1 + '@smithy/util-defaults-mode-browser': 4.3.29 + '@smithy/util-defaults-mode-node': 4.2.32 + '@smithy/util-endpoints': 3.2.8 + '@smithy/util-middleware': 4.2.8 + '@smithy/util-retry': 4.2.8 + '@smithy/util-utf8': 4.2.0 + tslib: 2.8.1 + 
transitivePeerDependencies: + - aws-crt + '@aws-sdk/region-config-resolver@3.972.3': dependencies: '@aws-sdk/types': 3.973.1 @@ -6698,6 +6901,14 @@ snapshots: dependencies: tslib: 2.8.1 + '@aws-sdk/util-endpoints@3.980.0': + dependencies: + '@aws-sdk/types': 3.973.1 + '@smithy/types': 4.12.0 + '@smithy/url-parser': 4.2.8 + '@smithy/util-endpoints': 3.2.8 + tslib: 2.8.1 + '@aws-sdk/util-endpoints@3.982.0': dependencies: '@aws-sdk/types': 3.973.1 @@ -6706,6 +6917,14 @@ snapshots: '@smithy/util-endpoints': 3.2.8 tslib: 2.8.1 + '@aws-sdk/util-endpoints@3.983.0': + dependencies: + '@aws-sdk/types': 3.973.1 + '@smithy/types': 4.12.0 + '@smithy/url-parser': 4.2.8 + '@smithy/util-endpoints': 3.2.8 + tslib: 2.8.1 + '@aws-sdk/util-format-url@3.972.3': dependencies: '@aws-sdk/types': 3.973.1 @@ -9160,6 +9379,8 @@ snapshots: attr-accept@2.2.5: {} + aws4fetch@1.0.20: {} + axios@1.12.2: dependencies: follow-redirects: 1.15.11 diff --git a/frontend/src/app/api/agent/chat/route.ts b/frontend/src/app/api/agent/chat/route.ts index f5b383d..d65ac8c 100644 --- a/frontend/src/app/api/agent/chat/route.ts +++ b/frontend/src/app/api/agent/chat/route.ts @@ -1,464 +1,905 @@ /** - * Agent ๅฏน่ฏ API - * ๅค„็†็”จๆˆทๆถˆๆฏ๏ผŒไฝฟ็”จ Claude Agent SDK ็”Ÿๆˆๅ“ๅบ” - * ๆ”ฏๆŒๆตๅผๅ“ๅบ”๏ผˆServer-Sent Events๏ผ‰ + * Agent Chat API + * Handles user messages and generates responses using Claude Agent + * Supports streaming responses (Server-Sent Events) * - * ไผ˜ๅŒ–: - * - ็ซ‹ๅณๅปบ็ซ‹ SSE ่ฟžๆŽฅ๏ผˆ< 1 ็ง’ๅ“ๅบ”๏ผ‰ - * - ไฝฟ็”จไผš่ฏๆฑ ่Žทๅ–้ข„็ƒญ็š„ Agent ๅฎžไพ‹๏ผˆ< 5 ็ง’ๅฐฑ็ปช๏ผ‰ - * - ๅ‘้€่ฟ›ๅบฆๆถˆๆฏๅ’Œๅฟƒ่ทณไฟๆŒ่ฟžๆŽฅ + * Architecture: + * - If AGENTCORE_RUNTIME_URL is configured: Forwards to AgentCore Runtime backend + * - Otherwise: Falls back to local Agent SDK (legacy mode) * - * ๅ‚่€ƒ: claude-agent-sdk-demos/simple-chatapp + * The frontend remains a thin proxy that: + * 1. Authenticates the user + * 2. Manages session metadata in S3 + * 3. Forwards requests to AgentCore + * 4. 
Detects slides in streaming responses and emits slide_complete events + * + * Reference: claude-agent-sdk-demos/simple-chatapp */ import { auth } from "@/server/auth"; -import { agentService } from "@/lib/agent/agent-service"; import { sessionManager } from "@/lib/agent/session-manager"; import { NextResponse } from "next/server"; -import type { ChatRequest, Message } from "@/lib/agent/types"; +import { type ChatRequest, type Message } from "@/lib/agent/types"; import { extractSlidesFromMessages } from "@/lib/agent/utils/extract-slides"; +import { createAgentCoreClient } from "@/lib/agent/agentcore-client"; +import { env } from "@/env"; // Configure route timeout for long-running agent operations export const maxDuration = 180; // 3 minutes (matches CloudFront timeout) -export async function POST(req: Request) { - try { - // 1. ้ชŒ่ฏ็”จๆˆท่บซไปฝ - const session = await auth(); - if (!session?.user?.id) { - return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); - } +/** + * Check if AgentCore backend is available + */ +function isAgentCoreEnabled(): boolean { + return !!env.AGENTCORE_RUNTIME_URL; +} - // 2. ่งฃๆž่ฏทๆฑ‚ - const { message, sessionId, files, enableWebSearch = true } = (await req.json()) as ChatRequest; +/** + * Handle chat request using AgentCore Runtime backend + */ +async function handleAgentCoreChat( + _req: Request, + userId: string, + chatRequest: ChatRequest, + accessToken: string | undefined +): Promise { + const { message, sessionId, files, enableWebSearch = true } = chatRequest; + + // Check for valid token + if (!accessToken) { + throw new Error("No authentication token available. Please sign in again."); + } - if (!message || !sessionId) { - return NextResponse.json( - { error: "Missing required fields: message, sessionId" }, - { status: 400 }, - ); - } + // Get or create session in S3 (metadata storage) + let dbSession = await sessionManager.getSession(sessionId, userId); - // 3. 
่Žทๅ–ๆˆ–ๅˆ›ๅปบไผš่ฏๆ•ฐๆฎ - let dbSession = await sessionManager.getSession( + if (!dbSession) { + console.log( + `[Agent Chat] Session ${sessionId} not found, creating new session` + ); + dbSession = await sessionManager.createSessionWithId( sessionId, - session.user.id, + userId, + "New Agent Session" ); + } - // โœจ ๅฆ‚ๆžœ session ไธๅญ˜ๅœจ๏ผŒ่‡ชๅŠจๅˆ›ๅปบ - if (!dbSession) { - console.log(`[Agent Chat] Session ${sessionId} not found, creating new session`); + // Construct full message with file contents + let fullMessage = message; + if (files && files.length > 0) { + const filesText = files + .map((f) => `File: ${f.name}\nContent:\n${f.content}`) + .join("\n\n"); + fullMessage += `\n\nUploaded files:\n${filesText}`; + } - // ไฝฟ็”จ่‡ชๅฎšไน‰ sessionId ๅˆ›ๅปบ session - dbSession = await sessionManager.createSessionWithId( - sessionId, - session.user.id, - "New Agent Session" - ); - } + // Get existing messages for later saving + const sessionMessages = ( + Array.isArray(dbSession.messages) ? dbSession.messages : [] + ) as unknown as Message[]; - // 4. ๆž„้€ ๅฎŒๆ•ด็š„ๆถˆๆฏ๏ผˆๅŒ…ๅซๆ–‡ไปถๅ†…ๅฎน๏ผ‰ - let fullMessage = message; - if (files && files.length > 0) { - const filesText = files - .map((f) => `File: ${f.name}\nContent:\n${f.content}`) - .join("\n\n"); - fullMessage += `\n\nUploaded files:\n${filesText}`; - } + // Create AgentCore client with JWT token provider + const agentCoreClient = createAgentCoreClient(async () => accessToken); - // 5. ๅ‡†ๅค‡ไผš่ฏๆ•ฐๆฎๅ’Œ้…็ฝฎ - const sessionMessages = (Array.isArray(dbSession.messages) ? dbSession.messages : []) as unknown as Message[]; - const isNewSession = sessionMessages.length === 0; - - // ๆ นๆฎ enableWebSearch ้…็ฝฎ Agent ๅทฅๅ…ท - const agentConfig = { - allowedTools: enableWebSearch - ? ["Read", "Glob", "Grep", "WebSearch", "WebFetch"] - : ["Read", "Glob", "Grep"], // ็ฆ็”จๆœ็ดขๆ—ถๆŽ’้™ค WebSearch ๅ’Œ WebFetch - }; - - // 6. 
โœ… ็ซ‹ๅณๅปบ็ซ‹ SSE ๆต๏ผˆๆ ธๅฟƒไผ˜ๅŒ–๏ผšๅœจ Agent ๅˆๅง‹ๅŒ–ไน‹ๅ‰๏ผ‰ - const stream = new ReadableStream({ - async start(controller) { - const encoder = new TextEncoder(); - let fullResponse = ""; - let responseComplete = false; - let heartbeatInterval: NodeJS.Timeout | null = null; - - // ่พ…ๅŠฉๅ‡ฝๆ•ฐ๏ผšๅ‘้€ SSE ๆถˆๆฏ - const sendSSE = (type: string, data: any) => { - try { - const message = JSON.stringify({ type, ...data }); - controller.enqueue(encoder.encode(`data: ${message}\n\n`)); - } catch (err) { - console.error('[Agent Chat] Failed to send SSE:', err); - // ๅฎขๆˆท็ซฏๅฏ่ƒฝๅทฒๆ–ญๅผ€่ฟžๆŽฅ - throw err; - } - }; + if (!agentCoreClient) { + throw new Error("AgentCore client not configured"); + } + // Create SSE stream + const stream = new ReadableStream({ + async start(controller) { + const encoder = new TextEncoder(); + let fullResponse = ""; + let heartbeatInterval: NodeJS.Timeout | null = null; + + // Helper to send SSE messages + const sendSSE = (type: string, data: Record) => { try { - // โœ… ๆญฅ้ชค 1: ็ซ‹ๅณๅ‘้€่ฟžๆŽฅ็Šถๆ€๏ผˆ< 1 ็ง’๏ผ‰ - sendSSE('status', { - status: 'connecting', - message: 'Establishing connection...' - }); + const message = JSON.stringify({ type, ...data }); + controller.enqueue(encoder.encode(`data: ${message}\n\n`)); + } catch (err) { + console.error("[Agent Chat] Failed to send SSE:", err); + throw err; + } + }; - // โœ… ๆญฅ้ชค 2: ๅฏๅŠจๅฟƒ่ทณๆœบๅˆถ๏ผˆๆฏ 15 ็ง’๏ผ‰ - heartbeatInterval = setInterval(() => { - try { - sendSSE('heartbeat', { timestamp: Date.now() }); - } catch (err) { - // ๅฎขๆˆท็ซฏๆ–ญๅผ€๏ผŒๆธ…็†่ต„ๆบ - if (heartbeatInterval) clearInterval(heartbeatInterval); - } - }, 15000); + try { + // Step 1: Send connecting status + sendSSE("status", { + status: "connecting", + message: "Establishing connection...", + }); - // โœ… ๆญฅ้ชค 3: ไปŽๆฑ ไธญ่Žทๅ– Agent๏ผˆ3-5 ็ง’๏ผŒๅŽๅฐ่ฟ›่กŒ๏ผ‰ - sendSSE('status', { - status: 'initializing_agent', - message: 'Preparing AI agent...' 
+ // Step 2: Start heartbeat + heartbeatInterval = setInterval(() => { + try { + sendSSE("heartbeat", { timestamp: Date.now() }); + } catch { + if (heartbeatInterval) clearInterval(heartbeatInterval); + } + }, 15000); + + // Step 3: Create or resume AgentCore session + sendSSE("status", { + status: "initializing_agent", + message: "Preparing AI agent...", + }); + + // Get or create AgentCore session + // Use the database session's sdkSessionId if available for resume + const sdkSessionId = dbSession.sdkSessionId; + const agentCoreSession = await agentCoreClient.createSession({ + user_id: userId, + resume_session_id: sdkSessionId || undefined, + }); + + console.log( + `[Agent Chat] Using AgentCore session: ${agentCoreSession.session_id} for db session: ${sessionId}` + ); + + // Update the database session with the AgentCore session ID if new + if (!sdkSessionId || sdkSessionId !== agentCoreSession.session_id) { + await sessionManager.updateSession(sessionId, userId, { + sdkSessionId: agentCoreSession.session_id, }); + } - // โœ… ไฝฟ็”จ agentService ๆŒ‰ๆ•ฐๆฎๅบ“ sessionId ่Žทๅ–ๆˆ–ๅˆ›ๅปบ Agent - // ๅŒไธ€ไธช sessionId ไผšๅค็”จๅŒไธ€ไธช Agent instance๏ผˆไฟ็•™ๅކๅฒ๏ผ‰ - // ไธๅŒ sessionId ไฝฟ็”จไธๅŒ Agent instance๏ผˆๅฎŒๅ…จ้š”็ฆป๏ผ‰ - // โœ… SDK resumeๆ”ฏๆŒ๏ผšๅฆ‚ๆžœๆ•ฐๆฎๅบ“ไธญๆœ‰sdkSessionId๏ผŒไผš่‡ชๅŠจๆขๅคไผš่ฏ - const agentSession = await agentService.getOrCreateSession(sessionId, agentConfig); + // Step 4: Send ready status + sendSSE("status", { + status: "ready", + message: "Agent ready, processing your request...", + }); + + // Step 5: Stream message through AgentCore + // Slide detection buffer + let slideBuffer = ""; + const detectedSlides = new Map< + number, + { index: number; html: string } + >(); + + /** + * Process incoming text for slide detection + * Uses emoji markers to detect complete slides + */ + const processTextForSlides = (text: string) => { + slideBuffer += text; + + const slideRegex = + 
/๐ŸŽฏSLIDE_START:(\d+)๐ŸŽฏ([\s\S]*?)๐ŸŽฏSLIDE_END:\1๐ŸŽฏ/g; + const matches: Array<{ + slideIndex: number; + slideContent: string; + matchEnd: number; + }> = []; + let match; + + while ((match = slideRegex.exec(slideBuffer)) !== null) { + matches.push({ + slideIndex: parseInt(match[1]!, 10), + slideContent: match[2]!, + matchEnd: match.index + match[0].length, + }); + } - console.log(`[Agent Chat] Using agent session for database sessionId: ${sessionId}, SDK sessionId: ${agentSession.sdkSessionId || 'new'}`); + // Process matched slides + for (const matchData of matches) { + const htmlMatch = matchData.slideContent.match( + /```html-slide\s*([\s\S]*?)\s*```/ + ); + if (htmlMatch?.[1]) { + const slideHTML = htmlMatch[1].trim(); + + detectedSlides.set(matchData.slideIndex, { + index: matchData.slideIndex, + html: slideHTML, + }); + + // Emit slide_complete event + sendSSE("slide_complete", { + slideIndex: matchData.slideIndex, + html: slideHTML, + timestamp: Date.now(), + }); + + console.log( + `[Agent Chat] Slide ${matchData.slideIndex} streamed successfully` + ); + } + } - // โœ… ๆญฅ้ชค 4: ๅ‘้€ๅฐฑ็ปช็Šถๆ€ - sendSSE('status', { - status: 'ready', - message: 'Agent ready, processing your request...' 
- }); + // Clear processed content from buffer + if (matches.length > 0) { + const lastMatchEnd = matches[matches.length - 1]!.matchEnd; + slideBuffer = slideBuffer.substring(lastMatchEnd); + console.log( + `[Agent Chat] Processed ${matches.length} slides, buffer remaining: ${slideBuffer.length} chars` + ); + } + }; - // โœ… ๆญฅ้ชค 5: ่ฎพ็ฝฎ Agent ็›‘ๅฌๅ™จ๏ผˆๅขžๅผบๅนป็ฏ็‰‡ๆตๅผๆฃ€ๆต‹๏ผ‰ - // ๅนป็ฏ็‰‡ๆฃ€ๆต‹buffer - let slideBuffer = ""; - // โœ… ๆ”ถ้›†ๆ‰€ๆœ‰ๆฃ€ๆต‹ๅˆฐ็š„ๅนป็ฏ็‰‡๏ผˆ็”จไบŽๆ•ฐๆฎๅบ“ๅŒๆญฅ๏ผ‰ - const detectedSlides = new Map(); - - // ๅˆ›ๅปบ็›‘ๅฌๅ™จ - const listener = (chunk: any) => { - try { - // ๅค„็†ไธๅŒ็ฑปๅž‹็š„ๆถˆๆฏ - if (chunk.type === "assistant") { - const content = chunk.message?.content; - - if (typeof content === "string") { - fullResponse += content; - slideBuffer += content; // ็ดฏ็งฏๅˆฐๅนป็ฏ็‰‡buffer - - // ๐ŸŽฏ ๆฃ€ๆต‹ๅฎŒๆ•ดๅนป็ฏ็‰‡๏ผˆไฝฟ็”จ emoji ๆ ‡่ฎฐ๏ผ‰ - const slideRegex = /๐ŸŽฏSLIDE_START:(\d+)๐ŸŽฏ([\s\S]*?)๐ŸŽฏSLIDE_END:\1๐ŸŽฏ/g; - const matches: Array<{ - slideIndex: number; - slideContent: string; - fullMatch: string; - matchEnd: number; - }> = []; - let match; - - // โœ… ๅ…ˆๆ”ถ้›†ๆ‰€ๆœ‰ๅŒน้…๏ผŒไธไฟฎๆ”น buffer - while ((match = slideRegex.exec(slideBuffer)) !== null) { - matches.push({ - slideIndex: parseInt(match[1]!), - slideContent: match[2]!, - fullMatch: match[0], - matchEnd: match.index + match[0].length, - }); - } + // Stream the message + for await (const event of agentCoreClient.sendMessageStream( + agentCoreSession.session_id, + { + message: fullMessage, + enable_web_search: enableWebSearch, + } + )) { + // Handle different event types + switch (event.type) { + case "text": + if (event.content) { + fullResponse += event.content; + processTextForSlides(event.content); + + // Forward text to client + sendSSE("assistant_message", { + content: event.content, + }); + } + break; + + case "tool_use": + sendSSE("tool_use", { + toolName: event.tool_name, + toolInput: event.tool_input, + }); + break; + + case 
"slide_complete": + // AgentCore already detected a slide - forward and track it + if ( + event.slide_index !== undefined && + event.html + ) { + detectedSlides.set(event.slide_index, { + index: event.slide_index, + html: event.html, + }); + + sendSSE("slide_complete", { + slideIndex: event.slide_index, + html: event.html, + timestamp: event.timestamp || Date.now(), + }); + } + break; + + case "result": + sendSSE("result", { + success: true, + cost_usd: event.cost_usd, + num_turns: event.num_turns, + }); + break; + + case "done": + // Stream complete - close connection + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + if (heartbeatInterval) clearInterval(heartbeatInterval); + controller.close(); + + console.log( + `[Agent Chat] SSE stream closed for session ${sessionId}` + ); + + // Background: Save messages and slides to database + const updatedMessages: Message[] = [ + ...sessionMessages, + { role: "user" as const, content: message, timestamp: new Date() }, + { + role: "assistant" as const, + content: fullResponse, + timestamp: new Date(), + }, + ]; + + // Async save (don't block stream close) + Promise.race([ + (async () => { + try { + const updatedSession = await sessionManager.updateMessages( + sessionId, + userId, + updatedMessages + ); + + // Sync detected slides to database + if (detectedSlides.size > 0 && updatedSession) { + const existingSlides = + (updatedSession.slides as unknown[]) || []; + const existingSlidesMap = new Map(); + + if (Array.isArray(existingSlides)) { + existingSlides.forEach((slide: unknown) => { + const s = slide as { index?: number }; + if (s && typeof s.index === "number") { + existingSlidesMap.set(s.index, slide); + } + }); + } + + // Merge new slides + detectedSlides.forEach((data, index) => { + const existingSlide = existingSlidesMap.get(index) as + | { + outlineContent?: string; + modificationCount?: number; + conversationHistory?: unknown[]; + } + | undefined; + existingSlidesMap.set(index, { + id: `slide-${index}`, + 
index, + html: data.html, + status: "ready" as const, + outlineContent: + existingSlide?.outlineContent || `Slide ${index + 1}`, + modificationCount: + (existingSlide?.modificationCount || 0) + + (existingSlide ? 1 : 0), + conversationHistory: + existingSlide?.conversationHistory || [], + }); + }); - // โœ… ๅค„็†ๆ‰€ๆœ‰ๅŒน้…็š„ๅนป็ฏ็‰‡ - for (const matchData of matches) { - // ไปŽๅ†…ๅฎนไธญๆๅ– HTML๏ผˆๅŽป้™ค ```html-slide ๆ ‡่ฎฐ๏ผ‰ - const htmlMatch = matchData.slideContent.match(/```html-slide\s*([\s\S]*?)\s*```/); - if (htmlMatch && htmlMatch[1]) { - const slideHTML = htmlMatch[1].trim(); - - // โœ… ๆ”ถ้›†ๅˆฐ detectedSlides๏ผˆ็”จไบŽๆ•ฐๆฎๅบ“ๅŒๆญฅ๏ผ‰ - detectedSlides.set(matchData.slideIndex, { - index: matchData.slideIndex, - html: slideHTML, + const mergedSlidesArray = Array.from( + existingSlidesMap.values() + ).sort((a, b) => { + const aIdx = (a as { index: number }).index; + const bIdx = (b as { index: number }).index; + return aIdx - bIdx; }); - // ๐Ÿ“ค ็ซ‹ๅณๅ‘้€ๅนป็ฏ็‰‡ๅฎŒๆˆไบ‹ไปถ - sendSSE('slide_complete', { - slideIndex: matchData.slideIndex, - html: slideHTML, - timestamp: Date.now(), + const existingWorkflowState = + updatedSession.workflowState as Record; + const updatedWorkflowState = { + ...existingWorkflowState, + slides: mergedSlidesArray, + currentSlideIndex: mergedSlidesArray.length - 1, + totalSlides: mergedSlidesArray.length, + lastModifiedAt: new Date(), + }; + + await sessionManager.updateSession(sessionId, userId, { + slides: mergedSlidesArray as unknown, + workflowState: updatedWorkflowState as unknown, }); - console.log(`[Agent Chat] Slide ${matchData.slideIndex} streamed successfully`); + console.log( + `[Agent Chat] Synced ${mergedSlidesArray.length} slides to database (${detectedSlides.size} new/updated)` + ); + } else if (updatedSession) { + // Fallback: extract slides from messages + const extractedSlides = + extractSlidesFromMessages(updatedMessages); + if (extractedSlides.length > 0) { + const existingWorkflowState = + 
updatedSession.workflowState as Record; + const updatedWorkflowState = { + ...existingWorkflowState, + slides: extractedSlides, + currentSlideIndex: extractedSlides.length - 1, + totalSlides: extractedSlides.length, + lastModifiedAt: new Date(), + }; + + await sessionManager.updateSession(sessionId, userId, { + slides: extractedSlides as unknown, + workflowState: updatedWorkflowState as unknown, + }); + + console.log( + `[Agent Chat] Synced ${extractedSlides.length} slides using message extraction` + ); + } } + } catch (syncError) { + console.error( + `[Agent Chat] Background sync failed:`, + syncError + ); } + })(), + new Promise((_, reject) => + setTimeout( + () => reject(new Error("Database sync timeout")), + 10000 + ) + ), + ]).catch((timeoutError) => { + console.error( + `[Agent Chat] Database sync timeout or failed:`, + timeoutError + ); + }); + + return; // Exit the stream processing + + case "error": + sendSSE("error", { + content: event.error || "Unknown error", + }); + if (heartbeatInterval) clearInterval(heartbeatInterval); + controller.close(); + return; + + case "status": + sendSSE("status", { + status: event.status, + message: event.message, + }); + break; + + default: + // Forward unknown events as-is + sendSSE(event.type, event as unknown as Record); + } + } - // โœ… ๆธ…้™คๆ‰€ๆœ‰ๅทฒๅค„็†็š„ๅนป็ฏ็‰‡๏ผˆไปŽๆœ€ๅŽไธ€ไธชๅŒน้…ไฝ็ฝฎๅผ€ๅง‹ไฟ็•™๏ผ‰ - if (matches.length > 0) { - const lastMatchEnd = matches[matches.length - 1]!.matchEnd; - slideBuffer = slideBuffer.substring(lastMatchEnd); - console.log(`[Agent Chat] Processed ${matches.length} slides, buffer remaining: ${slideBuffer.length} chars`); - } + // If stream ends without done event + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + if (heartbeatInterval) clearInterval(heartbeatInterval); + controller.close(); + } catch (error) { + console.error("[Agent Chat] Error in AgentCore stream:", error); + sendSSE("error", { + content: + error instanceof Error ? 
error.message : "Failed to process request", + }); + + if (heartbeatInterval) clearInterval(heartbeatInterval); + controller.close(); + } + }, + }); + + return new Response(stream, { + headers: { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache, no-transform", + Connection: "keep-alive", + "X-Accel-Buffering": "no", + }, + }); +} - // ๅ‘้€ๆ–‡ๆœฌๅ†…ๅฎน๏ผˆ็”จไบŽๅฏน่ฏๆ˜พ็คบ๏ผ‰ - const data = JSON.stringify({ - type: "assistant_message", - content, +/** + * Handle chat request using local Agent SDK (legacy fallback) + */ +async function handleLocalAgentChat( + _req: Request, + userId: string, + chatRequest: ChatRequest +): Promise { + // Import legacy dependencies only when needed + const { agentService } = await import("@/lib/agent/agent-service"); + + const { message, sessionId, files, enableWebSearch = true } = chatRequest; + + // Get or create session + let dbSession = await sessionManager.getSession(sessionId, userId); + + if (!dbSession) { + console.log( + `[Agent Chat] Session ${sessionId} not found, creating new session` + ); + dbSession = await sessionManager.createSessionWithId( + sessionId, + userId, + "New Agent Session" + ); + } + + // Construct full message with file contents + let fullMessage = message; + if (files && files.length > 0) { + const filesText = files + .map((f) => `File: ${f.name}\nContent:\n${f.content}`) + .join("\n\n"); + fullMessage += `\n\nUploaded files:\n${filesText}`; + } + + // Prepare session data and config + const sessionMessages = ( + Array.isArray(dbSession.messages) ? dbSession.messages : [] + ) as unknown as Message[]; + + // Configure agent tools based on enableWebSearch + const agentConfig = { + allowedTools: enableWebSearch + ? 
["Read", "Glob", "Grep", "WebSearch", "WebFetch"] + : ["Read", "Glob", "Grep"], + }; + + // Create SSE stream (legacy implementation) + const stream = new ReadableStream({ + async start(controller) { + const encoder = new TextEncoder(); + let fullResponse = ""; + let heartbeatInterval: NodeJS.Timeout | null = null; + + const sendSSE = (type: string, data: Record) => { + try { + const message = JSON.stringify({ type, ...data }); + controller.enqueue(encoder.encode(`data: ${message}\n\n`)); + } catch (err) { + console.error("[Agent Chat] Failed to send SSE:", err); + throw err; + } + }; + + try { + sendSSE("status", { + status: "connecting", + message: "Establishing connection...", + }); + + heartbeatInterval = setInterval(() => { + try { + sendSSE("heartbeat", { timestamp: Date.now() }); + } catch { + if (heartbeatInterval) clearInterval(heartbeatInterval); + } + }, 15000); + + sendSSE("status", { + status: "initializing_agent", + message: "Preparing AI agent...", + }); + + const agentSession = await agentService.getOrCreateSession( + sessionId, + agentConfig + ); + + console.log( + `[Agent Chat] Using local agent session for database sessionId: ${sessionId}, SDK sessionId: ${agentSession.sdkSessionId || "new"}` + ); + + sendSSE("status", { + status: "ready", + message: "Agent ready, processing your request...", + }); + + // Slide detection buffer + let slideBuffer = ""; + const detectedSlides = new Map< + number, + { index: number; html: string } + >(); + + const listener = (chunk: { + type: string; + message?: { content: string | Array<{ type: string; text?: string; name?: string; input?: unknown }> }; + subtype?: string; + error?: string; + }) => { + try { + if (chunk.type === "assistant") { + const content = chunk.message?.content; + + if (typeof content === "string") { + fullResponse += content; + slideBuffer += content; + + // Slide detection + const slideRegex = + /๐ŸŽฏSLIDE_START:(\d+)๐ŸŽฏ([\s\S]*?)๐ŸŽฏSLIDE_END:\1๐ŸŽฏ/g; + const matches: Array<{ + 
slideIndex: number; + slideContent: string; + matchEnd: number; + }> = []; + let match; + + while ((match = slideRegex.exec(slideBuffer)) !== null) { + matches.push({ + slideIndex: parseInt(match[1]!, 10), + slideContent: match[2]!, + matchEnd: match.index + match[0].length, }); - controller.enqueue(encoder.encode(`data: ${data}\n\n`)); - } else if (Array.isArray(content)) { - // ๅค„็†็ป“ๆž„ๅŒ–ๅ†…ๅฎน๏ผˆtext blocks, tool_use๏ผ‰ - for (const block of content) { - if (block.type === "text") { - fullResponse += block.text; - slideBuffer += block.text; // ็ดฏ็งฏๅˆฐๅนป็ฏ็‰‡buffer - - // ๐ŸŽฏ ๅŒๆ ทๆฃ€ๆต‹ๅนป็ฏ็‰‡๏ผˆไฝฟ็”จ็›ธๅŒ็š„้€ป่พ‘๏ผ‰ - const slideRegex = /๐ŸŽฏSLIDE_START:(\d+)๐ŸŽฏ([\s\S]*?)๐ŸŽฏSLIDE_END:\1๐ŸŽฏ/g; - const matches: Array<{ - slideIndex: number; - slideContent: string; - fullMatch: string; - matchEnd: number; - }> = []; - let match; - - // โœ… ๅ…ˆๆ”ถ้›†ๆ‰€ๆœ‰ๅŒน้… - while ((match = slideRegex.exec(slideBuffer)) !== null) { - matches.push({ - slideIndex: parseInt(match[1]!), - slideContent: match[2]!, - fullMatch: match[0], - matchEnd: match.index + match[0].length, - }); - } + } - // โœ… ๅค„็†ๆ‰€ๆœ‰ๅŒน้…็š„ๅนป็ฏ็‰‡ - for (const matchData of matches) { - const htmlMatch = matchData.slideContent.match(/```html-slide\s*([\s\S]*?)\s*```/); - if (htmlMatch && htmlMatch[1]) { - const slideHTML = htmlMatch[1].trim(); - - // โœ… ๆ”ถ้›†ๅˆฐ detectedSlides๏ผˆ็”จไบŽๆ•ฐๆฎๅบ“ๅŒๆญฅ๏ผ‰ - detectedSlides.set(matchData.slideIndex, { - index: matchData.slideIndex, - html: slideHTML, - }); - - sendSSE('slide_complete', { - slideIndex: matchData.slideIndex, - html: slideHTML, - timestamp: Date.now(), - }); - - console.log(`[Agent Chat] Slide ${matchData.slideIndex} streamed successfully`); - } - } + for (const matchData of matches) { + const htmlMatch = matchData.slideContent.match( + /```html-slide\s*([\s\S]*?)\s*```/ + ); + if (htmlMatch?.[1]) { + const slideHTML = htmlMatch[1].trim(); - // โœ… ๆธ…้™คๆ‰€ๆœ‰ๅทฒๅค„็†็š„ๅนป็ฏ็‰‡ - if (matches.length > 0) { - const 
lastMatchEnd = matches[matches.length - 1]!.matchEnd; - slideBuffer = slideBuffer.substring(lastMatchEnd); - console.log(`[Agent Chat] Processed ${matches.length} slides from block, buffer remaining: ${slideBuffer.length} chars`); - } + detectedSlides.set(matchData.slideIndex, { + index: matchData.slideIndex, + html: slideHTML, + }); - const data = JSON.stringify({ - type: "assistant_message", - content: block.text, - }); - controller.enqueue(encoder.encode(`data: ${data}\n\n`)); - } else if (block.type === "tool_use") { - // ้€š็Ÿฅๅ‰็ซฏๆญฃๅœจไฝฟ็”จๅทฅๅ…ท - const data = JSON.stringify({ - type: "tool_use", - toolName: block.name, - toolInput: block.input, + sendSSE("slide_complete", { + slideIndex: matchData.slideIndex, + html: slideHTML, + timestamp: Date.now(), + }); + + console.log( + `[Agent Chat] Slide ${matchData.slideIndex} streamed successfully` + ); + } + } + + if (matches.length > 0) { + const lastMatchEnd = matches[matches.length - 1]!.matchEnd; + slideBuffer = slideBuffer.substring(lastMatchEnd); + } + + sendSSE("assistant_message", { content }); + } else if (Array.isArray(content)) { + for (const block of content) { + if (block.type === "text" && block.text) { + fullResponse += block.text; + slideBuffer += block.text; + + // Same slide detection logic + const slideRegex = + /๐ŸŽฏSLIDE_START:(\d+)๐ŸŽฏ([\s\S]*?)๐ŸŽฏSLIDE_END:\1๐ŸŽฏ/g; + const matches: Array<{ + slideIndex: number; + slideContent: string; + matchEnd: number; + }> = []; + let match; + + while ((match = slideRegex.exec(slideBuffer)) !== null) { + matches.push({ + slideIndex: parseInt(match[1]!, 10), + slideContent: match[2]!, + matchEnd: match.index + match[0].length, }); - controller.enqueue(encoder.encode(`data: ${data}\n\n`)); } + + for (const matchData of matches) { + const htmlMatch = matchData.slideContent.match( + /```html-slide\s*([\s\S]*?)\s*```/ + ); + if (htmlMatch?.[1]) { + const slideHTML = htmlMatch[1].trim(); + + detectedSlides.set(matchData.slideIndex, { + index: 
matchData.slideIndex, + html: slideHTML, + }); + + sendSSE("slide_complete", { + slideIndex: matchData.slideIndex, + html: slideHTML, + timestamp: Date.now(), + }); + } + } + + if (matches.length > 0) { + const lastMatchEnd = matches[matches.length - 1]!.matchEnd; + slideBuffer = slideBuffer.substring(lastMatchEnd); + } + + sendSSE("assistant_message", { content: block.text }); + } else if (block.type === "tool_use") { + sendSSE("tool_use", { + toolName: block.name, + toolInput: block.input, + }); } } - } else if (chunk.type === "result") { - // Query ๅฎŒๆˆ - const data = JSON.stringify({ - type: "result", - success: chunk.subtype === "success", - }); - controller.enqueue(encoder.encode(`data: ${data}\n\n`)); - responseComplete = true; - - // โœ… ๆญฅ้ชค1๏ผš็ซ‹ๅณๅ‘้€ [DONE] ๅนถๅ…ณ้—ญๆต๏ผˆไธ็ญ‰ๅพ…ๆ•ฐๆฎๅบ“๏ผ‰ - controller.enqueue(encoder.encode("data: [DONE]\n\n")); - agentSession.removeListener(listener); - if (heartbeatInterval) clearInterval(heartbeatInterval); - controller.close(); - - console.log(`[Agent Chat] SSE stream closed for session ${sessionId}`); - - // โœ… ๆญฅ้ชค2๏ผšๆ•ฐๆฎๅบ“ๅŒๆญฅไฝœไธบๅŽๅฐไปปๅŠก๏ผˆไธ้˜ปๅกžๆต๏ผ‰ - const updatedMessages: Message[] = [ - ...sessionMessages, - { role: "user" as const, content: message, timestamp: new Date() }, - { - role: "assistant" as const, - content: fullResponse, - timestamp: new Date(), - }, - ]; - - // ๅผ‚ๆญฅไฟๅญ˜ๅˆฐๆ•ฐๆฎๅบ“๏ผˆๅธฆ่ถ…ๆ—ถๅ’Œ้”™่ฏฏๅค„็†๏ผ‰ - Promise.race([ - // ไธปไปปๅŠก๏ผšไฟๅญ˜ๆถˆๆฏๅ’ŒๅŒๆญฅๅนป็ฏ็‰‡ - (async () => { - try { - // ไฟๅญ˜ๆถˆๆฏ - const updatedSession = await sessionManager.updateMessages( - sessionId, - session.user.id, - updatedMessages - ); + } + } else if (chunk.type === "result") { + sendSSE("result", { + success: chunk.subtype === "success", + }); + + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + agentSession.removeListener(listener); + if (heartbeatInterval) clearInterval(heartbeatInterval); + controller.close(); + + console.log( + `[Agent Chat] SSE 
stream closed for session ${sessionId}` + ); + + // Background save + const updatedMessages: Message[] = [ + ...sessionMessages, + { role: "user" as const, content: message, timestamp: new Date() }, + { + role: "assistant" as const, + content: fullResponse, + timestamp: new Date(), + }, + ]; + + Promise.race([ + (async () => { + try { + const updatedSession = await sessionManager.updateMessages( + sessionId, + userId, + updatedMessages + ); + + if (detectedSlides.size > 0 && updatedSession) { + const existingSlides = + (updatedSession.slides as unknown[]) || []; + const existingSlidesMap = new Map(); + + if (Array.isArray(existingSlides)) { + existingSlides.forEach((slide: unknown) => { + const s = slide as { index?: number }; + if (s && typeof s.index === "number") { + existingSlidesMap.set(s.index, slide); + } + }); + } - // โœ… ไฝฟ็”จๆฃ€ๆต‹ๅˆฐ็š„ๅนป็ฏ็‰‡ๆ•ฐๆฎๅนถๅˆๅนถๅˆฐ็Žฐๆœ‰ๅนป็ฏ็‰‡ - if (detectedSlides.size > 0 && updatedSession) { - // โœ… ๆญฅ้ชค1๏ผš่Žทๅ–ๆ•ฐๆฎๅบ“ไธญ็Žฐๆœ‰็š„ๅนป็ฏ็‰‡ - const existingSlides = (updatedSession.slides as any) || []; - const existingSlidesMap = new Map(); - - // ๅฐ†็Žฐๆœ‰ๅนป็ฏ็‰‡ๆ”พๅ…ฅ Map - if (Array.isArray(existingSlides)) { - existingSlides.forEach((slide: any) => { - if (slide && typeof slide.index === 'number') { - existingSlidesMap.set(slide.index, slide); + detectedSlides.forEach((data, index) => { + const existingSlide = existingSlidesMap.get(index) as + | { + outlineContent?: string; + modificationCount?: number; + conversationHistory?: unknown[]; } - }); - } - - // โœ… ๆญฅ้ชค2๏ผšๅˆๅนถๆ–ฐๆฃ€ๆต‹ๅˆฐ็š„ๅนป็ฏ็‰‡๏ผˆ่ฆ†็›–ๅŒ index ็š„๏ผ‰ - detectedSlides.forEach((data, index) => { - const existingSlide = existingSlidesMap.get(index); - existingSlidesMap.set(index, { - id: `slide-${index}`, - index, - html: data.html, - status: "ready" as const, - outlineContent: existingSlide?.outlineContent || `Slide ${index + 1}`, - modificationCount: (existingSlide?.modificationCount || 0) + (existingSlide ? 
1 : 0), - conversationHistory: existingSlide?.conversationHistory || [], - }); + | undefined; + existingSlidesMap.set(index, { + id: `slide-${index}`, + index, + html: data.html, + status: "ready" as const, + outlineContent: + existingSlide?.outlineContent || `Slide ${index + 1}`, + modificationCount: + (existingSlide?.modificationCount || 0) + + (existingSlide ? 1 : 0), + conversationHistory: + existingSlide?.conversationHistory || [], }); + }); - // โœ… ๆญฅ้ชค3๏ผš่ฝฌๆขไธบๆ•ฐ็ป„ๅนถๆŽ’ๅบ - const mergedSlidesArray = Array.from(existingSlidesMap.values()) - .sort((a, b) => a.index - b.index); + const mergedSlidesArray = Array.from( + existingSlidesMap.values() + ).sort((a, b) => { + const aIdx = (a as { index: number }).index; + const bIdx = (b as { index: number }).index; + return aIdx - bIdx; + }); - const existingWorkflowState = updatedSession.workflowState as any; + const existingWorkflowState = + updatedSession.workflowState as Record; + const updatedWorkflowState = { + ...existingWorkflowState, + slides: mergedSlidesArray, + currentSlideIndex: mergedSlidesArray.length - 1, + totalSlides: mergedSlidesArray.length, + lastModifiedAt: new Date(), + }; + + await sessionManager.updateSession(sessionId, userId, { + slides: mergedSlidesArray as unknown, + workflowState: updatedWorkflowState as unknown, + }); + + console.log( + `[Agent Chat] Synced ${mergedSlidesArray.length} slides to database` + ); + } else if (updatedSession) { + const extractedSlides = + extractSlidesFromMessages(updatedMessages); + if (extractedSlides.length > 0) { + const existingWorkflowState = + updatedSession.workflowState as Record; const updatedWorkflowState = { ...existingWorkflowState, - slides: mergedSlidesArray, - currentSlideIndex: mergedSlidesArray.length - 1, - totalSlides: mergedSlidesArray.length, + slides: extractedSlides, + currentSlideIndex: extractedSlides.length - 1, + totalSlides: extractedSlides.length, lastModifiedAt: new Date(), }; - await 
sessionManager.updateSession(sessionId, session.user.id, { - slides: mergedSlidesArray as any, - workflowState: updatedWorkflowState as any, + await sessionManager.updateSession(sessionId, userId, { + slides: extractedSlides as unknown, + workflowState: updatedWorkflowState as unknown, }); - - console.log( - `[Agent Chat] Synced ${mergedSlidesArray.length} slides to database (${detectedSlides.size} new/updated) - using merge strategy` - ); - } else if (updatedSession) { - // ๅ›ž้€€๏ผšไปŽๆถˆๆฏๆๅ–๏ผˆๅ…ผๅฎนๆ—ง้€ป่พ‘๏ผ‰ - const extractedSlides = extractSlidesFromMessages(updatedMessages); - if (extractedSlides.length > 0) { - const existingWorkflowState = updatedSession.workflowState as any; - const updatedWorkflowState = { - ...existingWorkflowState, - slides: extractedSlides, - currentSlideIndex: extractedSlides.length - 1, - totalSlides: extractedSlides.length, - lastModifiedAt: new Date(), - }; - - await sessionManager.updateSession(sessionId, session.user.id, { - slides: extractedSlides as any, - workflowState: updatedWorkflowState as any, - }); - - console.log( - `[Agent Chat] Synced ${extractedSlides.length} slides to database (background) - using message extraction` - ); - } } - } catch (syncError) { - console.error(`[Agent Chat] Background sync failed:`, syncError); - // ไธๅฝฑๅ“็”จๆˆทไฝ“้ชŒ๏ผŒ้™้ป˜ๅคฑ่ดฅ } - })(), - - // ่ถ…ๆ—ถไฟๆŠค๏ผš10็ง’ๅŽๆ”พๅผƒ - new Promise((_, reject) => - setTimeout(() => reject(new Error("Database sync timeout")), 10000) - ), - ]).catch((timeoutError) => { - console.error(`[Agent Chat] Database sync timeout or failed:`, timeoutError); - // ้™้ป˜ๅคฑ่ดฅ๏ผŒไธๅฝฑๅ“็”จๆˆท - }); - } else if (chunk.type === "error") { - const errorData = JSON.stringify({ - type: "error", - content: chunk.error || "Unknown error", - }); - controller.enqueue(encoder.encode(`data: ${errorData}\n\n`)); - agentSession.removeListener(listener); - - // โœ… ๆธ…็†่ต„ๆบ - if (heartbeatInterval) clearInterval(heartbeatInterval); - console.log(`[Agent Chat] 
Removed listener from session ${sessionId} (agent error)`); - - controller.close(); - } - } catch (error) { - console.error("Listener error:", error); + } catch (syncError) { + console.error( + `[Agent Chat] Background sync failed:`, + syncError + ); + } + })(), + new Promise((_, reject) => + setTimeout( + () => reject(new Error("Database sync timeout")), + 10000 + ) + ), + ]).catch((timeoutError) => { + console.error( + `[Agent Chat] Database sync timeout:`, + timeoutError + ); + }); + } else if (chunk.type === "error") { + sendSSE("error", { + content: chunk.error || "Unknown error", + }); + agentSession.removeListener(listener); + if (heartbeatInterval) clearInterval(heartbeatInterval); + controller.close(); } - }; + } catch (error) { + console.error("Listener error:", error); + } + }; - // โœ… ๆญฅ้ชค 6: ๆทปๅŠ ็›‘ๅฌๅ™จๅนถๅ‘้€ๆถˆๆฏ - agentSession.addListener(listener); - agentSession.sendMessage(fullMessage); + agentSession.addListener(listener); + agentSession.sendMessage(fullMessage); + } catch (error) { + console.error("[Agent Chat] Error in SSE setup:", error); + sendSSE("error", { + content: + error instanceof Error ? error.message : "Failed to initialize agent", + }); + + if (heartbeatInterval) clearInterval(heartbeatInterval); + controller.close(); + } + }, + }); + + return new Response(stream, { + headers: { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache, no-transform", + Connection: "keep-alive", + "X-Accel-Buffering": "no", + }, + }); +} - // Note: ไธๅœจ่ฟ™้‡Œๆธ…็†่ต„ๆบ๏ผ - // ่ต„ๆบๆธ…็†ๅœจ็›‘ๅฌๅ™จ็š„ "result" ไบ‹ไปถๅค„็†ไธญๅฎŒๆˆ - // ๆˆ–ๅœจ "error" ไบ‹ไปถไธญๅฎŒๆˆ +export async function POST(req: Request) { + try { + // 1. 
Authenticate user + const session = await auth(); + if (!session?.user?.id) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } - } catch (error) { - // ๆ•่Žท SSE ่ฎพ็ฝฎๆˆ– Agent ่Žทๅ–่ฟ‡็จ‹ไธญ็š„้”™่ฏฏ - console.error("[Agent Chat] Error in SSE setup:", error); - sendSSE('error', { - content: error instanceof Error ? error.message : "Failed to initialize agent", - }); + // 2. Parse request + const chatRequest = (await req.json()) as ChatRequest; + const { message, sessionId } = chatRequest; - // ไป…ๅœจๅˆๅง‹ๅŒ–ๅคฑ่ดฅๆ—ถๆธ…็† - if (heartbeatInterval) clearInterval(heartbeatInterval); - console.log(`[Agent Chat] Initialization failed for session ${sessionId}`); + if (!message || !sessionId) { + return NextResponse.json( + { error: "Missing required fields: message, sessionId" }, + { status: 400 } + ); + } - controller.close(); - } - }, - }); - - // 7. ่ฟ”ๅ›ž SSE ๆต๏ผˆไผ˜ๅŒ–็š„ headers๏ผ‰ - return new Response(stream, { - headers: { - "Content-Type": "text/event-stream", - "Cache-Control": "no-cache, no-transform", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", // ็ฆ็”จ nginx/ไปฃ็†็ผ“ๅ†ฒ - }, - }); + // 3. 
Route to appropriate handler + if (isAgentCoreEnabled()) { + console.log("[Agent Chat] Using AgentCore Runtime backend"); + + // Get Cognito access token for AgentCore JWT auth + // Note: Use accessToken (not idToken) because AgentCore validates client_id claim + // which is present in access token but not in id token + const accessToken = session.accessToken; + if (!accessToken) { + console.warn("[Agent Chat] No access token available, user may need to re-authenticate"); + } + + return handleAgentCoreChat(req, session.user.id, chatRequest, accessToken); + } else { + console.log("[Agent Chat] Using local Agent SDK (legacy mode)"); + return handleLocalAgentChat(req, session.user.id, chatRequest); + } } catch (error) { console.error("Chat API error:", error); return NextResponse.json( @@ -466,7 +907,7 @@ export async function POST(req: Request) { error: "Failed to process chat", details: error instanceof Error ? error.message : "Unknown error", }, - { status: 500 }, + { status: 500 } ); } } diff --git a/frontend/src/env.js b/frontend/src/env.js index 33dda52..f7c73d6 100644 --- a/frontend/src/env.js +++ b/frontend/src/env.js @@ -36,6 +36,9 @@ export const env = createEnv({ COGNITO_CLIENT_SECRET: z.string(), COGNITO_ISSUER: z.string(), + // AgentCore Runtime + AGENTCORE_RUNTIME_URL: z.string().optional(), + // Optional Services TAVILY_API_KEY: z.string().optional(), UNSPLASH_ACCESS_KEY: z.string().optional(), @@ -70,6 +73,7 @@ export const env = createEnv({ COGNITO_CLIENT_ID: process.env.COGNITO_CLIENT_ID, COGNITO_CLIENT_SECRET: process.env.COGNITO_CLIENT_SECRET, COGNITO_ISSUER: process.env.COGNITO_ISSUER, + AGENTCORE_RUNTIME_URL: process.env.AGENTCORE_RUNTIME_URL, TAVILY_API_KEY: process.env.TAVILY_API_KEY, UNSPLASH_ACCESS_KEY: process.env.UNSPLASH_ACCESS_KEY, NEXTAUTH_SECRET: process.env.NEXTAUTH_SECRET, diff --git a/frontend/src/lib/agent/agentcore-client.ts b/frontend/src/lib/agent/agentcore-client.ts new file mode 100644 index 0000000..ac5ba4c --- /dev/null +++ 
b/frontend/src/lib/agent/agentcore-client.ts
@@ -0,0 +1,471 @@
+/**
+ * AgentCore Client - Communicates with AgentCore Runtime backend
+ *
+ * This client handles authentication and communication with the AgentCore
+ * Runtime service using Cognito JWT tokens.
+ *
+ * Key insight: All requests go through the /invocations endpoint with
+ * path routing wrapped in the payload. The backend's /invocations handler
+ * routes to the appropriate internal endpoint.
+ *
+ * Features:
+ * - Cognito JWT token authentication
+ * - Unified /invocations endpoint pattern
+ * - Session management (create/resume)
+ * - Streaming message responses (SSE)
+ */
+
+import { env } from "@/env";
+
+/**
+ * Event types emitted by AgentCore streaming responses
+ */
+export interface AgentCoreEvent {
+  type:
+    | "start"
+    | "status"
+    | "text"
+    | "tool_use"
+    | "slide_complete"
+    | "permission"
+    | "result"
+    | "done"
+    | "error";
+  content?: string;
+  status?: string;
+  message?: string;
+  tool_name?: string;
+  tool_input?: Record<string, unknown>;
+  slide_index?: number;
+  html?: string;
+  timestamp?: number;
+  cost_usd?: number;
+  num_turns?: number;
+  slides_detected?: number;
+  error?: string;
+  request_id?: string;
+  allowed?: boolean;
+}
+
+/**
+ * Session information returned by AgentCore
+ */
+export interface AgentCoreSession {
+  session_id: string;
+  created_at: string;
+  status: string;
+}
+
+/**
+ * Request to create a session
+ */
+export interface CreateSessionRequest {
+  user_id?: string;
+  resume_session_id?: string;
+  model?: string;
+  cwd?: string;
+}
+
+/**
+ * Request to send a message
+ */
+export interface SendMessageRequest {
+  message: string | Record<string, unknown>;
+  model?: string;
+  enable_web_search?: boolean;
+}
+
+/**
+ * Response of a non-streaming message request
+ */
+export interface SendMessageResponse {
+  messages: Array<{
+    type: string;
+    content?: string;
+    tool_name?: string;
+    tool_input?: Record<string, unknown>;
+  }>;
+  session_id: string;
+  cost_usd?: number;
+  num_turns?: number;
+}
+
+/**
+ * Session status, including any pending permission request
+ */
+export interface SessionStatus {
+  session_id: string;
+  status: string;
+  pending_permission?: {
+    request_id: string;
+    tool_name: string;
+    tool_input: Record<string, unknown>;
+    suggestions: Array<Record<string, unknown>>;
+  };
+  current_model?: string;
+}
+
+/**
+ * Invocation request format for AgentCore
+ */
+interface InvocationRequest {
+  path: string;
+  method: "GET" | "POST" | "DELETE" | "PUT";
+  payload?: Record<string, unknown>;
+  path_params?: Record<string, string>;
+  query_params?: Record<string, string>;
+}
+
+/**
+ * Function type for getting JWT tokens
+ */
+export type GetTokenFn = () => Promise<string | null | undefined>;
+
+/** Header that pins a request to an AgentCore runtime session */
+const SESSION_ID_HEADER = "X-Amzn-Bedrock-AgentCore-Runtime-Session-Id";
+
+/**
+ * Parse one line of an SSE stream into an event
+ *
+ * @returns The decoded event, or null for non-data lines, empty payloads,
+ *          the "[DONE]" sentinel and payloads that are not valid JSON
+ */
+function parseSseLine(line: string): AgentCoreEvent | null {
+  // The SSE format allows "data:" with or without a following space
+  if (!line.startsWith("data:")) {
+    return null;
+  }
+
+  // trim() also drops the "\r" left behind by CRLF line endings
+  const data = line.slice("data:".length).trim();
+  if (!data || data === "[DONE]") {
+    return null;
+  }
+
+  try {
+    return JSON.parse(data) as AgentCoreEvent;
+  } catch (e) {
+    // A malformed event is logged and skipped so the stream keeps going
+    console.error("[AgentCoreClient] Parse error:", e, "Data:", data);
+    return null;
+  }
+}
+
+/**
+ * AgentCore Client for communicating with the AgentCore Runtime backend
+ *
+ * Uses Cognito JWT tokens for authentication. All requests are routed
+ * through the /invocations endpoint with path routing info wrapped in
+ * the payload.
+ */
+export class AgentCoreClient {
+  private baseUrl: string;
+  private getToken: GetTokenFn;
+  private currentSessionId: string | null = null;
+
+  /**
+   * Create a new AgentCore client
+   *
+   * @param baseUrl - Base URL of the AgentCore Runtime (includes /runtimes/{arn})
+   * @param getToken - Function to get the Cognito JWT token
+   */
+  constructor(baseUrl: string, getToken: GetTokenFn) {
+    // Remove trailing slashes so "/invocations" can be appended safely
+    this.baseUrl = baseUrl.replace(/\/+$/, "");
+    this.getToken = getToken;
+  }
+
+  /**
+   * Make an authenticated invocation request to AgentCore
+   *
+   * All requests go through the /invocations endpoint with path routing
+   * wrapped in the payload.
+   *
+   * @param request - Routing info and payload for the backend
+   * @param sessionId - Session to target; defaults to the last created one
+   * @param accept - Value of the Accept header
+   * @throws Error if no authentication token is available
+   */
+  private async invoke(
+    request: InvocationRequest,
+    sessionId?: string,
+    accept = "application/json"
+  ): Promise<Response> {
+    const token = await this.getToken();
+
+    if (!token) {
+      throw new Error("No authentication token available");
+    }
+
+    const url = `${this.baseUrl}/invocations`;
+
+    const headers: Record<string, string> = {
+      "Content-Type": "application/json",
+      Accept: accept,
+      Authorization: `Bearer ${token}`,
+    };
+
+    // Add session ID header if available
+    const effectiveSessionId = sessionId || this.currentSessionId;
+    if (effectiveSessionId) {
+      headers[SESSION_ID_HEADER] = effectiveSessionId;
+    }
+
+    // Log routing info only: the payload carries user message content
+    console.log("[AgentCoreClient] Invoking:", {
+      url,
+      path: request.path,
+      method: request.method,
+      sessionId: effectiveSessionId,
+    });
+
+    return fetch(url, {
+      method: "POST",
+      headers,
+      body: JSON.stringify(request),
+    });
+  }
+
+  /**
+   * Throw a descriptive error when a response is not successful
+   *
+   * @param response - Response returned by invoke()
+   * @param action - What was attempted, e.g. "create session"
+   * @throws Error with status, status text and response body
+   */
+  private async ensureOk(response: Response, action: string): Promise<void> {
+    if (response.ok) {
+      return;
+    }
+
+    const errorText = await response.text();
+    throw new Error(
+      `Failed to ${action}: ${response.status} ${response.statusText} - ${errorText}`
+    );
+  }
+
+  /**
+   * Create a new session or resume an existing one
+   *
+   * @param request - Session creation parameters
+   * @returns Session information
+   */
+  async createSession(
+    request: CreateSessionRequest = {}
+  ): Promise<AgentCoreSession> {
+    const response = await this.invoke({
+      path: "/sessions",
+      method: "POST",
+      payload: { ...request },
+    });
+    await this.ensureOk(response, "create session");
+
+    const session = (await response.json()) as AgentCoreSession;
+
+    // Store session ID for subsequent requests
+    this.currentSessionId = session.session_id;
+
+    return session;
+  }
+
+  /**
+   * Send a message to a session (non-streaming)
+   *
+   * @param sessionId - The session ID
+   * @param request - Message request
+   * @returns Response from the agent
+   */
+  async sendMessage(
+    sessionId: string,
+    request: SendMessageRequest
+  ): Promise<SendMessageResponse> {
+    const response = await this.invoke(
+      {
+        path: `/sessions/${sessionId}/messages`,
+        method: "POST",
+        payload: { ...request },
+        path_params: { session_id: sessionId },
+      },
+      sessionId
+    );
+    await this.ensureOk(response, "send message");
+
+    return (await response.json()) as SendMessageResponse;
+  }
+
+  /**
+   * Send a message to a session with streaming response (SSE)
+   *
+   * This method returns an async iterable that yields events as they arrive
+   * from the AgentCore backend. If the consumer stops iterating early, the
+   * underlying response stream is cancelled.
+   *
+   * @param sessionId - The session ID
+   * @param request - Message request
+   * @returns Async iterable of events
+   */
+  async *sendMessageStream(
+    sessionId: string,
+    request: SendMessageRequest
+  ): AsyncIterable<AgentCoreEvent> {
+    const response = await this.invoke(
+      {
+        path: `/sessions/${sessionId}/messages/stream`,
+        method: "POST",
+        payload: { ...request },
+        path_params: { session_id: sessionId },
+      },
+      sessionId,
+      "text/event-stream"
+    );
+    await this.ensureOk(response, "stream message");
+
+    if (!response.body) {
+      throw new Error("Response body is null");
+    }
+
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = "";
+    let streamEnded = false;
+
+    try {
+      while (!streamEnded) {
+        const { value, done } = await reader.read();
+
+        if (done) {
+          // Flush bytes of a multi-byte character held back by the decoder
+          buffer += decoder.decode();
+          streamEnded = true;
+        } else {
+          buffer += decoder.decode(value, { stream: true });
+        }
+
+        const lines = buffer.split("\n");
+        // Keep the last potentially incomplete line until more data arrives
+        buffer = streamEnded ? "" : (lines.pop() ?? "");
+
+        for (const line of lines) {
+          const event = parseSseLine(line);
+          if (event) {
+            yield event;
+          }
+        }
+      }
+    } finally {
+      if (!streamEnded) {
+        // Early exit or read failure: cancel so the connection is released
+        await reader.cancel().catch(() => undefined);
+      }
+      reader.releaseLock();
+    }
+  }
+
+  /**
+   * Get the status of a session
+   *
+   * @param sessionId - The session ID
+   * @returns Session status including pending permissions
+   */
+  async getSessionStatus(sessionId: string): Promise<SessionStatus> {
+    const response = await this.invoke(
+      {
+        path: `/sessions/${sessionId}/status`,
+        method: "GET",
+        path_params: { session_id: sessionId },
+      },
+      sessionId
+    );
+    await this.ensureOk(response, "get session status");
+
+    return (await response.json()) as SessionStatus;
+  }
+
+  /**
+   * Close a session
+   *
+   * @param sessionId - The session ID to close
+   */
+  async closeSession(sessionId: string): Promise<void> {
+    const response = await this.invoke(
+      {
+        path: `/sessions/${sessionId}`,
+        method: "DELETE",
+        path_params: { session_id: sessionId },
+      },
+      sessionId
+    );
+    await this.ensureOk(response, "close session");
+
+    // Do not keep routing later requests to a session that no longer exists
+    if (this.currentSessionId === sessionId) {
+      this.currentSessionId = null;
+    }
+  }
+
+  /**
+   * Interrupt the current operation in a session
+   *
+   * @param sessionId - The session ID
+   */
+  async interruptSession(sessionId: string): Promise<void> {
+    const response = await this.invoke(
+      {
+        path: `/sessions/${sessionId}/interrupt`,
+        method: "POST",
+        path_params: { session_id: sessionId },
+      },
+      sessionId
+    );
+    await this.ensureOk(response, "interrupt session");
+  }
+
+  /**
+   * Respond to a permission request
+   *
+   * @param sessionId - The session ID
+   * @param requestId - The permission request ID
+   * @param allowed - Whether to allow the operation
+   * @param applySuggestions - Whether to apply suggested changes
+   */
+  async respondToPermission(
+    sessionId: string,
+    requestId: string,
+    allowed: boolean,
+    applySuggestions: boolean = false
+  ): Promise<void> {
+    const response = await this.invoke(
+      {
+        path: `/sessions/${sessionId}/permissions/respond`,
+        method: "POST",
+        payload: {
+          request_id: requestId,
+          allowed,
+          apply_suggestions: applySuggestions,
+        },
+        path_params: { session_id: sessionId },
+      },
+      sessionId
+    );
+    await this.ensureOk(response, "respond to permission");
+  }
+}
+
+/**
+ * Create an AgentCore client instance with the configured runtime URL
+ *
+ * @param getToken - Function to get the Cognito JWT token
+ * @returns AgentCore client or null if runtime URL is not configured
+ */
+export function createAgentCoreClient(getToken: GetTokenFn): AgentCoreClient | null {
+  const runtimeUrl = env.AGENTCORE_RUNTIME_URL;
+
+  if (!runtimeUrl) {
+    console.warn(
+      "[AgentCoreClient] AGENTCORE_RUNTIME_URL not configured, AgentCore features disabled"
+    );
+    return null;
+  }
+
+  return new AgentCoreClient(runtimeUrl, getToken);
+}
diff --git a/frontend/src/server/auth.ts b/frontend/src/server/auth.ts
index 81d0751..0ac0255
100644 --- a/frontend/src/server/auth.ts +++ b/frontend/src/server/auth.ts @@ -18,6 +18,9 @@ declare module "next-auth" { role: string; isAdmin: boolean; } & DefaultSession["user"]; + accessToken?: string; + idToken?: string; + error?: string; } interface User { @@ -26,6 +29,16 @@ declare module "next-auth" { } } +declare module "next-auth/jwt" { + interface JWT { + accessToken?: string; + idToken?: string; + refreshToken?: string; + expiresAt?: number; + error?: string; + } +} + export const { auth, handlers, signIn, signOut } = NextAuth({ trustHost: true, debug: process.env.NODE_ENV === "development", @@ -47,8 +60,14 @@ export const { auth, handlers, signIn, signOut } = NextAuth({ token.isAdmin = user.role === "ADMIN"; } - // OAuth login: get latest user info from S3 + // OAuth login: store tokens and get latest user info from S3 if (account && token.id) { + // Store Cognito tokens for AgentCore + token.accessToken = account.access_token; + token.idToken = account.id_token; + token.refreshToken = account.refresh_token; + token.expiresAt = account.expires_at; + const profile = await getUserProfile(token.id as string); if (profile) { token.hasAccess = profile.hasAccess; @@ -77,6 +96,13 @@ export const { auth, handlers, signIn, signOut } = NextAuth({ } } + // Check if token is expired + if (token.expiresAt && Date.now() >= (token.expiresAt as number) * 1000) { + // Token expired - mark for refresh + // Note: Cognito tokens can be refreshed using refresh_token + token.error = "TokenExpired"; + } + return token; }, @@ -86,6 +112,13 @@ export const { auth, handlers, signIn, signOut } = NextAuth({ session.user.location = token.location as string; session.user.role = (token.role as string) ?? 
"USER"; session.user.isAdmin = token.role === "ADMIN"; + + // Expose tokens for AgentCore API calls + // AgentCore JWT auth uses accessToken (it carries the client_id claim); idToken holds user claims + session.accessToken = token.accessToken as string | undefined; + session.idToken = token.idToken as string | undefined; + session.error = token.error as string | undefined; + return session; }, diff --git a/infrastructure/lib/compute/agentcore-construct.ts b/infrastructure/lib/compute/agentcore-construct.ts new file mode 100644 index 0000000..733107e --- /dev/null +++ b/infrastructure/lib/compute/agentcore-construct.ts @@ -0,0 +1,593 @@ +/** + * AgentCore Construct + * Deploys a container to AWS Bedrock AgentCore Runtime + * + * This construct creates: + * - ECR repository for the agent container + * - IAM role with AgentCore trust policy + * - Custom Resource to manage AgentCore Runtime via bedrock-agentcore-control APIs + */ + +import * as cdk from 'aws-cdk-lib'; +import * as ecr from 'aws-cdk-lib/aws-ecr'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as lambda from 'aws-cdk-lib/aws-lambda'; +import * as cr from 'aws-cdk-lib/custom-resources'; +import * as logs from 'aws-cdk-lib/aws-logs'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Construct } from 'constructs'; + +export interface AgentCoreConstructProps { + /** + * Stack name for resource naming + */ + stackName: string; + + /** + * Name for the AgentCore runtime + */ + runtimeName: string; + + /** + * S3 bucket for workspace storage + */ + workspaceBucket: s3.IBucket; + + /** + * Cognito User Pool ID for JWT authentication + */ + cognitoUserPoolId: string; + + /** + * Cognito Client ID for JWT authentication + */ + cognitoClientId: string; + + /** + * Docker image URI (if not using ECR from this construct) + * If not provided, an ECR repository will be created + */ + dockerImageUri?: string; + + /** + * Environment variables to pass to the AgentCore runtime + */ + environmentVariables?: Record<string, string>; + + /** + * Network mode: PUBLIC 
or PRIVATE + * @default PUBLIC + */ + networkMode?: 'PUBLIC' | 'PRIVATE'; +} + +export class AgentCoreConstruct extends Construct { + public readonly ecrRepository: ecr.Repository; + public readonly runtimeRole: iam.Role; + public readonly runtimeArn: string; + public readonly runtimeUrl: string; + + constructor(scope: Construct, id: string, props: AgentCoreConstructProps) { + super(scope, id); + + const region = cdk.Stack.of(this).region; + const accountId = cdk.Stack.of(this).account; + + // ========================================================================= + // 1. ECR Repository for Agent Container + // ========================================================================= + + this.ecrRepository = new ecr.Repository(this, 'AgentCoreRepo', { + repositoryName: `${props.stackName}-agentcore`, + removalPolicy: cdk.RemovalPolicy.DESTROY, + emptyOnDelete: true, + imageScanOnPush: true, + lifecycleRules: [ + { + description: 'Keep last 10 images', + maxImageCount: 10, + rulePriority: 1, + }, + ], + }); + + // ========================================================================= + // 2. 
IAM Execution Role for AgentCore Runtime + // ========================================================================= + + this.runtimeRole = new iam.Role(this, 'RuntimeRole', { + roleName: `${props.stackName}-agentcore-runtime-role`, + description: 'Execution role for Bedrock AgentCore Runtime', + + // Trust policy for AgentCore service + assumedBy: new iam.ServicePrincipal('bedrock-agentcore.amazonaws.com', { + conditions: { + StringEquals: { + 'aws:SourceAccount': accountId, + }, + ArnLike: { + 'aws:SourceArn': `arn:aws:bedrock-agentcore:${region}:${accountId}:*`, + }, + }, + }), + }); + + // --- ECR Image Access --- + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'ECRImageAccess', + effect: iam.Effect.ALLOW, + actions: ['ecr:BatchGetImage', 'ecr:GetDownloadUrlForLayer'], + resources: [ + this.ecrRepository.repositoryArn, + // Also allow any repository in the account (for flexibility) + `arn:aws:ecr:${region}:${accountId}:repository/*`, + ], + }) + ); + + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'ECRTokenAccess', + effect: iam.Effect.ALLOW, + actions: ['ecr:GetAuthorizationToken'], + resources: ['*'], + }) + ); + + // --- CloudWatch Logs --- + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'CloudWatchLogsDescribe', + effect: iam.Effect.ALLOW, + actions: ['logs:DescribeLogStreams', 'logs:CreateLogGroup'], + resources: [ + `arn:aws:logs:${region}:${accountId}:log-group:/aws/bedrock-agentcore/runtimes/*`, + ], + }) + ); + + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'CloudWatchLogGroupsDescribe', + effect: iam.Effect.ALLOW, + actions: ['logs:DescribeLogGroups'], + resources: [`arn:aws:logs:${region}:${accountId}:log-group:*`], + }) + ); + + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'CloudWatchLogStream', + effect: iam.Effect.ALLOW, + actions: ['logs:CreateLogStream', 'logs:PutLogEvents'], + resources: [ + 
`arn:aws:logs:${region}:${accountId}:log-group:/aws/bedrock-agentcore/runtimes/*:log-stream:*`, + ], + }) + ); + + // --- X-Ray Tracing --- + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'XRayTracing', + effect: iam.Effect.ALLOW, + actions: [ + 'xray:PutTraceSegments', + 'xray:PutTelemetryRecords', + 'xray:GetSamplingRules', + 'xray:GetSamplingTargets', + ], + resources: ['*'], + }) + ); + + // --- CloudWatch Metrics --- + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'CloudWatchMetrics', + effect: iam.Effect.ALLOW, + actions: ['cloudwatch:PutMetricData'], + resources: ['*'], + conditions: { + StringEquals: { + 'cloudwatch:namespace': 'bedrock-agentcore', + }, + }, + }) + ); + + // --- AgentCore Workload Identity --- + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'GetAgentAccessToken', + effect: iam.Effect.ALLOW, + actions: [ + 'bedrock-agentcore:GetWorkloadAccessToken', + 'bedrock-agentcore:GetWorkloadAccessTokenForJWT', + 'bedrock-agentcore:GetWorkloadAccessTokenForUserId', + ], + resources: [ + `arn:aws:bedrock-agentcore:${region}:${accountId}:workload-identity-directory/default`, + `arn:aws:bedrock-agentcore:${region}:${accountId}:workload-identity-directory/default/workload-identity/*`, + ], + }) + ); + + // --- Bedrock Model Invocation --- + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'BedrockModelInvocation', + effect: iam.Effect.ALLOW, + actions: ['bedrock:InvokeModel', 'bedrock:InvokeModelWithResponseStream'], + resources: [ + // Foundation models (all regions) + `arn:aws:bedrock:*::foundation-model/*`, + // Inference profiles in this account + `arn:aws:bedrock:${region}:${accountId}:inference-profile/*`, + // Application inference profiles + `arn:aws:bedrock:${region}:${accountId}:application-inference-profile/*`, + ], + }) + ); + + // Global Inference Profile permissions + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'BedrockGetInferenceProfile', + effect: 
iam.Effect.ALLOW, + actions: ['bedrock:GetInferenceProfile', 'bedrock:ListInferenceProfiles'], + resources: ['*'], + }) + ); + + // --- S3 Workspace Access --- + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'S3WorkspaceAccess', + effect: iam.Effect.ALLOW, + actions: ['s3:GetObject', 's3:PutObject', 's3:ListBucket', 's3:DeleteObject'], + resources: [props.workspaceBucket.bucketArn, `${props.workspaceBucket.bucketArn}/*`], + }) + ); + + // --- Secrets Manager Access (optional, for API keys) --- + this.runtimeRole.addToPolicy( + new iam.PolicyStatement({ + sid: 'SecretsManagerAccess', + effect: iam.Effect.ALLOW, + actions: ['secretsmanager:GetSecretValue'], + resources: [`arn:aws:secretsmanager:${region}:${accountId}:secret:${props.stackName}/*`], + }) + ); + + // ========================================================================= + // 3. Custom Resource Lambda for AgentCore Management + // ========================================================================= + + const agentCoreManagerCode = this.createAgentCoreManagerCode(); + + const agentCoreManagerFunction = new lambda.Function(this, 'AgentCoreManager', { + functionName: `${props.stackName}-agentcore-manager`, + runtime: lambda.Runtime.PYTHON_3_12, + handler: 'index.handler', + code: lambda.Code.fromInline(agentCoreManagerCode), + timeout: cdk.Duration.minutes(10), + memorySize: 256, + environment: { + LOG_LEVEL: 'INFO', + }, + logRetention: logs.RetentionDays.ONE_WEEK, + }); + + // Grant Lambda permissions to manage AgentCore + agentCoreManagerFunction.addToRolePolicy( + new iam.PolicyStatement({ + effect: iam.Effect.ALLOW, + actions: [ + 'bedrock-agentcore-control:CreateAgentRuntime', + 'bedrock-agentcore-control:UpdateAgentRuntime', + 'bedrock-agentcore-control:DeleteAgentRuntime', + 'bedrock-agentcore-control:GetAgentRuntime', + 'bedrock-agentcore-control:ListAgentRuntimes', + ], + resources: ['*'], + }) + ); + + // Grant Lambda permissions to pass the runtime role + 
agentCoreManagerFunction.addToRolePolicy( + new iam.PolicyStatement({ + effect: iam.Effect.ALLOW, + actions: ['iam:PassRole'], + resources: [this.runtimeRole.roleArn], + }) + ); + + // ========================================================================= + // 4. Custom Resource Provider + // ========================================================================= + + const provider = new cr.Provider(this, 'AgentCoreProvider', { + onEventHandler: agentCoreManagerFunction, + logRetention: logs.RetentionDays.ONE_WEEK, + }); + + // Construct discovery URL for Cognito + const cognitoDiscoveryUrl = `https://cognito-idp.${region}.amazonaws.com/${props.cognitoUserPoolId}/.well-known/openid-configuration`; + + // Build environment variables string + const envVarsArray: string[] = [`AWS_DEFAULT_REGION=${region}`]; + if (props.environmentVariables) { + Object.entries(props.environmentVariables).forEach(([key, value]) => { + if (value) { + envVarsArray.push(`${key}=${value}`); + } + }); + } + envVarsArray.push(`S3_WORKSPACE_BUCKET=${props.workspaceBucket.bucketName}`); + + // Determine Docker image URI + const dockerImageUri = props.dockerImageUri || `${this.ecrRepository.repositoryUri}:latest`; + + // ========================================================================= + // 5. 
AgentCore Runtime Custom Resource + // ========================================================================= + + const agentCoreRuntime = new cdk.CustomResource(this, 'AgentCoreRuntime', { + serviceToken: provider.serviceToken, + properties: { + RuntimeName: props.runtimeName, + DockerImageUri: dockerImageUri, + RoleArn: this.runtimeRole.roleArn, + NetworkMode: props.networkMode || 'PUBLIC', + EnvironmentVariables: envVarsArray.join(','), + CognitoDiscoveryUrl: cognitoDiscoveryUrl, + CognitoClientId: props.cognitoClientId, + // Add timestamp to force update when properties change + Timestamp: Date.now().toString(), + }, + }); + + // Ensure runtime is created after the role + agentCoreRuntime.node.addDependency(this.runtimeRole); + + // Store runtime outputs + this.runtimeArn = agentCoreRuntime.getAttString('RuntimeArn'); + this.runtimeUrl = agentCoreRuntime.getAttString('RuntimeUrl'); + + // ========================================================================= + // Outputs + // ========================================================================= + + new cdk.CfnOutput(this, 'ECRRepositoryUri', { + value: this.ecrRepository.repositoryUri, + description: 'ECR Repository URI for AgentCore container', + exportName: `${props.stackName}-agentcore-ecr-uri`, + }); + + new cdk.CfnOutput(this, 'RuntimeRoleArn', { + value: this.runtimeRole.roleArn, + description: 'IAM Role ARN for AgentCore Runtime', + exportName: `${props.stackName}-agentcore-role-arn`, + }); + + new cdk.CfnOutput(this, 'AgentCoreRuntimeArn', { + value: this.runtimeArn, + description: 'AgentCore Runtime ARN', + exportName: `${props.stackName}-agentcore-runtime-arn`, + }); + + new cdk.CfnOutput(this, 'AgentCoreRuntimeUrl', { + value: this.runtimeUrl, + description: 'AgentCore Runtime URL', + exportName: `${props.stackName}-agentcore-runtime-url`, + }); + } + + /** + * Creates the Python code for the AgentCore manager Lambda function + */ + private createAgentCoreManagerCode(): string { + return ` 
import json
import logging
import os
import urllib.parse

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# NOTE: this module is embedded in a TypeScript template literal. Keep the
# source free of backticks, backslashes and dollar-brace sequences, otherwise
# the surrounding TypeScript will alter or break the generated Python.

# Placeholder written to the log instead of potentially secret values.
_REDACTED = '***'


def handler(event, context):
    """Handle a CloudFormation custom-resource event for an AgentCore Runtime.

    Dispatches Create, Update and Delete requests to the matching helper.

    Args:
        event: Custom-resource event. Reads 'RequestType',
            'ResourceProperties' and, for Update/Delete, 'PhysicalResourceId'.
        context: Lambda context object (unused).

    Returns:
        A dict with 'PhysicalResourceId' and 'Data', as produced by
        format_response() or delete_runtime().

    Raises:
        KeyError: If a required resource property is missing.
        ValueError: If the request type is not Create, Update or Delete.
        Exception: Any error raised by the AgentCore control-plane call is
            logged and re-raised so the deployment fails visibly.
    """
    # The environment variables may carry API keys, so never log them verbatim.
    logger.info("Received event: %s", json.dumps(_redact_event(event)))

    request_type = event['RequestType']
    properties = event['ResourceProperties']

    runtime_name = properties['RuntimeName']
    docker_image_uri = properties['DockerImageUri']
    role_arn = properties['RoleArn']
    network_mode = properties.get('NetworkMode', 'PUBLIC')
    env_vars = properties.get('EnvironmentVariables', '')
    cognito_discovery_url = properties['CognitoDiscoveryUrl']
    cognito_client_id = properties['CognitoClientId']

    # Imported here rather than at module level so the helper functions can be
    # imported and unit-tested without the AWS SDK installed.
    import boto3

    client = boto3.client('bedrock-agentcore-control')

    try:
        if request_type == 'Create':
            return create_runtime(
                client, runtime_name, docker_image_uri, role_arn,
                network_mode, env_vars, cognito_discovery_url, cognito_client_id
            )
        if request_type == 'Update':
            return update_runtime(
                client, event.get('PhysicalResourceId', ''), runtime_name,
                docker_image_uri, role_arn, network_mode, env_vars,
                cognito_discovery_url, cognito_client_id
            )
        if request_type == 'Delete':
            return delete_runtime(client, event.get('PhysicalResourceId', ''))
        raise ValueError(f"Unknown request type: {request_type}")
    except Exception:
        logger.exception("Error handling %s", request_type)
        raise


def _redact_event(event):
    """Return a shallow copy of *event* with environment variables masked."""
    redacted = dict(event)
    for section in ('ResourceProperties', 'OldResourceProperties'):
        props = redacted.get(section)
        if isinstance(props, dict) and props.get('EnvironmentVariables'):
            redacted[section] = {**props, 'EnvironmentVariables': _REDACTED}
    return redacted


def _parse_env_vars(env_vars):
    """Convert the EnvironmentVariables property into a str-to-str dict.

    The construct passes the variables as one comma-joined string.
    NOTE(review): the entries are assumed to be KEY=VALUE pairs; confirm
    against the code that builds envVarsArray. Values that themselves contain
    a comma cannot be represented in this encoding.

    Args:
        env_vars: Comma-separated 'KEY=VALUE' string, a dict, or an empty value.

    Returns:
        Dict mapping variable names to values (empty if nothing was supplied).

    Raises:
        ValueError: If an entry has no '=' separator or an empty name.
    """
    if not env_vars:
        return {}
    if isinstance(env_vars, dict):
        return {str(name): str(value) for name, value in env_vars.items()}

    parsed = {}
    for position, entry in enumerate(env_vars.split(','), start=1):
        entry = entry.strip()
        if not entry:
            continue
        # Split on the first '=' only, so values may contain '=' themselves.
        name, separator, value = entry.partition('=')
        name = name.strip()
        if not separator or not name:
            # Report the position only; the entry text may contain a secret.
            raise ValueError(
                f"Environment variable entry {position} is not in KEY=VALUE form"
            )
        parsed[name] = value
    return parsed


def _runtime_config(docker_image_uri, role_arn, network_mode, env_vars, cognito_discovery_url, cognito_client_id):
    """Build the request arguments shared by the create and update calls."""
    return {
        'agentRuntimeArtifact': {
            'containerConfiguration': {
                'containerUri': docker_image_uri
            }
        },
        'networkConfiguration': {
            'networkMode': network_mode
        },
        'roleArn': role_arn,
        # Forward the caller's Authorization header to the container.
        'requestHeaderConfiguration': {
            'requestHeaderAllowlist': ['Authorization']
        },
        # The API expects a mapping, not the comma-joined property string.
        'environmentVariables': _parse_env_vars(env_vars),
        'authorizerConfiguration': {
            'customJWTAuthorizer': {
                'discoveryUrl': cognito_discovery_url,
                'allowedClients': [cognito_client_id]
            }
        },
    }


def create_runtime(client, runtime_name, docker_image_uri, role_arn, network_mode, env_vars, cognito_discovery_url, cognito_client_id):
    """Create a new AgentCore Runtime, or adopt an existing one with the same name.

    Args:
        client: bedrock-agentcore-control client.
        runtime_name: Name of the runtime to create.
        docker_image_uri: Container image URI for the runtime.
        role_arn: IAM role ARN the runtime runs as.
        network_mode: Network mode passed through to the API.
        env_vars: Environment variables (see _parse_env_vars for the format).
        cognito_discovery_url: OIDC discovery URL for the JWT authorizer.
        cognito_client_id: Client id allowed by the JWT authorizer.

    Returns:
        CloudFormation response dict built by format_response().
    """
    logger.info(f"Creating AgentCore Runtime: {runtime_name}")

    # An existing runtime with this name is returned as-is (not updated).
    existing_runtime = find_runtime_by_name(client, runtime_name)
    if existing_runtime:
        logger.info(f"Runtime already exists: {existing_runtime['agentRuntimeId']}")
        return format_response(existing_runtime)

    response = client.create_agent_runtime(
        agentRuntimeName=runtime_name,
        **_runtime_config(
            docker_image_uri, role_arn, network_mode, env_vars,
            cognito_discovery_url, cognito_client_id
        )
    )

    logger.info(f"Created runtime: {response}")
    return format_response(response)


def update_runtime(client, physical_resource_id, runtime_name, docker_image_uri, role_arn, network_mode, env_vars, cognito_discovery_url, cognito_client_id):
    """Update an existing AgentCore Runtime, creating it if it does not exist.

    Args:
        client: bedrock-agentcore-control client.
        physical_resource_id: Runtime id recorded by CloudFormation.
        runtime_name: Runtime name, used only when falling back to creation.
        docker_image_uri: Container image URI for the runtime.
        role_arn: IAM role ARN the runtime runs as.
        network_mode: Network mode passed through to the API.
        env_vars: Environment variables (see _parse_env_vars for the format).
        cognito_discovery_url: OIDC discovery URL for the JWT authorizer.
        cognito_client_id: Client id allowed by the JWT authorizer.

    Returns:
        CloudFormation response dict built by format_response().
    """
    logger.info(f"Updating AgentCore Runtime: {physical_resource_id}")

    # The physical resource id is the runtime id.
    runtime_id = physical_resource_id
    if not runtime_id or runtime_id == 'None':
        # No usable runtime id was recorded, so there is nothing to update.
        return create_runtime(
            client, runtime_name, docker_image_uri, role_arn,
            network_mode, env_vars, cognito_discovery_url, cognito_client_id
        )

    try:
        response = client.update_agent_runtime(
            agentRuntimeId=runtime_id,
            **_runtime_config(
                docker_image_uri, role_arn, network_mode, env_vars,
                cognito_discovery_url, cognito_client_id
            )
        )
    except client.exceptions.ResourceNotFoundException:
        logger.warning(f"Runtime not found, creating new: {runtime_name}")
        return create_runtime(
            client, runtime_name, docker_image_uri, role_arn,
            network_mode, env_vars, cognito_discovery_url, cognito_client_id
        )

    logger.info(f"Updated runtime: {response}")
    return format_response(response)


def delete_runtime(client, physical_resource_id):
    """Delete an AgentCore Runtime on a best-effort basis.

    Errors are logged but never raised, so a stack delete or rollback is not
    blocked by a runtime that is already gone or cannot be removed.

    Args:
        client: bedrock-agentcore-control client.
        physical_resource_id: Runtime id recorded by CloudFormation.

    Returns:
        Dict echoing the physical resource id with empty 'Data'.
    """
    logger.info(f"Deleting AgentCore Runtime: {physical_resource_id}")

    result = {
        'PhysicalResourceId': physical_resource_id,
        'Data': {}
    }

    runtime_id = physical_resource_id
    if not runtime_id or runtime_id == 'None':
        logger.warning("No runtime ID to delete")
        return result

    try:
        client.delete_agent_runtime(agentRuntimeId=runtime_id)
        logger.info(f"Deleted runtime: {runtime_id}")
    except client.exceptions.ResourceNotFoundException:
        logger.warning(f"Runtime already deleted: {runtime_id}")
    except Exception as e:
        # Deliberately swallowed: CloudFormation rollback should succeed.
        logger.error(f"Error deleting runtime: {str(e)}")

    return result


def find_runtime_by_name(client, runtime_name):
    """Find an existing runtime by name, following pagination.

    Args:
        client: bedrock-agentcore-control client.
        runtime_name: Runtime name to look for.

    Returns:
        The matching runtime summary dict, or None if there is no match or the
        listing call fails (the failure is logged, not raised).
    """
    try:
        request = {}
        while True:
            response = client.list_agent_runtimes(**request)
            for runtime in response.get('agentRuntimes', []):
                if runtime.get('agentRuntimeName') == runtime_name:
                    return runtime
            # Keep paging until the service stops returning a token.
            next_token = response.get('nextToken')
            if not next_token:
                return None
            request = {'nextToken': next_token}
    except Exception as e:
        logger.error(f"Error listing runtimes: {str(e)}")
    return None


def _region_from_arn(runtime_arn):
    """Return the region field of *runtime_arn*, falling back to the environment."""
    # ARN layout: arn:partition:service:region:account:resource
    fields = runtime_arn.split(':')
    if len(fields) > 3 and fields[3]:
        return fields[3]
    return os.environ.get('AWS_REGION') or os.environ.get('AWS_DEFAULT_REGION')


def format_response(response):
    """Format an AgentCore API response for CloudFormation.

    Args:
        response: Dict from create/update/list calls. Reads 'agentRuntimeId',
            'agentRuntimeArn', 'status' and 'workloadIdentityDetails'.

    Returns:
        Dict with 'PhysicalResourceId' (the runtime id) and a 'Data' mapping
        holding RuntimeId, RuntimeArn, RuntimeUrl, Status and
        WorkloadIdentityArn.
    """
    runtime_id = response.get('agentRuntimeId', '')
    runtime_arn = response.get('agentRuntimeArn', '')

    # URL format: https://bedrock-agentcore.{region}.amazonaws.com/runtimes/{encoded_arn}
    # The region is taken from the ARN so the URL always matches the runtime.
    region = _region_from_arn(runtime_arn)
    encoded_arn = urllib.parse.quote(runtime_arn, safe='')
    runtime_url = f"https://bedrock-agentcore.{region}.amazonaws.com/runtimes/{encoded_arn}"

    return {
        'PhysicalResourceId': runtime_id,
        'Data': {
            'RuntimeId': runtime_id,
            'RuntimeArn': runtime_arn,
            'RuntimeUrl': runtime_url,
            'Status': response.get('status', 'UNKNOWN'),
            'WorkloadIdentityArn': response.get('workloadIdentityDetails', {}).get('workloadIdentityArn', 'N/A')
        }
    }
/**
 * Fargate Next.js Service Construct
 *
 * Simplified Fargate deployment for the Next.js frontend.
 * This replaces the EC2-based ECS service with a stateless Fargate service.
 *
 * Key differences from ECS EC2 service:
 * - No EC2 instances, ASG, or capacity providers
 * - No sticky sessions (agent state is managed by AgentCore)
 * - Uses awsvpc network mode (Fargate requirement)
 * - Simplified configuration without volume mounts
 */

import * as cdk from 'aws-cdk-lib';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import * as ecs from 'aws-cdk-lib/aws-ecs';
import * as elbv2 from 'aws-cdk-lib/aws-elasticloadbalancingv2';
import * as iam from 'aws-cdk-lib/aws-iam';
import * as logs from 'aws-cdk-lib/aws-logs';
import * as s3 from 'aws-cdk-lib/aws-s3';
import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
import * as kms from 'aws-cdk-lib/aws-kms';
import { Platform } from 'aws-cdk-lib/aws-ecr-assets';
import { Construct } from 'constructs';
import { ECS_CONFIG } from '../common/constants';

export interface FargateNextjsServiceConstructProps {
  /**
   * VPC for the Fargate tasks
   */
  vpc: ec2.IVpc;

  /**
   * Application Load Balancer (passed from main stack)
   */
  alb: elbv2.IApplicationLoadBalancer;

  /**
   * Security group for the ALB
   */
  albSecurityGroup: ec2.ISecurityGroup;

  /**
   * Security group for ECS/Fargate tasks
   */
  ecsSecurityGroup: ec2.ISecurityGroup;

  /**
   * S3 bucket for uploads
   */
  uploadsBucket: s3.IBucket;

  /**
   * S3 bucket for logs
   */
  logsBucket: s3.IBucket;

  /**
   * KMS key for encryption
   */
  kmsKey: kms.IKey;

  /**
   * Stack name for resource naming
   */
  stackName: string;

  /**
   * CloudFront distribution domain for NEXTAUTH_URL
   */
  distributionDomain: string;

  /**
   * AgentCore Runtime URL (backend API endpoint)
   */
  agentCoreRuntimeUrl: string;

  /**
   * Environment variable configuration
   */
  envConfig?: {
    claudeUseBedrock?: boolean;
    anthropicApiKey?: string;
    llmApiKey?: string;
    llmBaseUrl?: string;
    llmModelName?: string;
    tavilyApiKey?: string;
    uploadthingToken?: string;
    unsplashAccessKey?: string;
  };

  /**
   * Cognito authentication configuration
   */
  cognitoConfig?: {
    clientId: string;
    clientSecret: cdk.SecretValue;
    issuer: string;
  };

  /**
   * Desired number of tasks (default: ECS_CONFIG.desiredCount)
   */
  desiredCount?: number;

  /**
   * CPU units for the task (default: 1024 = 1 vCPU)
   */
  cpu?: number;

  /**
   * Memory in MiB for the task (default: 2048 = 2 GB)
   */
  memoryMiB?: number;
}

/**
 * Runs the Next.js frontend as an ECS Fargate service behind the supplied ALB.
 *
 * Creates the cluster, secrets, log group, IAM roles, task definition,
 * target group, HTTP listener, service and auto scaling policies, and
 * exports the cluster name, service name and ALB DNS name.
 */
export class FargateNextjsServiceConstruct extends Construct {
  public readonly cluster: ecs.Cluster;
  public readonly service: ecs.FargateService;
  public readonly alb: elbv2.IApplicationLoadBalancer;
  public readonly taskRole: iam.Role;

  constructor(scope: Construct, id: string, props: FargateNextjsServiceConstructProps) {
    super(scope, id);

    // =========================================================================
    // 1. ECS Cluster
    // =========================================================================

    this.cluster = new ecs.Cluster(this, 'Cluster', {
      vpc: props.vpc,
      clusterName: `${props.stackName}-fargate-cluster`,
      containerInsights: true,
    });

    // =========================================================================
    // 2. Secrets
    // =========================================================================

    // NextAuth secret for session management
    const nextAuthSecret = new secretsmanager.Secret(this, 'NextAuthSecret', {
      secretName: `${props.stackName}/nextauth-secret`,
      generateSecretString: {
        excludeCharacters: '{}[]"\\',
        passwordLength: 32,
      },
      removalPolicy: cdk.RemovalPolicy.DESTROY,
    });

    // =========================================================================
    // 3. CloudWatch Log Group
    // =========================================================================

    const logGroup = new logs.LogGroup(this, 'LogGroup', {
      logGroupName: `/ecs/${props.stackName}-fargate`,
      retention: logs.RetentionDays.ONE_WEEK,
      removalPolicy: cdk.RemovalPolicy.DESTROY,
    });

    // =========================================================================
    // 4. IAM Roles
    // =========================================================================

    // Task Execution Role (for pulling images, writing logs)
    const taskExecutionRole = new iam.Role(this, 'TaskExecutionRole', {
      assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
      managedPolicies: [
        iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonECSTaskExecutionRolePolicy'),
      ],
    });

    // Grant access to secrets
    nextAuthSecret.grantRead(taskExecutionRole);

    // Task Role (for application permissions)
    this.taskRole = new iam.Role(this, 'TaskRole', {
      assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
      inlinePolicies: {
        BedrockAccess: new iam.PolicyDocument({
          statements: [
            new iam.PolicyStatement({
              sid: 'BedrockInvokeModel',
              effect: iam.Effect.ALLOW,
              actions: ['bedrock:InvokeModel', 'bedrock:InvokeModelWithResponseStream'],
              resources: [
                // Foundation models
                `arn:aws:bedrock:*::foundation-model/*`,
                // Inference profiles
                `arn:aws:bedrock:*:${cdk.Aws.ACCOUNT_ID}:inference-profile/*`,
                // Application inference profiles
                `arn:aws:bedrock:*:${cdk.Aws.ACCOUNT_ID}:application-inference-profile/*`,
              ],
            }),
            new iam.PolicyStatement({
              sid: 'BedrockGetInferenceProfile',
              effect: iam.Effect.ALLOW,
              actions: ['bedrock:GetInferenceProfile', 'bedrock:ListInferenceProfiles'],
              resources: ['*'],
            }),
          ],
        }),
      },
    });

    // Grant S3 permissions (and use of the KMS key passed in via props)
    props.kmsKey.grantEncryptDecrypt(this.taskRole);
    props.uploadsBucket.grantReadWrite(this.taskRole);

    // =========================================================================
    // 5. Task Definition (Fargate)
    // =========================================================================

    const taskDefinition = new ecs.FargateTaskDefinition(this, 'TaskDef', {
      cpu: props.cpu || 1024, // 1 vCPU default (Fargate supports: 256, 512, 1024, 2048, 4096)
      memoryLimitMiB: props.memoryMiB || 2048, // 2 GB default
      executionRole: taskExecutionRole,
      taskRole: this.taskRole,
      runtimePlatform: {
        cpuArchitecture: ecs.CpuArchitecture.X86_64,
        operatingSystemFamily: ecs.OperatingSystemFamily.LINUX,
      },
    });

    // =========================================================================
    // 6. Environment Variables
    // =========================================================================

    // Plain (non-secret) container environment: variable name -> value.
    const environment: Record<string, string> = {
      NODE_ENV: 'production',
      PORT: '3000',
      HOSTNAME: '0.0.0.0',
      AWS_REGION: cdk.Aws.REGION,

      // AI Services - Frontend calls AgentCore backend
      AGENTCORE_RUNTIME_URL: props.agentCoreRuntimeUrl,
      // Bedrock is enabled unless the caller explicitly passes `false`.
      CLAUDE_CODE_USE_BEDROCK: props.envConfig?.claudeUseBedrock !== false ? '1' : '0',
      ENABLE_CLAUDE_AGENT: 'true',

      // S3 Configuration
      UPLOADS_BUCKET: props.uploadsBucket.bucketName,

      // NextAuth URLs
      NEXTAUTH_URL: `https://${props.distributionDomain}`,
    };

    // Optional third-party service configuration
    if (props.envConfig?.llmBaseUrl) {
      environment.LLM_BASE_URL = props.envConfig.llmBaseUrl;
    }
    if (props.envConfig?.llmModelName) {
      environment.LLM_MODEL_NAME = props.envConfig.llmModelName;
    }

    // =========================================================================
    // 7. Secrets Configuration
    // =========================================================================

    // Container secrets: variable name -> Secrets Manager reference.
    const secrets: Record<string, ecs.Secret> = {
      NEXTAUTH_SECRET: ecs.Secret.fromSecretsManager(nextAuthSecret),
    };

    // Optional API keys as secrets
    // NOTE(review): the keys arrive as plain strings and are wrapped with
    // SecretValue.unsafePlainText, which presumably places the value in the
    // synthesized template. Consider accepting pre-created secrets instead.
    if (props.envConfig?.anthropicApiKey) {
      const anthropicSecret = new secretsmanager.Secret(this, 'AnthropicApiKey', {
        secretName: `${props.stackName}/anthropic-api-key`,
        secretStringValue: cdk.SecretValue.unsafePlainText(props.envConfig.anthropicApiKey),
      });
      secrets.ANTHROPIC_API_KEY = ecs.Secret.fromSecretsManager(anthropicSecret);
    }

    if (props.envConfig?.llmApiKey) {
      const llmSecret = new secretsmanager.Secret(this, 'LlmApiKey', {
        secretName: `${props.stackName}/llm-api-key`,
        secretStringValue: cdk.SecretValue.unsafePlainText(props.envConfig.llmApiKey),
      });
      secrets.LLM_API_KEY = ecs.Secret.fromSecretsManager(llmSecret);
    }

    if (props.envConfig?.tavilyApiKey) {
      const tavilySecret = new secretsmanager.Secret(this, 'TavilyApiKey', {
        secretName: `${props.stackName}/tavily-api-key`,
        secretStringValue: cdk.SecretValue.unsafePlainText(props.envConfig.tavilyApiKey),
      });
      secrets.TAVILY_API_KEY = ecs.Secret.fromSecretsManager(tavilySecret);
    }

    if (props.envConfig?.uploadthingToken) {
      const uploadthingSecret = new secretsmanager.Secret(this, 'UploadthingToken', {
        secretName: `${props.stackName}/uploadthing-token`,
        secretStringValue: cdk.SecretValue.unsafePlainText(props.envConfig.uploadthingToken),
      });
      secrets.UPLOADTHING_TOKEN = ecs.Secret.fromSecretsManager(uploadthingSecret);
    }

    if (props.envConfig?.unsplashAccessKey) {
      const unsplashSecret = new secretsmanager.Secret(this, 'UnsplashAccessKey', {
        secretName: `${props.stackName}/unsplash-access-key`,
        secretStringValue: cdk.SecretValue.unsafePlainText(props.envConfig.unsplashAccessKey),
      });
      secrets.UNSPLASH_ACCESS_KEY = ecs.Secret.fromSecretsManager(unsplashSecret);
    }

    // Cognito configuration
    if (props.cognitoConfig) {
      const cognitoClientSecret = new secretsmanager.Secret(this, 'CognitoClientSecret', {
        secretName: `${props.stackName}/cognito-client-secret`,
        secretStringValue: props.cognitoConfig.clientSecret,
        removalPolicy: cdk.RemovalPolicy.DESTROY,
      });

      cognitoClientSecret.grantRead(taskExecutionRole);
      secrets.COGNITO_CLIENT_SECRET = ecs.Secret.fromSecretsManager(cognitoClientSecret);
      environment.COGNITO_CLIENT_ID = props.cognitoConfig.clientId;
      environment.COGNITO_ISSUER = props.cognitoConfig.issuer;
    }

    // =========================================================================
    // 8. Container Definition
    // =========================================================================

    const container = taskDefinition.addContainer('nextjs', {
      image: ecs.ContainerImage.fromAsset('../frontend', {
        file: 'Dockerfile.production',
        platform: Platform.LINUX_AMD64,
      }),
      environment,
      secrets,
      logging: ecs.LogDrivers.awsLogs({
        streamPrefix: 'nextjs',
        logGroup: logGroup,
      }),
      healthCheck: {
        // Requires `wget` to be present in the container image.
        command: ['CMD-SHELL', 'wget --spider http://localhost:3000/api/health || exit 1'],
        interval: cdk.Duration.seconds(30),
        timeout: cdk.Duration.seconds(10),
        retries: 5,
        startPeriod: cdk.Duration.seconds(120),
      },
    });

    container.addPortMappings({
      containerPort: 3000,
      protocol: ecs.Protocol.TCP,
    });

    // =========================================================================
    // 9. ALB and Target Group
    // =========================================================================

    this.alb = props.alb;

    // Target Group - No sticky sessions needed (stateless frontend)
    const targetGroup = new elbv2.ApplicationTargetGroup(this, 'TargetGroup', {
      vpc: props.vpc,
      port: 3000,
      protocol: elbv2.ApplicationProtocol.HTTP,
      targetType: elbv2.TargetType.IP, // Fargate uses IP target type (awsvpc mode)

      // NOTE: No sticky sessions - agent state is managed by AgentCore backend
      // stickinessCookieDuration: undefined,

      healthCheck: {
        enabled: true,
        healthyHttpCodes: '200',
        path: '/api/health',
        protocol: elbv2.Protocol.HTTP,
        port: 'traffic-port',
        interval: cdk.Duration.seconds(30),
        timeout: cdk.Duration.seconds(10),
        healthyThresholdCount: 2,
        unhealthyThresholdCount: 5,
      },
      deregistrationDelay: cdk.Duration.seconds(30),
    });

    // Listener
    this.alb.addListener('Listener', {
      port: 80,
      protocol: elbv2.ApplicationProtocol.HTTP,
      defaultAction: elbv2.ListenerAction.forward([targetGroup]),
    });

    // =========================================================================
    // 10. Fargate Service
    // =========================================================================

    this.service = new ecs.FargateService(this, 'Service', {
      cluster: this.cluster,
      serviceName: `${props.stackName}-fargate-service`,
      taskDefinition,
      // `??` rather than `||` so an explicit 0 is not replaced by the default.
      desiredCount: props.desiredCount ?? ECS_CONFIG.desiredCount,
      securityGroups: [props.ecsSecurityGroup],
      vpcSubnets: {
        subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS,
      },
      enableExecuteCommand: true,
      healthCheckGracePeriod: cdk.Duration.seconds(120),
      // Deployment configuration
      minHealthyPercent: 50,
      maxHealthyPercent: 200,
      // Use REPLICA deployment (default)
      circuitBreaker: {
        enable: true,
        rollback: true,
      },
    });

    // Attach to target group
    this.service.attachToApplicationTargetGroup(targetGroup);

    // =========================================================================
    // 11. Auto Scaling (optional)
    // =========================================================================

    const scaling = this.service.autoScaleTaskCount({
      minCapacity: 1,
      maxCapacity: 4,
    });

    // Scale based on CPU utilization
    scaling.scaleOnCpuUtilization('CpuScaling', {
      targetUtilizationPercent: 70,
      scaleInCooldown: cdk.Duration.minutes(5),
      scaleOutCooldown: cdk.Duration.minutes(2),
    });

    // Scale based on memory utilization
    scaling.scaleOnMemoryUtilization('MemoryScaling', {
      targetUtilizationPercent: 80,
      scaleInCooldown: cdk.Duration.minutes(5),
      scaleOutCooldown: cdk.Duration.minutes(2),
    });

    // =========================================================================
    // Outputs
    // =========================================================================

    new cdk.CfnOutput(this, 'ClusterName', {
      value: this.cluster.clusterName,
      description: 'ECS Fargate cluster name',
      exportName: `${props.stackName}-fargate-cluster-name`,
    });

    new cdk.CfnOutput(this, 'ServiceName', {
      value: this.service.serviceName,
      description: 'ECS Fargate service name',
      exportName: `${props.stackName}-fargate-service-name`,
    });

    new cdk.CfnOutput(this, 'ALBDnsName', {
      value: this.alb.loadBalancerDnsName,
      description: 'ALB DNS name',
      exportName: `${props.stackName}-fargate-alb-dns`,
    });
  }
}
Create Application Load Balancer (before CloudFront) const alb = new elbv2.ApplicationLoadBalancer(this, 'ALB', { vpc: vpcConstruct.vpc, @@ -85,11 +79,29 @@ export class SlideForgeStack extends cdk.Stack { stackName, }); - // 4. Create ECS Service (after CloudFront, with distribution domain) - // Note: Data storage uses S3 instead of database (PostgreSQL removed) - const ecsConstruct = new EcsNextjsServiceConstruct(this, 'Compute', { + // 4. Create AgentCore Backend (for AI agent processing) + // AgentCore runs the Strands-based agent in Bedrock AgentCore Runtime + const agentCoreConstruct = new AgentCoreConstruct(this, 'AgentCore', { + stackName, + runtimeName: `${stackName}-agent-runtime`, + workspaceBucket: s3Construct.uploadsBucket, + cognitoUserPoolId: cognitoConstruct.userPool.userPoolId, + cognitoClientId: cognitoConstruct.oidc.clientId, + networkMode: 'PUBLIC', + environmentVariables: { + // AI Configuration + CLAUDE_CODE_USE_BEDROCK: envConfig.claudeConfig.useBedrock ? '1' : '0', + // Optional third-party API keys (if configured) + ...(envConfig.thirdParty.tavilyApiKey && { TAVILY_API_KEY: envConfig.thirdParty.tavilyApiKey }), + ...(envConfig.thirdParty.unsplashAccessKey && { UNSPLASH_ACCESS_KEY: envConfig.thirdParty.unsplashAccessKey }), + }, + }); + + // 5. 
Create Fargate Frontend Service (stateless Next.js frontend) + // The frontend calls AgentCore for agent operations via AGENTCORE_RUNTIME_URL + const fargateConstruct = new FargateNextjsServiceConstruct(this, 'Compute', { vpc: vpcConstruct.vpc, - alb: alb, // Pass existing ALB + alb: alb, albSecurityGroup: vpcConstruct.albSecurityGroup, ecsSecurityGroup: vpcConstruct.ecsSecurityGroup, uploadsBucket: s3Construct.uploadsBucket, @@ -97,7 +109,9 @@ export class SlideForgeStack extends cdk.Stack { kmsKey: s3Construct.kmsKey, stackName, distributionDomain: cloudfrontConstruct.distribution.distributionDomainName, - // ไผ ้€’็Žฏๅขƒๅ˜้‡้…็ฝฎ + // Pass AgentCore Runtime URL for backend API calls + agentCoreRuntimeUrl: agentCoreConstruct.runtimeUrl, + // Environment configuration for frontend envConfig: { claudeUseBedrock: envConfig.claudeConfig.useBedrock, anthropicApiKey: envConfig.claudeConfig.anthropicApiKey, @@ -108,26 +122,14 @@ export class SlideForgeStack extends cdk.Stack { uploadthingToken: envConfig.thirdParty.uploadthingToken, unsplashAccessKey: envConfig.thirdParty.unsplashAccessKey, }, - // ไผ ้€’ Cognito ้…็ฝฎ + // Cognito authentication configuration cognitoConfig: { clientId: cognitoConstruct.oidc.clientId, clientSecret: cognitoConstruct.oidc.clientSecret, issuer: cognitoConstruct.oidc.issuer, }, - // ไผ ้€’ Agent SDK Role ARN - agentSdkRoleArn: agentSdkRole.role.roleArn, }); - // ๆŽˆๆƒ ECS Task Role ๅฏไปฅไปฃๅ…ฅ Agent SDK Role - ecsConstruct.taskRole.addToPolicy( - new cdk.aws_iam.PolicyStatement({ - sid: 'AssumeAgentSdkRole', - effect: cdk.aws_iam.Effect.ALLOW, - actions: ['sts:AssumeRole'], - resources: [agentSdkRole.role.roleArn], - }) - ); - // 9. 
ๅŠจๆ€ๆ›ดๆ–ฐ Cognito Callback URLs๏ผˆไฝฟ็”จ CloudFront URL๏ผ‰ const applicationUrl = `https://${cloudfrontConstruct.distribution.distributionDomainName}`; @@ -197,8 +199,16 @@ export class SlideForgeStack extends cdk.Stack { exportName: `${stackName}-cognito-issuer`, }); - // Agent SDK Role Outputs are already defined in AgentSdkRole construct - // Removed duplicate outputs to avoid CloudFormation export name conflicts + // AgentCore Outputs (main outputs are in the construct, add summary here) + new cdk.CfnOutput(this, 'AgentCoreRuntimeUrl', { + value: agentCoreConstruct.runtimeUrl, + description: 'AgentCore Runtime URL for backend API', + }); + + new cdk.CfnOutput(this, 'AgentCoreECRRepository', { + value: agentCoreConstruct.ecrRepository.repositoryUri, + description: 'ECR Repository for AgentCore container images', + }); new cdk.CfnOutput(this, 'DeploymentInstructions', { value: [ @@ -207,24 +217,32 @@ export class SlideForgeStack extends cdk.Stack { 'Slide-Forge Deployment Successful!', '========================================', '', - '๐Ÿ“‹ Next Steps:', + 'Next Steps:', + '', + '1. Build and push the AgentCore container:', + ` cd agentcore && docker build -t ${agentCoreConstruct.ecrRepository.repositoryUri}:latest .`, + ` aws ecr get-login-password | docker login --username AWS --password-stdin ${agentCoreConstruct.ecrRepository.repositoryUri}`, + ` docker push ${agentCoreConstruct.ecrRepository.repositoryUri}:latest`, '', - '1. Create optional secrets in AWS Secrets Manager (if not already configured):', + '2. Create optional secrets in AWS Secrets Manager (if not already configured):', ` aws secretsmanager create-secret --name ${stackName}/tavily-api-key --secret-string "tvly-..."`, ` aws secretsmanager create-secret --name ${stackName}/uploadthing-token --secret-string "sk_live_..."`, '', - '2. Build and upload static assets:', - ' pnpm build', + '3. 
Build and upload static assets:', + ' cd frontend && pnpm build', ` aws s3 sync .next/static s3://${s3Construct.staticBucket.bucketName}/_next/static`, ` aws s3 sync public s3://${s3Construct.staticBucket.bucketName}/public`, '', - '3. Invalidate CloudFront cache:', + '4. Invalidate CloudFront cache:', ` aws cloudfront create-invalidation --distribution-id ${cloudfrontConstruct.distribution.distributionId} --paths "/*"`, '', - '4. Access your application:', + '5. Access your application:', ` https://${cloudfrontConstruct.distribution.distributionDomainName}`, '', - 'Note: This deployment uses S3 for data storage (no database required).', + 'Architecture:', + '- Frontend: Fargate (stateless Next.js)', + '- Backend: Bedrock AgentCore Runtime (Strands-based agent)', + '- Storage: S3 (no database required)', '', '========================================', ].join('\n'), From 83b81ee58d8227553209cc7bfe163780e42bbf46 Mon Sep 17 00:00:00 2001 From: ianleely Date: Thu, 5 Feb 2026 11:00:42 +0000 Subject: [PATCH 2/3] feat: add JWT token refresh and fix AgentCore deployment - Add Cognito JWT token auto-refresh mechanism (60s before expiry) - Add AgentCoreAuthError handling for 401 responses in chat API - Fix AgentCore construct for two-phase deployment (skipRuntimeCreation) - Fix runtime name validation (use underscores instead of hyphens) - Update .gitignore with Python cache and CDK output patterns - Update README documentation to reflect current architecture Co-Authored-By: Claude Opus 4.5 --- .gitignore | 40 +- README.md | 739 ++++-------------- frontend/src/app/api/agent/chat/route.ts | 45 +- frontend/src/lib/agent/agentcore-client.ts | 34 +- frontend/src/server/auth.ts | 128 ++- infrastructure/README.md | 384 ++++----- .../lib/compute/agentcore-construct.ts | 82 +- infrastructure/lib/slide-forge-stack.ts | 7 +- 8 files changed, 596 insertions(+), 863 deletions(-) diff --git a/.gitignore b/.gitignore index 42cabca..6a5c85c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 
+1,7 @@ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. # dependencies -/node_modules +node_modules/ /.pnp .pnp.js .yarn/install-state.gz @@ -10,7 +10,7 @@ /coverage # next.js -/.next/ +.next/ /out/ # production @@ -26,15 +26,45 @@ yarn-debug.log* yarn-error.log* # local env files -.env.local .env +.env.local +.env.*.local + # vercel .vercel # typescript *.tsbuildinfo next-env.d.ts + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +.venv/ +venv/ +ENV/ + +# CDK +cdk.out/ +cdk.context.json + +# Deploy scripts temp files +deploy/.build_output/ +deploy/.agentcore_output/ +deploy/config.env + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Reference resources (not part of main project) resource/ + +# Generated worker files /public/pdf.worker.min.mjs -frontend/node_modules -frontend/.next/ +/public/gif.worker.js diff --git a/README.md b/README.md index dc131de..9f4426b 100644 --- a/README.md +++ b/README.md @@ -1,639 +1,212 @@ -# ๐ŸŽจ Slide Forge +# Slide Forge -An intelligent presentation creation platform powered by AI. Generate stunning, professional presentations from text input or file uploads in minutes. +An intelligent presentation creation platform powered by AI. Generate stunning, professional presentations from text input or file uploads through natural conversation with Claude. -## ๐Ÿ“ Project Structure +## Project Structure ``` slide-forge/ -โ”œโ”€โ”€ frontend/ # Next.js application (main codebase) +โ”œโ”€โ”€ frontend/ # Next.js application โ”‚ โ”œโ”€โ”€ src/ # Application source code -โ”‚ โ”œโ”€โ”€ public/ # Static assets -โ”‚ โ”œโ”€โ”€ prisma/ # Database schema -โ”‚ โ””โ”€โ”€ package.json +โ”‚ โ””โ”€โ”€ public/ # Static assets โ”‚ -โ”œโ”€โ”€ infrastructure/ # AWS CDK deployment infrastructure -โ”‚ โ”œโ”€โ”€ lib/ # CDK constructs (VPC, ECS, Aurora, etc.) 
-โ”‚ โ”œโ”€โ”€ docker/ # Production Dockerfile +โ”œโ”€โ”€ backend/ # AgentCore backend (Strands Agent) +โ”‚ โ”œโ”€โ”€ api/ # API endpoints +โ”‚ โ”œโ”€โ”€ core/ # Session management +โ”‚ โ””โ”€โ”€ Dockerfile # Container image +โ”‚ +โ”œโ”€โ”€ infrastructure/ # AWS CDK deployment +โ”‚ โ”œโ”€โ”€ lib/ # CDK constructs โ”‚ โ””โ”€โ”€ README.md # Deployment guide โ”‚ -โ””โ”€โ”€ resource/ # Reference architectures +โ””โ”€โ”€ deploy/ # Deployment scripts ``` -## โšก Quick Start (One Command!) - -```bash -# First time installation -./scripts/install.sh +## Architecture -# Subsequent starts -./scripts/start.sh +``` + CloudFront (CDN) + โ”‚ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ โ”‚ โ”‚ + S3 Static Private ALB Cognito + (assets) (VPC Origin) (Auth) + โ”‚ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ ECS Fargate โ”‚ + โ”‚ (Next.js) โ”‚ + โ”‚ Stateless โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ โ”‚ โ”‚ + AgentCore S3 Uploads Secrets + Runtime (Sessions) Manager + (Claude) ``` -**That's it!** The script will automatically: -- โœ… Check environment (Node.js, pnpm, Docker) -- โœ… Install dependencies -- โœ… Start PostgreSQL in Docker -- โœ… Setup database schema -- โœ… Configure environment -- โœ… Start the dev server +**Key Components:** +- **Frontend**: Stateless Next.js on ECS Fargate (1-4 tasks, auto-scaling) +- **Backend**: AWS Bedrock AgentCore Runtime with Strands Agent + Claude +- **Auth**: Amazon Cognito (OIDC + JWT with auto-refresh) +- **Storage**: S3 (no database - all data stored in S3) +- **CDN**: CloudFront with VPC Origin for private ALB access -Access the app at: **http://localhost:8080** ๐Ÿš€ +## Quick Start -### ๐Ÿ“ฆ Database Management Scripts +### Local Development ```bash -# Start database only -./db-start.sh - -# Stop database (with options to keep or delete 
data) -./db-stop.sh +# 1. Clone and install +git clone https://github.com/IanLiYi1996/slide-forge +cd slide-forge/frontend +pnpm install -# View database logs -docker logs slide-forge-db +# 2. Configure environment +cp .env.example .env +# Edit .env with your configuration -# Connect to database directly -docker exec -it slide-forge-db psql -U presentation_user -d slide_forge +# 3. Start development server +pnpm dev ``` -For detailed instructions, see [QUICK_START.md](QUICK_START.md) - -## โœจ Features - -### ๐Ÿค– AI-Powered Generation - -- **Smart Outline Generation**: Automatically create structured presentation outlines from topics or content -- **AI Image Generation**: Generate high-quality slide visuals using yunwu API (Gemini 3 Pro Image) -- **Multi-turn Editing**: Refine and modify generated slides with natural language instructions -- **Web Search Integration**: Enhance outlines with real-time web search data - -### ๐ŸŽฏ Claude Agent (New!) +Access the app at: **http://localhost:3000** -- **Conversational AI**: Chat with Claude Agent powered by Amazon Bedrock to create presentations through natural dialogue -- **Intelligent Tools**: Claude can automatically search the web, read files, and help refine your slides -- **Session Management**: Save and resume conversations, maintain full context across multiple turns -- **Multi-modal Input**: Upload files for analysis, ask questions, and iterate on your content -- **Smart Sidebar**: Quick access to recent Agent sessions directly from the sidebar +### Environment Configuration -### ๐Ÿ“„ Flexible Input Methods +Create `.env` in the frontend directory: -- **Text Input**: Describe your topic directly in the input field -- **File Upload**: Upload documents and automatically extract content - - Supported formats: `.txt`, `.md`, `.docx`, `.pdf` - - Drag-and-drop support - - File size limit: 16MB - - Automatic text extraction and parsing - -### ๐ŸŽจ Customization Options - -- **10+ Built-in Themes**: Professional, 
creative, minimal, bold, and more -- **Custom Theme Creator**: Design your own themes with custom colors and fonts -- **Template Styles**: Multiple presentation templates (corporate, creative, minimal, etc.) -- **Image Configuration**: - - Aspect ratios: 16:9, 4:3, 1:1, 9:16, 21:9, and more - - Resolutions: 1K (fast), 2K (balanced), 4K (high quality) -- **Multi-language Support**: 12+ languages including English, Chinese, Japanese, Korean, Spanish, etc. -- **Flexible Slide Count**: Generate 1-50 slides as needed - -### ๐Ÿ“ค Export Options - -- **PDF Export**: Generate PDF documents with all slides +```env +# Authentication (Cognito) +COGNITO_CLIENT_ID="" +COGNITO_CLIENT_SECRET="" +COGNITO_ISSUER="" +NEXTAUTH_SECRET="" +NEXTAUTH_URL="http://localhost:3000" + +# AI Configuration (choose one) +CLAUDE_CODE_USE_BEDROCK=1 # Use AWS Bedrock (recommended) +# OR +ANTHROPIC_API_KEY=sk-ant-... # Use Anthropic API directly + +# AWS Region +AWS_REGION=us-east-1 + +# Optional Services +TAVILY_API_KEY="" # Web search +UNSPLASH_ACCESS_KEY="" # Stock images +YUNWU_API_KEY="" # AI image generation +``` + +## Features + +### AI-Powered Generation +- **Conversational AI**: Chat with Claude to create presentations naturally +- **Smart Outline Generation**: Automatically create structured outlines from topics +- **AI Image Generation**: Generate high-quality slide visuals +- **Web Search Integration**: Enhance content with real-time web data + +### Flexible Input +- **Text Input**: Describe your topic directly +- **File Upload**: Upload documents (.txt, .md, .docx, .pdf) +- **Multi-modal**: Combine text and file uploads + +### Customization +- **10+ Built-in Themes**: Professional, creative, minimal, bold, etc. 
+- **Custom Themes**: Design your own color schemes and fonts +- **Image Options**: Multiple aspect ratios and resolutions +- **Multi-language**: 12+ languages supported + +### Export Options +- **PDF Export**: Generate PDF documents - **PPTX Export**: Export to PowerPoint format - **Image Export**: Download individual slides as images -### ๐Ÿ’พ Smart Features - -- **Auto-save**: Automatic saving of work in progress -- **Conversation History**: Maintains context for multi-turn slide modifications -- **Persistent Storage**: All generated images stored permanently via UploadThing - -## ๐Ÿš€ Getting Started - -### Prerequisites - -Before you begin, ensure you have the following installed: - -- Node.js 18.x or higher -- npm, yarn, or pnpm package manager (pnpm recommended) -- **Docker & Docker Compose** (for local database) -- Required API keys: - - OpenAI API key or OpenAI-compatible API (for text generation) - - yunwu API key (for AI image generation with Gemini 3 Pro Image) - - AWS Cognito credentials (for authentication) - - UploadThing token (for file storage) - - Tavily API key (optional, for web search) - -### Installation - -1. **Clone the repository** - - ```bash - git clone https://github.com/IanLiYi1996/slide-forge - cd slide-forge - ``` - -2. **Install dependencies** - - ```bash - pnpm install - ``` - -3. 
**Set up environment variables** - - Create a `.env` file in the root directory with the following variables: - - ```env - # Database (Docker default) - DATABASE_URL="postgresql://presentation_user:presentation_password@localhost:5432/slide_forge" - - # Authentication - NEXTAUTH_SECRET="" # Generate with: openssl rand -base64 32 - NEXTAUTH_URL="http://localhost:8080" - - # AWS Cognito Provider (for user authentication) - COGNITO_CLIENT_ID="" - COGNITO_CLIENT_SECRET="" - COGNITO_ISSUER="" - - # AI Providers - Text Generation (OpenAI Compatible) - LLM_API_KEY="" # Required: API key for LLM service - LLM_BASE_URL="" # Optional: Leave empty to use OpenAI - LLM_MODEL_NAME="gpt-4o-mini" # Optional: Model name (default: gpt-4o-mini) - - # AI Providers - Image Generation - YUNWU_API_KEY="" # For AI image generation (Gemini 3 Pro Image) - - # File Storage - UPLOADTHING_TOKEN="" # For storing generated images and uploads - - # Optional Services - TAVILY_API_KEY="" # For web search in outline generation (optional) - - # Claude Agent SDK - Amazon Bedrock (NEW!) - CLAUDE_CODE_USE_BEDROCK="1" # Enable Bedrock as API provider - ENABLE_CLAUDE_AGENT="true" # Enable Claude Agent feature - - # AWS Credentials (for Claude Agent via Bedrock) - # Choose one method: - # Method 1: Direct credentials - AWS_ACCESS_KEY_ID="" - AWS_SECRET_ACCESS_KEY="" - AWS_REGION="us-east-1" - - # Method 2: AWS Profile (recommended for local development) - # AWS_PROFILE="your-profile-name" - # AWS_REGION="us-east-1" - - # Method 3: IAM Role (automatic in AWS environments like EC2/ECS) - # AWS_REGION="us-east-1" - ``` - - > ๐Ÿ’ก **Tip**: Copy `.env.example` to `.env` and fill in your actual values. - > - > ๐Ÿ“ **AWS Cognito Setup**: For detailed instructions on setting up AWS Cognito authentication, see [COGNITO_SETUP.md](COGNITO_SETUP.md). - > - > ๐Ÿ”‘ **yunwu API**: Get your API key from [yunwu.ai](https://yunwu.ai) to enable AI image generation. 
- > - > ๐Ÿค– **Claude Agent Setup**: See detailed setup guide below - -### API Keys Configuration - -> ๐Ÿ“– **Detailed Guide**: See [docs/API_CONFIGURATION.md](docs/API_CONFIGURATION.md) for complete setup instructions, testing commands, and troubleshooting. - -#### Quick Reference Table - -| Service | Required | Used For | Get Key From | -|---------|----------|----------|--------------| -| **LLM API** | Required | Text generation | https://platform.openai.com/api-keys (or your provider) | -| **yunwu API** | Optional | Image generation | https://yunwu.ai | -| **AWS Bedrock** | For Chat to Slides | Claude Agent | AWS Console > Bedrock | -| **Tavily API** | Optional | Web search | https://tavily.com | -| **UploadThing** | Required | File storage | https://uploadthing.com | -| **Unsplash API** | Optional | Stock images | https://unsplash.com/developers | - -#### Detailed Configuration - -#### 1. Text Generation (OpenAI Compatible) - -Unified configuration that supports OpenAI and compatible services: - -**Using OpenAI (Default)**: -```env -LLM_API_KEY="sk-..." # Required: Your OpenAI API key -# LLM_BASE_URL="" # Leave empty or omit for OpenAI -# LLM_MODEL_NAME="gpt-4o-mini" # Optional: defaults to gpt-4o-mini -``` - -**Get your key**: https://platform.openai.com/api-keys - -**Using Alternative Providers**: -```env -LLM_API_KEY="sk-..." # Your API key -LLM_BASE_URL="http://localhost:1234/v1" # Custom endpoint -LLM_MODEL_NAME="gpt-4o-mini" # Your model name -``` - -**Popular providers**: -- **OpenAI** (default): Leave `LLM_BASE_URL` empty -- **LM Studio**: `http://localhost:1234/v1` -- **Ollama**: `http://localhost:11434/v1` -- **vLLM**: `http://localhost:8000/v1` -- **ไบ‘้›พ API (yunwu.ai)**: `https://api.xiaomimimo.com/v1` - -**Used for**: All text generation tasks (outline, slides content) - -#### 2. Image Generation API - -```env -YUNWU_API_KEY="sk-..." 
-``` - -**Get your key**: https://yunwu.ai - -**Used for**: AI-powered slide image generation (Gemini 3 Pro Image) - -#### 3. Claude Agent SDK (Amazon Bedrock) - -For the Chat to Slides feature, configure AWS Bedrock access: - -**Option A: AWS Profile (Recommended for local development)** -```env -CLAUDE_CODE_USE_BEDROCK="1" -ENABLE_CLAUDE_AGENT="true" -AWS_PROFILE="your-profile-name" -AWS_REGION="us-east-1" -``` - -**Option B: Direct Credentials** -```env -CLAUDE_CODE_USE_BEDROCK="1" -ENABLE_CLAUDE_AGENT="true" -AWS_ACCESS_KEY_ID="AKIA..." -AWS_SECRET_ACCESS_KEY="..." -AWS_REGION="us-east-1" -``` - -**Option C: IAM Role (Automatic in AWS environments)** -```env -CLAUDE_CODE_USE_BEDROCK="1" -ENABLE_CLAUDE_AGENT="true" -AWS_REGION="us-east-1" -``` - -**Setup steps**: -1. **Enable model access** in AWS Console: - - Navigate to AWS Console > Amazon Bedrock > Model access - - Request access to Claude models (Sonnet 3.5, Opus, Haiku) - - Wait for approval (usually instant) - -2. **Configure IAM permissions**: - ```json - { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "bedrock:InvokeModel", - "bedrock:InvokeModelWithResponseStream" - ], - "Resource": "arn:aws:bedrock:*::foundation-model/anthropic.claude-*" - } - ] - } - ``` - -3. **Verify setup**: - ```bash - # Test AWS credentials - aws sts get-caller-identity - - # Test Bedrock access - aws bedrock list-foundation-models --region us-east-1 - ``` - -**Used for**: Chat to Slides conversational interface, web search, file analysis - -#### 5. Web Search API (Optional) - -```env -TAVILY_API_KEY="tvly-..." -``` +## AWS Deployment -**Get your key**: https://tavily.com +For production deployment to AWS, see [infrastructure/README.md](infrastructure/README.md). -**Used for**: Real-time web search in Chat to Slides +### Quick Deploy -#### 6. File Storage +```bash +cd infrastructure +cp .env.example .env +# Edit .env with your configuration -```env -UPLOADTHING_TOKEN="..." 
+pnpm install +pnpm deploy ``` -**Get your token**: https://uploadthing.com - -**Used for**: Storing uploaded files and generated images - -### Database Setup - -The project uses Docker to run PostgreSQL locally for development. - -1. **Start the database with Docker** - - ```bash - # Start PostgreSQL in Docker (runs in background) - docker-compose up -d - - # Check database status - docker-compose ps - ``` - - The database will be accessible at `localhost:5432` with credentials: - - Username: `presentation_user` - - Password: `presentation_password` - - Database: `slide_forge` - -2. **Initialize the database schema** - - ```bash - pnpm db:push - ``` - -3. **Stop the database (when needed)** - - ```bash - # Stop database - docker-compose down - - # Stop and remove all data (โš ๏ธ Warning: deletes all data) - docker-compose down -v - ``` - -4. **Start the development server** - - ```bash - pnpm dev - ``` - -1. **Open the application** - - Navigate to [http://localhost:8080](http://localhost:8080) in your browser. - -## ๐Ÿ“– Usage Guide - -### Method 1: Quick Generation (Traditional) - -1. **Configure Settings** (Optional) - - Click **"ๅ›พ็‰‡่ฎพ็ฝฎ"** to configure image aspect ratio and resolution - - Click **"ไธป้ข˜้ฃŽๆ ผ"** to select or customize presentation theme - - Adjust number of slides (1-50) and language - -2. **Input Your Content** - - Choose one of two methods: - - **Method A: Text Input** - - Type or paste your presentation topic directly into the input field - - Press `Ctrl + Enter` or click "Generate Presentation" - - **Method B: File Upload** - - Click **"Upload File"** or drag-and-drop a file (.txt, .md, .docx, .pdf) - - Content will be automatically extracted and filled into the input field - - Review and edit the extracted content if needed - - Click "Generate Presentation" - -3. 
**Review Outline** - - AI will generate a structured outline with your specified number of slides - - Review the outline structure - - Toggle web search for enhanced content (optional) - - Click "Start Generating Slides" - -4. **Generate Slides** - - AI will generate each slide one by one - - Preview each slide image - - Optionally modify slides with natural language instructions (e.g., "make colors warmer", "add more illustrations") - - Confirm each slide to proceed to the next - - Click "Finish & Save" when done - -5. **Export & Share** - - View your completed presentation - - Export as PDF, PPTX, or individual images - - Share or download for your use - -### Method 2: Conversational AI with Claude Agent (New!) - -1. **Access Claude Agent** - - Click **"Claude Agent"** in the left sidebar (with Beta badge) - - Or navigate to existing Agent sessions in the sidebar +### Cost Estimates -2. **Start a Conversation** - - Click **"Start New Conversation"** from the quick start card - - Or click **"New Session"** button in the top right +| Environment | Monthly Cost | +|-------------|--------------| +| Development | ~$80 | +| Production | ~$180 | -3. **Chat with Claude** - - Describe what presentation you want to create - - Example: "Create a 10-slide presentation about artificial intelligence in healthcare" - - Claude will automatically search the web for current information - - Upload files for Claude to analyze and incorporate +Main costs: Fargate tasks, ALB, NAT Gateway, S3/CloudFront -4. **Iterate and Refine** - - Continue the conversation to refine your slides - - Ask Claude to add more detail, change tone, or reorganize content - - Request specific slide counts or formatting - - All context is preserved throughout the conversation +## API Keys Reference -5. 
**Manage Sessions** - - Sessions are automatically saved in the sidebar - - Click any session in the sidebar to resume - - Hover and click the delete button to remove sessions - - View all sessions from the Agent main page +| Service | Required | Purpose | +|---------|----------|---------| +| AWS Bedrock | Yes* | Claude AI (via Bedrock) | +| Anthropic API | Yes* | Claude AI (direct) | +| Cognito | Yes | User authentication | +| Tavily | No | Web search enhancement | +| Unsplash | No | Stock images | +| Yunwu | No | AI image generation | -### Image Configuration +*One of AWS Bedrock or Anthropic API is required -Configure image generation parameters via the **"ๅ›พ็‰‡่ฎพ็ฝฎ"** dialog: +## Development -**Aspect Ratios:** +### Frontend (Next.js) -- `16:9` - Widescreen (Recommended for most presentations) -- `4:3` - Standard/Classic -- `1:1` - Square -- `9:16` - Portrait -- `21:9` - Ultra-wide -- And more... - -**Resolutions:** - -- `1K` - Fast generation, suitable for drafts -- `2K` - Balanced quality and speed (Recommended) -- `4K` - Highest quality, slower generation - -### File Upload Tips - -- **Supported formats**: .txt, .md, .docx, .pdf -- **Maximum size**: 16MB per file -- **Best practices**: - - Use well-structured documents for better outline generation - - PDF files should be text-based (not scanned images) - - DOCX files will have text extracted, formatting may be lost - - Review extracted content before generating - -## ๐Ÿ› ๏ธ Tech Stack - -### Core Framework - -- **Next.js 15** - React framework with App Router -- **React 19** - UI library -- **TypeScript** - Type safety -- **Tailwind CSS** - Styling - -### AI & APIs - -- **OpenAI API** - Text generation (outline, content) -- **yunwu API** - Image generation (Gemini 3 Pro Image) -- **Tavily API** - Web search integration -- **Claude Agent SDK** - Conversational AI via Amazon Bedrock -- **Amazon Bedrock** - Claude model access with enterprise security - -### Database & Storage - -- **PostgreSQL 15** - 
Primary database (runs in Docker for local dev) -- **Docker & Docker Compose** - Container orchestration -- **Prisma** - ORM and schema management -- **UploadThing** - File upload and storage - -### Authentication - -- **NextAuth.js** - Authentication framework -- **AWS Cognito** - Identity provider - -### Document Processing - -- **mammoth** - DOCX text extraction -- **unpdf** - PDF text extraction - -### UI Components - -- **Radix UI** - Accessible component primitives -- **Plate** - Rich text editor -- **Lucide Icons** - Icon library -- **Sonner** - Toast notifications - -## ๐Ÿ“‚ Project Structure - -``` -src/ -โ”œโ”€โ”€ app/ -โ”‚ โ”œโ”€โ”€ _actions/ # Server actions -โ”‚ โ”‚ โ”œโ”€โ”€ image/ # Image generation actions -โ”‚ โ”‚ โ””โ”€โ”€ presentation/ # Presentation CRUD actions -โ”‚ โ”œโ”€โ”€ api/ # API routes -โ”‚ โ”‚ โ”œโ”€โ”€ agent/ # ๐Ÿ†• Claude Agent API endpoints -โ”‚ โ”‚ โ”‚ โ”œโ”€โ”€ chat/ # Chat streaming endpoint -โ”‚ โ”‚ โ”‚ โ””โ”€โ”€ session/ # Session management -โ”‚ โ”‚ โ”œโ”€โ”€ parse-file/ # File parsing endpoint -โ”‚ โ”‚ โ”œโ”€โ”€ presentation/ # Presentation endpoints -โ”‚ โ”‚ โ””โ”€โ”€ uploadthing/ # File upload configuration -โ”‚ โ””โ”€โ”€ presentation/ # Presentation pages -โ”‚ โ””โ”€โ”€ agent/ # ๐Ÿ†• Claude Agent pages -โ”‚ โ”œโ”€โ”€ page.tsx # Agent session list -โ”‚ โ””โ”€โ”€ [sessionId]/page.tsx # Agent chat interface -โ”œโ”€โ”€ components/ -โ”‚ โ”œโ”€โ”€ layout/ # Layout components -โ”‚ โ”‚ โ”œโ”€โ”€ GlobalSidebar.tsx # Main sidebar (collapsible) -โ”‚ โ”‚ โ””โ”€โ”€ RecentAgentSessions.tsx # ๐Ÿ†• Agent session history -โ”‚ โ”œโ”€โ”€ presentation/ # Presentation-specific components -โ”‚ โ”‚ โ”œโ”€โ”€ agent/ # ๐Ÿ†• Agent chat components -โ”‚ โ”‚ โ”‚ โ”œโ”€โ”€ AgentChat.tsx # Chat interface -โ”‚ โ”‚ โ”‚ โ””โ”€โ”€ MarkdownMessage.tsx # Markdown rendering -โ”‚ โ”‚ โ”œโ”€โ”€ dashboard/ # Dashboard components -โ”‚ โ”‚ โ”œโ”€โ”€ editor/ # Slide editor -โ”‚ โ”‚ โ”œโ”€โ”€ generation/ # Slide generation UI -โ”‚ โ”‚ โ”œโ”€โ”€ outline/ # Outline 
display -โ”‚ โ”‚ โ””โ”€โ”€ theme/ # Theme customization -โ”‚ โ””โ”€โ”€ ui/ # Reusable UI components -โ”œโ”€โ”€ lib/ -โ”‚ โ”œโ”€โ”€ agent/ # ๐Ÿ†• Claude Agent SDK integration -โ”‚ โ”‚ โ”œโ”€โ”€ agent-service.ts # Agent core service -โ”‚ โ”‚ โ”œโ”€โ”€ session-manager.ts # Session management -โ”‚ โ”‚ โ””โ”€โ”€ types.ts # Agent type definitions -โ”‚ โ”œโ”€โ”€ file-parsers/ # File parsing utilities -โ”‚ โ”œโ”€โ”€ presentation/ # Presentation utilities -โ”‚ โ””โ”€โ”€ model-picker.ts # AI model configuration -โ”œโ”€โ”€ states/ -โ”‚ โ”œโ”€โ”€ agent-state.ts # ๐Ÿ†• Agent state management -โ”‚ โ””โ”€โ”€ presentation-state.ts # Presentation state management -โ”œโ”€โ”€ server/ -โ”‚ โ”œโ”€โ”€ auth.ts # Authentication configuration -โ”‚ โ””โ”€โ”€ db.ts # Database client -โ””โ”€โ”€ types/ # TypeScript type definitions +```bash +cd frontend +pnpm dev # Start development server +pnpm build # Build for production +pnpm lint # Run linter +pnpm typecheck # TypeScript check ``` -## ๐Ÿค Contributing - -Contributions are welcome! Please feel free to submit a Pull Request. - -## ๐Ÿ“„ License - -This project is licensed under the MIT License. 
- -## 🔧 Additional Features - -### Database Management - -The project uses Docker for easy local database management: +### Infrastructure (CDK) -**Quick Commands:** ```bash -./db-start.sh # Start database -./db-stop.sh # Stop database (interactive menu) +cd infrastructure +npx cdk synth # Synthesize CloudFormation +npx cdk deploy # Deploy to AWS +npx cdk diff # Show pending changes +npx cdk destroy # Tear down resources ``` -**Advanced Commands:** -```bash -# View database logs -docker logs slide-forge-db - -# Follow logs in real-time -docker logs -f slide-forge-db - -# Connect to database -docker exec -it slide-forge-db psql -U presentation_user -d slide_forge - -# Backup database -docker exec slide-forge-db pg_dump -U presentation_user slide_forge > backup.sql +### Backend (AgentCore) -# Restore from backup -cat backup.sql | docker exec -i slide-forge-db psql -U presentation_user -d slide_forge - -# View container status -docker-compose ps - -# Restart database -docker-compose restart postgres +```bash +cd backend +docker build -t slide-forge-agentcore . 
+# Push to ECR after CDK creates the repository ``` -**Troubleshooting:** -- If database won't start: `docker-compose down && docker-compose up -d postgres` -- If port 5432 is in use: Stop other PostgreSQL instances -- To reset database: `./db-stop.sh` → choose option 2 (delete all data) - -### Sidebar Features +## Tech Stack -- **Collapsible Sidebar**: Click the collapse button (◀️/▶️) in the bottom left to expand/collapse -- **Theme Toggle**: Switch between light and dark mode with the moon/sun button -- **Quick Access**: Recent presentations and Agent sessions are displayed in the sidebar -- **Hover Actions**: Hover over items to reveal delete buttons +- **Frontend**: Next.js 15, React 19, TypeScript, Tailwind CSS +- **Backend**: Python, FastAPI, Strands Agent SDK +- **AI**: AWS Bedrock, Claude (Sonnet/Opus/Haiku) +- **Auth**: Amazon Cognito, NextAuth.js +- **Infrastructure**: AWS CDK, ECS Fargate, CloudFront, S3 +- **Storage**: S3 (serverless, no database) -### Claude Agent Features +## Security -- **Long-running Sessions**: Agents maintain context across multiple messages -- **Tool Integration**: Automatic web search, file reading, and content analysis -- **Markdown Support**: Rich text formatting in AI responses (code blocks, tables, lists, etc.) -- **Enterprise Security**: Uses Amazon Bedrock for data privacy and compliance +- All traffic encrypted (TLS 1.2+) +- S3 buckets use KMS encryption with annual key rotation +- Private ALB accessible only via CloudFront VPC Origin +- Cognito Advanced Security Mode enabled +- Invite-only user registration (no self-signup) +- JWT tokens with automatic refresh -## 🙏 Acknowledgments +## License -- yunwu.ai for providing the Gemini 3 Pro Image API -- OpenAI for language model capabilities -- Anthropic for Claude Agent SDK and Amazon Bedrock integration -- All open-source libraries that made this project possible +MIT License - see [LICENSE](LICENSE) for details. 
diff --git a/frontend/src/app/api/agent/chat/route.ts b/frontend/src/app/api/agent/chat/route.ts index d65ac8c..631429e 100644 --- a/frontend/src/app/api/agent/chat/route.ts +++ b/frontend/src/app/api/agent/chat/route.ts @@ -21,7 +21,7 @@ import { sessionManager } from "@/lib/agent/session-manager"; import { NextResponse } from "next/server"; import { type ChatRequest, type Message } from "@/lib/agent/types"; import { extractSlidesFromMessages } from "@/lib/agent/utils/extract-slides"; -import { createAgentCoreClient } from "@/lib/agent/agentcore-client"; +import { createAgentCoreClient, AgentCoreAuthError } from "@/lib/agent/agentcore-client"; import { env } from "@/env"; // Configure route timeout for long-running agent operations @@ -444,10 +444,20 @@ async function handleAgentCoreChat( controller.close(); } catch (error) { console.error("[Agent Chat] Error in AgentCore stream:", error); - sendSSE("error", { - content: - error instanceof Error ? error.message : "Failed to process request", - }); + + // Handle authentication errors specially + if (error instanceof AgentCoreAuthError) { + sendSSE("error", { + content: "Your session has expired. Please sign in again.", + code: "AUTH_ERROR", + requiresReauth: true, + }); + } else { + sendSSE("error", { + content: + error instanceof Error ? error.message : "Failed to process request", + }); + } if (heartbeatInterval) clearInterval(heartbeatInterval); controller.close(); @@ -883,7 +893,20 @@ export async function POST(req: Request) { ); } - // 3. Route to appropriate handler + // 3. Check for authentication errors (token refresh failed) + if (session.error === "RefreshTokenError") { + console.error("[Agent Chat] Session has RefreshTokenError, user must re-authenticate"); + return NextResponse.json( + { + error: "Session expired", + code: "SESSION_EXPIRED", + message: "Your session has expired. Please sign in again.", + }, + { status: 401 } + ); + } + + // 4. 
Route to appropriate handler if (isAgentCoreEnabled()) { console.log("[Agent Chat] Using AgentCore Runtime backend"); @@ -892,7 +915,15 @@ export async function POST(req: Request) { // which is present in access token but not in id token const accessToken = session.accessToken; if (!accessToken) { - console.warn("[Agent Chat] No access token available, user may need to re-authenticate"); + console.error("[Agent Chat] No access token available"); + return NextResponse.json( + { + error: "Authentication required", + code: "NO_TOKEN", + message: "No authentication token available. Please sign in again.", + }, + { status: 401 } + ); } return handleAgentCoreChat(req, session.user.id, chatRequest, accessToken); diff --git a/frontend/src/lib/agent/agentcore-client.ts b/frontend/src/lib/agent/agentcore-client.ts index ac5ba4c..21c32f8 100644 --- a/frontend/src/lib/agent/agentcore-client.ts +++ b/frontend/src/lib/agent/agentcore-client.ts @@ -17,6 +17,16 @@ import { env } from "@/env"; +/** + * Error types for AgentCore client + */ +export class AgentCoreAuthError extends Error { + constructor(message: string, public statusCode: number = 401) { + super(message); + this.name = "AgentCoreAuthError"; + } +} + /** * Event types emitted by AgentCore streaming responses */ @@ -170,13 +180,19 @@ export class AgentCoreClient { const invocationRequest: InvocationRequest = { path: "/sessions", method: "POST", - payload: request, + payload: request as unknown as Record, }; const response = await this.invoke(invocationRequest); if (!response.ok) { const errorText = await response.text(); + if (response.status === 401 || response.status === 403) { + throw new AgentCoreAuthError( + `Authentication failed: ${response.status} - ${errorText}`, + response.status + ); + } throw new Error( `Failed to create session: ${response.status} ${response.statusText} - ${errorText}` ); @@ -214,7 +230,7 @@ export class AgentCoreClient { const invocationRequest: InvocationRequest = { path: 
`/sessions/${sessionId}/messages`, method: "POST", - payload: request as Record, + payload: request as unknown as Record, path_params: { session_id: sessionId }, }; @@ -222,6 +238,12 @@ export class AgentCoreClient { if (!response.ok) { const errorText = await response.text(); + if (response.status === 401 || response.status === 403) { + throw new AgentCoreAuthError( + `Authentication failed: ${response.status} - ${errorText}`, + response.status + ); + } throw new Error( `Failed to send message: ${response.status} ${response.statusText} - ${errorText}` ); @@ -253,7 +275,7 @@ export class AgentCoreClient { const invocationRequest: InvocationRequest = { path: `/sessions/${sessionId}/messages/stream`, method: "POST", - payload: request as Record, + payload: request as unknown as Record, path_params: { session_id: sessionId }, }; @@ -280,6 +302,12 @@ export class AgentCoreClient { if (!response.ok) { const errorText = await response.text(); + if (response.status === 401 || response.status === 403) { + throw new AgentCoreAuthError( + `Authentication failed: ${response.status} - ${errorText}`, + response.status + ); + } throw new Error( `Failed to stream message: ${response.status} ${response.statusText} - ${errorText}` ); diff --git a/frontend/src/server/auth.ts b/frontend/src/server/auth.ts index 0ac0255..bb78438 100644 --- a/frontend/src/server/auth.ts +++ b/frontend/src/server/auth.ts @@ -8,6 +8,7 @@ import { } from "@/services/s3/user-service"; import NextAuth, { type DefaultSession, type Session } from "next-auth"; import CognitoProvider from "next-auth/providers/cognito"; +import type { JWT } from "next-auth/jwt"; declare module "next-auth" { interface Session extends DefaultSession { @@ -35,7 +36,103 @@ declare module "next-auth/jwt" { idToken?: string; refreshToken?: string; expiresAt?: number; - error?: string; + error?: "RefreshTokenError" | "TokenExpired"; + } +} + +// Cache for the token endpoint URL +let cachedTokenEndpoint: string | null = null; + +/** + * 
Get the Cognito token endpoint by fetching the OIDC discovery document + */ +async function getTokenEndpoint(): Promise { + if (cachedTokenEndpoint) { + return cachedTokenEndpoint; + } + + try { + const discoveryUrl = `${env.COGNITO_ISSUER}/.well-known/openid-configuration`; + console.log("[Auth] Fetching OIDC discovery from:", discoveryUrl); + + const response = await fetch(discoveryUrl); + if (!response.ok) { + console.error("[Auth] Failed to fetch OIDC discovery:", response.status); + return null; + } + + const config = await response.json() as { token_endpoint?: string }; + cachedTokenEndpoint = config.token_endpoint ?? null; + + console.log("[Auth] Token endpoint discovered:", cachedTokenEndpoint); + return cachedTokenEndpoint; + } catch (error) { + console.error("[Auth] Error fetching OIDC discovery:", error); + return null; + } +} + +/** + * Refresh Cognito tokens using the refresh_token grant + * + * Uses the OIDC discovery document to find the token endpoint, + * which handles different Cognito configurations (hosted domain, custom domain, etc.) 
+ * + * @param refreshToken - The Cognito refresh token + * @returns New token set or null if refresh failed + */ +async function refreshCognitoTokens(refreshToken: string): Promise<{ + access_token: string; + id_token: string; + expires_at: number; +} | null> { + try { + const tokenEndpoint = await getTokenEndpoint(); + + if (!tokenEndpoint) { + console.error("[Auth] Could not determine token endpoint"); + return null; + } + + console.log("[Auth] Refreshing Cognito tokens..."); + + const response = await fetch(tokenEndpoint, { + method: "POST", + headers: { + "Content-Type": "application/x-www-form-urlencoded", + }, + body: new URLSearchParams({ + grant_type: "refresh_token", + client_id: env.COGNITO_CLIENT_ID, + client_secret: env.COGNITO_CLIENT_SECRET, + refresh_token: refreshToken, + }), + }); + + if (!response.ok) { + const errorText = await response.text(); + console.error("[Auth] Token refresh failed:", response.status, errorText); + return null; + } + + const tokens = await response.json() as { + access_token: string; + id_token: string; + expires_in: number; + token_type: string; + }; + + console.log("[Auth] Token refresh successful"); + + return { + access_token: tokens.access_token, + id_token: tokens.id_token, + // expires_in is in seconds, convert to Unix timestamp + expires_at: Math.floor(Date.now() / 1000) + tokens.expires_in, + }; + } catch (error) { + console.error("[Auth] Token refresh error:", error); + return null; } } @@ -96,10 +193,31 @@ export const { auth, handlers, signIn, signOut } = NextAuth({ } } - // Check if token is expired - if (token.expiresAt && Date.now() >= (token.expiresAt as number) * 1000) { - // Token expired - mark for refresh - // Note: Cognito tokens can be refreshed using refresh_token + // Check if token needs refresh (refresh 60 seconds before expiry for safety) + const tokenExpiresAt = token.expiresAt as number | undefined; + const bufferSeconds = 60; + const shouldRefresh = tokenExpiresAt && Date.now() >= 
(tokenExpiresAt - bufferSeconds) * 1000; + + if (shouldRefresh && token.refreshToken) { + console.log("[Auth] Access token expired or expiring soon, attempting refresh..."); + + const refreshedTokens = await refreshCognitoTokens(token.refreshToken as string); + + if (refreshedTokens) { + // Update token with new values + token.accessToken = refreshedTokens.access_token; + token.idToken = refreshedTokens.id_token; + token.expiresAt = refreshedTokens.expires_at; + token.error = undefined; // Clear any previous error + console.log("[Auth] Token refreshed successfully, new expiry:", new Date(refreshedTokens.expires_at * 1000).toISOString()); + } else { + // Refresh failed - user needs to re-authenticate + console.error("[Auth] Token refresh failed, user must re-authenticate"); + token.error = "RefreshTokenError"; + } + } else if (shouldRefresh && !token.refreshToken) { + // No refresh token available + console.error("[Auth] Token expired but no refresh token available"); token.error = "TokenExpired"; } diff --git a/infrastructure/README.md b/infrastructure/README.md index ca7d3bb..b5a1d14 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -1,318 +1,258 @@ # Slide-Forge AWS Infrastructure -AWS CDK infrastructure for deploying Slide-Forge to AWS with a hybrid architecture: -- **Static assets**: S3 + CloudFront -- **API routes**: ECS EC2 + ALB -- **Data storage**: S3 (serverless, no database required) -- **AI Services**: AWS Bedrock + Claude Agent SDK + OpenAI API -- **Authentication**: Amazon Cognito +AWS CDK infrastructure for deploying Slide-Forge with a modern serverless architecture: -## โšก ๅฟซ้€Ÿ้ƒจ็ฝฒ๏ผˆๆ–ฐ๏ผ‰ +- **Frontend**: ECS Fargate (Stateless Next.js) +- **Backend**: AWS Bedrock AgentCore Runtime (Strands Agent + Claude) +- **CDN**: CloudFront with VPC Origin +- **Auth**: Amazon Cognito (OIDC + JWT) +- **Storage**: S3 (serverless, no database required) -### ไฝฟ็”จ็Žฏๅขƒๅ˜้‡้ƒจ็ฝฒ +## Architecture Overview -```bash -# 1. 
้…็ฝฎ็Žฏๅขƒๅ˜้‡ -cp .env.example .env -nano .env # ๅกซๅ…ฅไฝ ็š„ API keys - -# 2. ้ƒจ็ฝฒ -pnpm install -pnpm deploy - -# CDK ไผš่‡ชๅŠจ่ฏปๅ– .env ๅนถ้…็ฝฎๆ‰€ๆœ‰ๆœๅŠก ``` - -**ๆœ€็ฎ€้…็ฝฎ** - ๅช้œ€่ฆไปฅไธ‹ไน‹ไธ€: -```bash -# ไฝฟ็”จ AWS Bedrock -CLAUDE_CODE_USE_BEDROCK=1 -AWS_REGION=us-east-1 - -# ๆˆ–ไฝฟ็”จ Anthropic API -ANTHROPIC_API_KEY=sk-ant-api03-... -AWS_REGION=us-east-1 + Internet (HTTPS) + โ”‚ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ CloudFront โ”‚ + โ”‚ (CDN + Caching) โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ โ”‚ โ”‚ + โ–ผ โ–ผ โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ S3 Static โ”‚ โ”‚ Private ALB โ”‚ โ”‚ Cognito โ”‚ + โ”‚ Bucket โ”‚ โ”‚ (VPC Origin)โ”‚ โ”‚ User Pool โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ ECS Fargate โ”‚ + โ”‚ (Next.js 1-4) โ”‚ + โ”‚ Stateless โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ โ”‚ โ”‚ + โ–ผ โ–ผ โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ AgentCore โ”‚ โ”‚ S3 Uploads โ”‚ โ”‚ Secrets โ”‚ + โ”‚ Runtime โ”‚ โ”‚ (Sessions) โ”‚ โ”‚ Manager โ”‚ + โ”‚ (Claude) โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ ``` -่ฏฆ่ง: [QUICK_DEPLOY.md](./QUICK_DEPLOY.md) +## Key Features -## ๐Ÿ“ Project Structure +- **Stateless 
Frontend**: Fargate tasks auto-scale 1-4, no sticky sessions needed +- **AI-Powered Backend**: Bedrock AgentCore manages Claude agent sessions +- **JWT Auth with Auto-Refresh**: Cognito tokens refresh automatically before expiry +- **No Database**: All data stored in S3 with KMS encryption +- **SSE Streaming**: 300s ALB timeout for long-running agent responses -``` -infrastructure/ -├── bin/ -│ └── slide-forge.ts # CDK app entry point -├── lib/ -│ ├── slide-forge-stack.ts # Main stack -│ ├── network/vpc.ts # VPC with 3 AZs -│ ├── compute/ecs-nextjs-service.ts # ECS EC2 service -│ ├── storage/ -│ │ └── s3-buckets.ts # Static, uploads, logs buckets -│ ├── auth/ -│ │ ├── cognito.ts # Cognito User Pool -│ │ ├── admin-user-creator.ts -│ │ └── agent-sdk-role.ts -│ ├── cdn/cloudfront.ts # CloudFront distribution -│ └── common/constants.ts # Shared configuration -├── docker/ -│ ├── Dockerfile.nextjs # Production Docker image -│ └── .dockerignore -├── config/ -│ ├── dev.json # Development config -│ └── prod.json # Production config -├── cdk.json -├── package.json -└── tsconfig.json -``` - -## 🚀 Quick Start +## Quick Start ### Prerequisites -1. **AWS CLI** configured with credentials -2. **Node.js** 20+ and npm/pnpm -3. **Docker** for building images -4. **AWS CDK** CLI: `npm install -g aws-cdk` +- AWS CLI configured with credentials +- Node.js 20+ and pnpm +- Docker (for building images) +- AWS CDK CLI: `npm install -g aws-cdk` -### Installation +### 1.
Configure Environment ```bash cd infrastructure -npm install +cp .env.example .env +# Edit .env with your configuration ``` -### Bootstrap CDK (first time only) - +**Minimum required:** ```bash -# Bootstrap for your AWS account/region -cdk bootstrap aws://ACCOUNT_ID/REGION +# Cognito Admin Email (receives initial password) +COGNITO_ADMIN_EMAIL=your-email@example.com -# Example -cdk bootstrap aws://123456789012/us-east-1 +# AI Configuration (choose one) +CLAUDE_CODE_USE_BEDROCK=1 # Use AWS Bedrock (recommended) +# OR +ANTHROPIC_API_KEY=sk-ant-... # Use Anthropic API directly ``` -## 📦 Deployment Steps - -### 1. Create Required Secrets - -Before deploying, create these secrets in AWS Secrets Manager: +### 2. Bootstrap CDK (first time only) ```bash -# OpenAI API Key (or other LLM provider) -aws secretsmanager create-secret \ - --name slide-forge-development/openai-api-key \ - --secret-string "sk-..." - -# Yunwu API Key (for image generation) -aws secretsmanager create-secret \ - --name slide-forge-development/yunwu-api-key \ - --secret-string "sk-..." - -# Tavily API Key (for search) -aws secretsmanager create-secret \ - --name slide-forge-development/tavily-api-key \ - --secret-string "tvly-..." - -# UploadThing Token -aws secretsmanager create-secret \ - --name slide-forge-development/uploadthing-token \ - --secret-string "sk_live_..." +cdk bootstrap aws://ACCOUNT_ID/REGION ``` -### 2. Deploy Infrastructure +### 3. Deploy Infrastructure ```bash -# Development environment -npm run deploy +pnpm install +pnpm deploy +``` -# Or with explicit environment -cdk deploy --context environment=development +### 4.
Build and Push AgentCore Container -# Production environment -cdk deploy --context environment=production +After deployment, build and push the AgentCore container: + +```bash +# Get ECR repository URI from CDK output +export ECR_REPO="123456789012.dkr.ecr.us-east-1.amazonaws.com/slide-forge-agentcore" + +# Build and push +cd ../backend +docker build -t $ECR_REPO:latest . +aws ecr get-login-password | docker login --username AWS --password-stdin $ECR_REPO +docker push $ECR_REPO:latest ``` -### 3. Build and Upload Static Assets +### 5. Upload Static Assets ```bash -# Build Next.js application (from frontend directory) cd ../frontend pnpm build # Get bucket name from CDK output export STATIC_BUCKET="slide-forge-development-static-..." - -# Upload static assets aws s3 sync .next/static s3://$STATIC_BUCKET/_next/static aws s3 sync public s3://$STATIC_BUCKET/public ``` -### 4. Invalidate CloudFront Cache - -```bash -# Get distribution ID from CDK output -export DISTRIBUTION_ID="E123456789ABCD" - -# Invalidate cache -aws cloudfront create-invalidation \ - --distribution-id $DISTRIBUTION_ID \ - --paths "/*" -``` - -### 5. Access Your Application +### 6. 
Access Application The CloudFront URL will be in the CDK output: ``` https://d1234567890abc.cloudfront.net ``` -## 🔧 Configuration - -### Environment Variables in ECS +## Project Structure -The following environment variables are configured in `lib/compute/ecs-nextjs-service.ts`: +``` +infrastructure/ +├── bin/ +│ └── slide-forge.ts # CDK app entry point +├── lib/ +│ ├── slide-forge-stack.ts # Main stack orchestration +│ ├── network/ +│ │ └── vpc.ts # VPC with 3 AZs +│ ├── compute/ +│ │ ├── fargate-nextjs-service.ts # Stateless Fargate frontend +│ │ └── agentcore-construct.ts # Bedrock AgentCore Runtime +│ ├── storage/ +│ │ └── s3-buckets.ts # Static, uploads, logs buckets +│ ├── auth/ +│ │ ├── cognito.ts # Cognito User Pool + Client +│ │ └── admin-user-creator.ts # Initial admin user +│ ├── cdn/ +│ │ └── cloudfront.ts # CloudFront with VPC Origin +│ └── common/ +│ └── constants.ts # Shared configuration +├── config/ +│ └── env-config.ts # Environment variable loader +└── cdk.json +``` -**From Secrets Manager** (secure): -- `NEXTAUTH_SECRET` - NextAuth.js secret -- `COGNITO_CLIENT_SECRET` - Cognito OAuth client secret -- `LLM_API_KEY` - OpenAI/compatible API key (optional) -- `UPLOADTHING_TOKEN` - File upload service token (optional) -- `TAVILY_API_KEY` - Search API key (optional) +## Configuration -**Note**: Data storage uses S3 instead of a database. All presentation data, user profiles, and sessions are stored in the uploads S3 bucket.
+### Environment Variables -**Environment Variables**: -- `NODE_ENV=production` -- `AWS_REGION` - Automatically set -- `CLAUDE_CODE_USE_BEDROCK=1` - Enable Bedrock -- `UPLOADS_BUCKET` - S3 bucket name +| Variable | Required | Description | +|----------|----------|-------------| +| `COGNITO_ADMIN_EMAIL` | Yes | Admin user email (receives login credentials) | +| `CLAUDE_CODE_USE_BEDROCK` | No | Set to `1` to use AWS Bedrock | +| `ANTHROPIC_API_KEY` | No | Anthropic API key (if not using Bedrock) | +| `TAVILY_API_KEY` | No | Tavily API key for web search | +| `UNSPLASH_ACCESS_KEY` | No | Unsplash API key for images | -### Customizing Configuration +### Token Validity -Edit `config/dev.json` or `config/prod.json`: +Cognito token lifetimes (configured in CDK): +- **Access Token**: 1 day +- **ID Token**: 1 day +- **Refresh Token**: 30 days -```json -{ - "environment": "development", - "vpc": { - "maxAzs": 3, - "natGateways": 1 - }, - "ecs": { - "cpu": 1024, - "memory": 2048, - "desiredCount": 1 - } -} -``` +Tokens refresh automatically 60 seconds before expiry. -## 💰 Cost Optimization +## Cost Estimates -### Development Environment (~$50/month) -- ECS EC2 (t3.large): ~$30 +### Development (~$100/month) +- Fargate (1 task): ~$30 - ALB: ~$20 - NAT Gateway (1): ~$33 -- CloudFront (100GB): ~$10 -- S3 (data storage): ~$5 -- Other: ~$7 +- S3 + CloudFront: ~$15 +- AgentCore: Pay per invocation -### Production Environment (~$150/month) -- ECS EC2 (larger instance or multiple): ~$60 +### Production (~$180-240/month) +- Fargate (2-4 tasks): ~$60-120 - ALB: ~$20 -- NAT Gateway (2, HA): ~$64 -- CloudFront (500GB): ~$50 -- S3 (data storage): ~$10 -- Other: ~$20 +- NAT Gateway (2, HA): ~$66 +- S3 + CloudFront: ~$30 +- AgentCore: Pay per invocation -### Cost Saving Tips -1. Use S3 for data storage (no database costs) -2. Use S3 lifecycle policies for old data -3. CloudFront Price Class 100 -4. VPC Endpoints (reduce NAT costs) -5.
Spot Instances for non-production - -## 🛠️ Common Operations +## Common Operations ### View Logs ```bash -# ECS service logs -aws logs tail /ecs/slide-forge-development --follow +# Fargate service logs +aws logs tail /ecs/slide-forge-development-fargate --follow -# ALB access logs -aws s3 ls s3://slide-forge-development-logs-.../alb-access-logs/ +# AgentCore runtime logs +aws logs tail /aws/bedrock-agentcore/runtimes/slide-forge-agent --follow ``` -### Update ECS Service +### Force Deployment ```bash -# Force new deployment (pulls latest image) aws ecs update-service \ - --cluster slide-forge-development-cluster \ - --service slide-forge-development-service \ + --cluster slide-forge-development-fargate-cluster \ + --service slide-forge-development-fargate-service \ --force-new-deployment ``` -### Scale ECS Service +### Scale Service ```bash -# Manual scaling aws ecs update-service \ - --cluster slide-forge-development-cluster \ - --service slide-forge-development-service \ + --cluster slide-forge-development-fargate-cluster \ + --service slide-forge-development-fargate-service \ --desired-count 4 ``` -## 🔍 Troubleshooting - -### ECS Tasks Not Starting - -1. Check CloudWatch Logs: `/ecs/slide-forge-development` -2. Verify health check endpoint: `/api/health` -3. Check security group rules -4. Verify secrets exist in Secrets Manager -5. Check S3 bucket permissions +## Troubleshooting -### CloudFront 404 Errors +### Fargate Tasks Not Starting +1. Check CloudWatch Logs: `/ecs/slide-forge-development-fargate` +2. Verify health check: `GET /api/health` +3. Check Secrets Manager access +4. Verify ECR image exists -1. Verify static assets uploaded to S3 -2. Check CloudFront origin configuration -3. Wait 5-10 minutes for distribution deployment +### AgentCore Auth Errors +1. Verify Cognito tokens are valid +2. Check AgentCore runtime status in AWS Console +3. Review CloudWatch logs for JWT validation errors -### S3 Data Access Issues +### SSE Streaming Issues +1.
ALB idle timeout is 300s (configured) +2. CloudFront origin timeout is 180s (max for VPC origin) +3. Check for proxy buffering issues -1. Verify ECS task role has S3 permissions -2. Check KMS key permissions for encrypted buckets -3. Verify bucket name in environment variables (UPLOADS_BUCKET) - -## 🗑️ Cleanup - -To delete all resources: +## Cleanup ```bash -npm run destroy +# Destroy all resources +pnpm destroy # Or cdk destroy --all ``` -**Note**: Some resources like S3 buckets and log groups may need manual cleanup. - -## 📚 Additional Resources - -- [AWS CDK Documentation](https://docs.aws.amazon.com/cdk/) -- [Next.js Deployment](https://nextjs.org/docs/deployment) -- [S3 Best Practices](https://docs.aws.amazon.com/AmazonS3/latest/userguide/best-practices-for-s3.html) -- [CloudFront Best Practices](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/best-practices.html) -- [Amazon Cognito](https://docs.aws.amazon.com/cognito/) - -## 🤝 Support - -For issues or questions: -1. Check CloudWatch Logs -2. Review CDK synthesis output: `cdk synth` -3. Validate configuration: `cdk diff` +**Note**: S3 buckets with data may need manual cleanup.
---- +## Security -**架构参考**: 基于 `resource/customer-due-diligence` 验证的生产级架构模式 +- All traffic encrypted (TLS 1.2+) +- S3 buckets use KMS encryption with key rotation +- Private ALB accessible only via CloudFront VPC Origin +- Cognito Advanced Security Mode enabled +- No self-signup (invite-only users) diff --git a/infrastructure/lib/compute/agentcore-construct.ts b/infrastructure/lib/compute/agentcore-construct.ts index 733107e..eb5553b 100644 --- a/infrastructure/lib/compute/agentcore-construct.ts +++ b/infrastructure/lib/compute/agentcore-construct.ts @@ -59,6 +59,12 @@ export interface AgentCoreConstructProps { * @default PUBLIC */ networkMode?: 'PUBLIC' | 'PRIVATE'; + + /** + * Skip runtime creation (for initial deployment when image doesn't exist yet) + * @default false + */ + skipRuntimeCreation?: boolean; } export class AgentCoreConstruct extends Construct { @@ -280,16 +286,13 @@ export class AgentCoreConstruct extends Construct { logRetention: logs.RetentionDays.ONE_WEEK, }); - // Grant Lambda permissions to manage AgentCore + // Grant Lambda permissions to manage AgentCore (full access to all AgentCore operations) agentCoreManagerFunction.addToRolePolicy( new iam.PolicyStatement({ effect: iam.Effect.ALLOW, actions: [ - 'bedrock-agentcore-control:CreateAgentRuntime', - 'bedrock-agentcore-control:UpdateAgentRuntime', - 'bedrock-agentcore-control:DeleteAgentRuntime', - 'bedrock-agentcore-control:GetAgentRuntime', - 'bedrock-agentcore-control:ListAgentRuntimes', + 'bedrock-agentcore:*', + 'bedrock-agentcore-control:*', ], resources: ['*'], }) @@ -316,45 +319,54 @@ export class AgentCoreConstruct extends Construct { // Construct discovery URL for Cognito const cognitoDiscoveryUrl = `https://cognito-idp.${region}.amazonaws.com/${props.cognitoUserPoolId}/.well-known/openid-configuration`; - // Build environment variables string - const envVarsArray: string[] = [`AWS_DEFAULT_REGION=${region}`]; + // Build environment variables as a
dictionary (JSON string for Lambda) + const envVarsDict: Record<string, string> = { + AWS_DEFAULT_REGION: region, + }; if (props.environmentVariables) { Object.entries(props.environmentVariables).forEach(([key, value]) => { if (value) { - envVarsArray.push(`${key}=${value}`); + envVarsDict[key] = value; } }); } - envVarsArray.push(`S3_WORKSPACE_BUCKET=${props.workspaceBucket.bucketName}`); + envVarsDict['S3_WORKSPACE_BUCKET'] = props.workspaceBucket.bucketName; // Determine Docker image URI const dockerImageUri = props.dockerImageUri || `${this.ecrRepository.repositoryUri}:latest`; // ========================================================================= - // 5. AgentCore Runtime Custom Resource + // 5. AgentCore Runtime Custom Resource (conditionally created) // ========================================================================= - const agentCoreRuntime = new cdk.CustomResource(this, 'AgentCoreRuntime', { - serviceToken: provider.serviceToken, - properties: { - RuntimeName: props.runtimeName, - DockerImageUri: dockerImageUri, - RoleArn: this.runtimeRole.roleArn, - NetworkMode: props.networkMode || 'PUBLIC', - EnvironmentVariables: envVarsArray.join(','), - CognitoDiscoveryUrl: cognitoDiscoveryUrl, - CognitoClientId: props.cognitoClientId, - // Add timestamp to force update when properties change - Timestamp: Date.now().toString(), - }, - }); + if (props.skipRuntimeCreation) { + // Skip runtime creation - just set placeholder values + // User should push image to ECR and deploy again with skipRuntimeCreation=false + this.runtimeArn = 'PENDING_IMAGE_PUSH'; + this.runtimeUrl = 'PENDING_IMAGE_PUSH'; + } else { + const agentCoreRuntime = new cdk.CustomResource(this, 'AgentCoreRuntime', { + serviceToken: provider.serviceToken, + properties: { + RuntimeName: props.runtimeName, + DockerImageUri: dockerImageUri, + RoleArn: this.runtimeRole.roleArn, + NetworkMode: props.networkMode || 'PUBLIC', + EnvironmentVariables: JSON.stringify(envVarsDict), + CognitoDiscoveryUrl:
cognitoDiscoveryUrl, + CognitoClientId: props.cognitoClientId, + // Add timestamp to force update when properties change + Timestamp: Date.now().toString(), + }, + }); - // Ensure runtime is created after the role - agentCoreRuntime.node.addDependency(this.runtimeRole); + // Ensure runtime is created after the role + agentCoreRuntime.node.addDependency(this.runtimeRole); - // Store runtime outputs - this.runtimeArn = agentCoreRuntime.getAttString('RuntimeArn'); - this.runtimeUrl = agentCoreRuntime.getAttString('RuntimeUrl'); + // Store runtime outputs + this.runtimeArn = agentCoreRuntime.getAttString('RuntimeArn'); + this.runtimeUrl = agentCoreRuntime.getAttString('RuntimeUrl'); + } // ========================================================================= // Outputs @@ -413,7 +425,9 @@ def handler(event, context): docker_image_uri = properties['DockerImageUri'] role_arn = properties['RoleArn'] network_mode = properties.get('NetworkMode', 'PUBLIC') - env_vars = properties.get('EnvironmentVariables', '') + env_vars_str = properties.get('EnvironmentVariables', '{}') + # Parse environment variables from JSON string to dict + env_vars = json.loads(env_vars_str) if env_vars_str else {} cognito_discovery_url = properties['CognitoDiscoveryUrl'] cognito_client_id = properties['CognitoClientId'] @@ -464,9 +478,6 @@ def create_runtime(client, runtime_name, docker_image_uri, role_arn, network_mod 'networkMode': network_mode }, roleArn=role_arn, - requestHeaderConfiguration={ - 'requestHeaderAllowlist': ['Authorization'] - }, environmentVariables=env_vars, authorizerConfiguration={ 'customJWTAuthorizer': { @@ -505,9 +516,6 @@ def update_runtime(client, physical_resource_id, runtime_name, docker_image_uri, 'networkMode': network_mode }, roleArn=role_arn, - requestHeaderConfiguration={ - 'requestHeaderAllowlist': ['Authorization'] - }, environmentVariables=env_vars, authorizerConfiguration={ 'customJWTAuthorizer': { diff --git a/infrastructure/lib/slide-forge-stack.ts 
b/infrastructure/lib/slide-forge-stack.ts index b1c35c4..d452be4 100644 --- a/infrastructure/lib/slide-forge-stack.ts +++ b/infrastructure/lib/slide-forge-stack.ts @@ -81,9 +81,14 @@ export class SlideForgeStack extends cdk.Stack { // 4. Create AgentCore Backend (for AI agent processing) // AgentCore runs the Strands-based agent in Bedrock AgentCore Runtime + // Note: Runtime name must match pattern [a-zA-Z][a-zA-Z0-9_]{0,47} + // Set SKIP_AGENTCORE_RUNTIME=true to deploy infrastructure first without runtime + const skipRuntimeCreation = process.env.SKIP_AGENTCORE_RUNTIME === 'true'; + const runtimeName = stackName.replace(/-/g, '_').slice(0, 40) + '_agent'; const agentCoreConstruct = new AgentCoreConstruct(this, 'AgentCore', { stackName, - runtimeName: `${stackName}-agent-runtime`, + runtimeName, + skipRuntimeCreation, workspaceBucket: s3Construct.uploadsBucket, cognitoUserPoolId: cognitoConstruct.userPool.userPoolId, cognitoClientId: cognitoConstruct.oidc.clientId, From b0bfd2c65c75ecb4b4e29032757b82ed8b01148c Mon Sep 17 00:00:00 2001 From: ianleely Date: Thu, 5 Feb 2026 11:22:15 +0000 Subject: [PATCH 3/3] ci: add automatic release workflow on tag push Automatically creates GitHub releases when tags matching 'v*' are pushed. Includes changelog generation from commits since previous tag. 
Co-Authored-By: Claude Opus 4.5 --- .github/workflows/release.yml | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..a33f78d --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,47 @@ +name: Release + +on: + push: + tags: + - 'v*' + +permissions: + contents: write + +jobs: + release: + name: Create Release + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Get previous tag + id: prev_tag + run: | + PREV_TAG=$(git tag --sort=-v:refname | head -2 | tail -1) + echo "tag=$PREV_TAG" >> $GITHUB_OUTPUT + + - name: Generate changelog + id: changelog + run: | + CHANGELOG=$(git log ${{ steps.prev_tag.outputs.tag }}..HEAD --pretty=format:"- %s (%h)" --no-merges) + echo "changelog<<EOF" >> $GITHUB_OUTPUT + echo "$CHANGELOG" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + - name: Create Release + uses: softprops/action-gh-release@v2 + with: + generate_release_notes: true + body: | + ## Changes + + ${{ steps.changelog.outputs.changelog }} + + **Full Changelog**: https://github.com/${{ github.repository }}/compare/${{ steps.prev_tag.outputs.tag }}...${{ github.ref_name }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}