Skip to content

Commit 570dc5a

Browse files
committed
fix: websocket session priority, restart env pickup, Anthropic streaming timeouts
- websocket-context: prioritize URL sessionId over stale Redux activeSessionId
- stack_control.sh: use --force-recreate for restart to pick up env changes
- anthropic provider: add explicit httpx timeout (600s read) for extended thinking
- anthropic provider: disable HTTP/2 for more reliable streaming
- service.py: log warning instead of crash when stream has no response
- router.py: add event counting and timing logs for SSE debugging
- anthropic.py: add missing logger import
- .stack.env.local.example: document VITE_API_URL and LOCAL_STORAGE_URL_BASE alignment
1 parent e675b7b commit 570dc5a

7 files changed

Lines changed: 99 additions & 32 deletions

File tree

docker/.stack.env.local.example

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ SANDBOX_DB_NAME=ii_sandbox
3737
SANDBOX_DATABASE_URL=postgresql+asyncpg://iiagent:iiagent@postgres:5432/ii_sandbox
3838

3939
# ============================================================================
40-
# REDIS CONFIGURATION
40+
# REDIS CONFIGURATION
4141
# ============================================================================
4242
REDIS_PORT=6379
4343
REDIS_URL=redis://redis:6379/0
@@ -58,6 +58,11 @@ MCP_PORT=6060
5858
# FRONTEND CONFIGURATION
5959
# ============================================================================
6060
FRONTEND_BUILD_MODE=production
61+
62+
# API URL that the frontend uses to reach the backend.
63+
# IMPORTANT: For mobile/remote device access, use your machine's IP address
64+
# (e.g., http://192.168.x.x:8000) instead of localhost.
65+
# This MUST match LOCAL_STORAGE_URL_BASE below (same host) for file uploads to work.
6166
VITE_API_URL=http://localhost:8000
6267

6368
# Disable Google OAuth for local setup (optional - set to enable)
@@ -117,6 +122,20 @@ OPENROUTER_API_KEY=
117122
# ============================================================================
118123
# These are not required for local-only mode
119124

125+
# ============================================================================
126+
# LOCAL FILE STORAGE (for uploads and assets)
127+
# ============================================================================
128+
# URL base for serving uploaded files to browsers.
129+
# IMPORTANT: This MUST use the same host as VITE_API_URL above.
130+
# - Use localhost for local-only access
131+
# - Use your machine's IP (e.g., http://192.168.x.x:8000/files) for mobile/remote access
132+
# If mismatched, file uploads will fail on mobile devices because the browser
133+
# tries to upload to a URL it can't reach.
134+
LOCAL_STORAGE_URL_BASE=http://localhost:8000/files
135+
136+
# Internal URL for container-to-container file access (usually doesn't need changing)
137+
LOCAL_STORAGE_INTERNAL_URL_BASE=http://backend:8000/files
138+
120139
# Image search (Serper)
121140
# SERPER_API_KEY=
122141

frontend/src/contexts/websocket-context.tsx

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,10 @@ export function SocketIOProvider({
6464
handleEventRef.current = handleEvent
6565
isFromNewQuestionRef.current = isFromNewQuestion
6666

67-
// Keep sessionIdRef in sync with activeSessionId (from Redux) or sessionId (from URL params)
68-
// Priority: activeSessionId (for newly created sessions) > sessionId (from URL)
69-
const currentSessionId = activeSessionId || sessionId
67+
// Keep sessionIdRef in sync with sessionId (from URL params) or activeSessionId (from Redux)
68+
// Priority: sessionId (from URL) > activeSessionId (for newly created sessions before URL updates)
69+
// This ensures that when navigating directly to a session URL, it takes precedence over stale Redux state
70+
const currentSessionId = sessionId || activeSessionId
7071

7172
// Reset session initialization flag when sessionId changes or on initial load
7273
if (sessionIdRef.current !== currentSessionId) {

scripts/stack_control.sh

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717
# LIFECYCLE COMMANDS:
1818
# start [service] Start services. No service = start all.
1919
# stop [service] Stop services. No service = stop all.
20-
# restart [service] Restart without rebuilding. No service = restart all.
20+
# restart [service] Recreate container (picks up env changes). No service = restart all.
2121
# rebuild [service] Stop, rebuild image, restart. No service = rebuild all buildable.
22+
# NOTE: For frontend VITE_* changes, use rebuild (baked at build time).
2223
# wake [id] Wake stopped sandbox containers. id = session or sandbox UUID.
2324
# cleanup Remove orphaned sandbox containers (Created/Exited state).
2425
#
@@ -149,8 +150,8 @@ USAGE:
149150
COMMANDS:
150151
start [service] Start services (all if no service specified)
151152
stop [service] Stop services (all if no service specified)
152-
restart [service] Restart without rebuilding
153-
rebuild [service] Rebuild from source and restart
153+
restart [service] Recreate container (picks up env changes, no rebuild)
154+
rebuild [service] Rebuild from source and restart (required for VITE_* changes)
154155
wake [id] Wake stopped sandbox (session ID, sandbox ID, or 'all')
155156
cleanup Remove orphaned sandbox containers (Created/Exited)
156157
status Show running services and URLs
@@ -669,15 +670,15 @@ cmd_restart() {
669670

670671
# If a specific service was requested, just restart that one
671672
if [[ -n "$TARGET_SERVICE" ]]; then
672-
log_info "Restarting $TARGET_SERVICE (keeping existing image)..."
673-
compose restart "$TARGET_SERVICE"
673+
log_info "Restarting $TARGET_SERVICE (recreating container to pick up env changes)..."
674+
compose up -d --force-recreate "$TARGET_SERVICE"
674675
log_success "$TARGET_SERVICE restarted"
675676
show_service_url "$TARGET_SERVICE"
676677
return
677678
fi
678679

679680
# Restart all services
680-
log_info "Restarting all services (keeping existing images)..."
681+
log_info "Restarting all services (recreating containers to pick up env changes)..."
681682
cmd_stop
682683
echo ""
683684
cmd_start
@@ -863,22 +864,22 @@ cmd_logs() {
863864
_resync_sandbox_ports() {
864865
local sandbox_port
865866
sandbox_port=$(get_env_value SANDBOX_SERVER_PORT 8100)
866-
867+
867868
log_info "Syncing port allocations with sandbox-server..."
868-
869+
869870
# Try the rescan endpoint (returns 400 in cloud mode, which is fine)
870871
local response
871872
response=$(curl -fsS -X POST "http://localhost:${sandbox_port}/ports/rescan" 2>&1) && {
872873
log_success "Port allocations synced"
873874
return 0
874875
}
875-
876+
876877
# Check if it's a "not available" error (cloud mode) - that's OK
877878
if echo "$response" | grep -q "not available"; then
878879
log_info "Port management not needed (cloud mode)"
879880
return 0
880881
fi
881-
882+
882883
# Fallback: restart sandbox-server to trigger startup scan
883884
log_warn "Rescan endpoint not available, restarting sandbox-server..."
884885
local sandbox_server_container="${PROJECT_NAME}-sandbox-server-1"
@@ -939,7 +940,7 @@ cmd_wake() {
939940
done
940941
echo ""
941942
log_success "Woke $count sandbox(es)"
942-
943+
943944
# Tell sandbox-server to rescan port allocations
944945
if [[ "$count" -gt 0 ]]; then
945946
_resync_sandbox_ports
@@ -1009,7 +1010,7 @@ cmd_wake() {
10091010
if docker ps --filter "name=$container_name" --format "{{.Status}}" | grep -q "Up"; then
10101011
log_success "Sandbox is now running"
10111012
docker ps --filter "name=$container_name" --format "table {{.Names}}\t{{.Status}}"
1012-
1013+
10131014
# Tell sandbox-server to rescan port allocations
10141015
_resync_sandbox_ports
10151016
else
@@ -1058,7 +1059,7 @@ cmd_cleanup() {
10581059
local container_id container_name
10591060
container_id=$(echo "$line" | awk '{print $1}')
10601061
container_name=$(echo "$line" | awk '{print $2}')
1061-
1062+
10621063
if [[ -n "$container_id" ]]; then
10631064
log_info "Removing $container_name..."
10641065
if docker rm "$container_id" &>/dev/null; then
@@ -1072,15 +1073,15 @@ cmd_cleanup() {
10721073

10731074
echo ""
10741075
log_success "Removed $count orphaned container(s)"
1075-
1076+
10761077
# Tell sandbox-server to clean up its port allocations
10771078
if [[ "$count" -gt 0 ]]; then
10781079
auto_detect_mode
10791080
get_compose_vars
1080-
1081+
10811082
local sandbox_port
10821083
sandbox_port=$(get_env_value SANDBOX_SERVER_PORT 8100)
1083-
1084+
10841085
log_info "Syncing port allocations with sandbox-server..."
10851086
if curl -fsS -X POST "http://localhost:${sandbox_port}/ports/cleanup" &>/dev/null; then
10861087
log_success "Port allocations cleaned"

src/ii_agent/llm/anthropic.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import logging
23
import random
34
import time
45
from typing import Any, Tuple, cast
@@ -60,6 +61,8 @@
6061
RedactedThinkingBlock,
6162
)
6263

64+
logger = logging.getLogger(__name__)
65+
6366

6467
class AnthropicDirectClient(LLMClient):
6568
"""Use Anthropic models via first party API."""

src/ii_agent/server/chat/llm/anthropic/provider.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import anyio
1616
import anthropic
17+
import httpx
1718
from anthropic.types import (
1819
TextBlock,
1920
ToolUseBlock,
@@ -107,9 +108,30 @@ def __init__(self, llm_config: LLMConfig):
107108
)
108109
else:
109110
# Support custom base_url for Anthropic-compatible APIs (e.g., Minimax)
111+
# Use explicit httpx.Timeout for better control over streaming timeouts
112+
# Extended thinking can have long pauses between chunks
113+
stream_timeout = httpx.Timeout(
114+
connect=30.0, # Connection timeout
115+
read=600.0, # Read timeout - 10 minutes for extended thinking
116+
write=30.0, # Write timeout
117+
pool=30.0 # Pool timeout
118+
)
119+
120+
# Create custom httpx client with HTTP/1.1 only (no HTTP/2)
121+
# and explicit connection limits to avoid connection pooling issues
122+
http_client = httpx.AsyncClient(
123+
timeout=stream_timeout,
124+
http2=False, # Disable HTTP/2 for more reliable streaming
125+
limits=httpx.Limits(
126+
max_keepalive_connections=5,
127+
max_connections=10,
128+
keepalive_expiry=30.0, # Close idle connections after 30s
129+
),
130+
)
131+
110132
client_kwargs = {
111133
"api_key": llm_config.api_key.get_secret_value(),
112-
"timeout": 60 * 5,
134+
"http_client": http_client,
113135
"max_retries": 3,
114136
}
115137
if llm_config.base_url:
@@ -663,7 +685,6 @@ async def stream(
663685
content_started = False
664686
current_tool_call_id = None # Track the current tool call being processed
665687

666-
logger.info("Starting Anthropic stream...")
667688
async with self.client.beta.messages.stream(**params, betas=betas) as stream:
668689
async for event in stream:
669690
# Content block start

src/ii_agent/server/chat/router.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ async def event_generator():
273273
import time
274274

275275
start_time = time.time()
276+
event_count = 0
276277
logger.info(f"event_generator started for session {session_id}")
277278

278279
try:
@@ -287,13 +288,15 @@ async def event_generator():
287288
"created_at": session_metadata.created_at,
288289
}
289290
yield f"event: session\ndata: {json.dumps(session_event)}\n\n"
291+
event_count += 1
290292

291293
# Stream response from provider
292294
async for event in ChatService.stream_chat_response(
293295
db_session=db_session,
294296
chat_request=request,
295297
user_id=str(current_user.id),
296298
):
299+
event_count += 1
297300
event_type = event.get("type")
298301

299302
# Content events (start/delta/stop)
@@ -409,6 +412,9 @@ async def event_generator():
409412
}
410413
yield f"event: complete\ndata: {json.dumps(complete_event)}\n\n"
411414

415+
# Stream completed successfully
416+
logger.info(f"event_generator completed successfully for session {session_id} after {event_count} events")
417+
412418
except Exception as e:
413419
logger.error(f"Chat streaming error: {e}", exc_info=True)
414420
error_event = {
@@ -417,6 +423,9 @@ async def event_generator():
417423
"code": "streaming_error",
418424
}
419425
yield f"event: error\ndata: {json.dumps(error_event)}\n\n"
426+
finally:
427+
elapsed = time.time() - start_time
428+
logger.info(f"event_generator finished for session {session_id}: {event_count} events in {elapsed:.2f}s")
420429

421430
return StreamingResponse(
422431
event_generator(),

src/ii_agent/server/chat/service.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -563,33 +563,46 @@ async def stream_chat_response(
563563
# Accumulate parts for this assistant turn
564564
run_response: RunResponseOutput = None
565565
file_parts = []
566+
provider_event_count = 0
566567
# Stream LLM response with tools
567568
async for event in provider.stream(
568569
messages=messages,
569570
tools=tools_to_pass,
570571
is_code_interpreter_enabled=is_code_interpreter_enabled,
571572
session_id=session_id,
572573
):
574+
provider_event_count += 1
573575
# Handle COMPLETE event separately (stores response)
574576
if event.type == EventType.COMPLETE:
575577
run_response = event.response
578+
logger.info(f"Service received COMPLETE event after {provider_event_count} events")
576579
else:
577580
# Convert event to SSE format and yield
578581
sse_event = event.to_sse_event()
579582
if sse_event is not None:
580583
yield sse_event
581584

585+
logger.info(f"Provider stream loop exited after {provider_event_count} events, run_response is {'set' if run_response else 'None'}")
586+
582587
# Yield usage event for this LLM turn
583-
if run_response:
584-
yield {
585-
"type": "usage",
586-
"usage": {
587-
"input_tokens": run_response.usage.prompt_tokens,
588-
"output_tokens": run_response.usage.completion_tokens,
589-
"cache_creation_tokens": run_response.usage.cache_write_tokens,
590-
"cache_read_tokens": run_response.usage.cache_read_tokens,
591-
},
592-
}
588+
if run_response is None:
589+
logger.warning(
590+
"LLM stream completed without a response. "
591+
"This may indicate a provider error or timeout. "
592+
f"provider_event_count={provider_event_count}"
593+
)
594+
# Skip usage reporting and continue - let the loop handle tool use or end
595+
continue
596+
597+
yield {
598+
"type": "usage",
599+
"usage": {
600+
"input_tokens": run_response.usage.prompt_tokens,
601+
"output_tokens": run_response.usage.completion_tokens,
602+
"cache_creation_tokens": run_response.usage.cache_write_tokens,
603+
"cache_read_tokens": run_response.usage.cache_read_tokens,
604+
},
605+
}
593606

594607
if run_response.files:
595608
file_parts.extend(run_response.files)

0 commit comments

Comments (0)