Skip to content

Commit 570dc5a

Browse files
committed
fix: websocket session priority, restart env pickup, Anthropic streaming timeouts
- websocket-context: prioritize URL sessionId over stale Redux activeSessionId
- stack_control.sh: use --force-recreate for restart to pick up env changes
- anthropic provider: add explicit httpx timeout (600s read) for extended thinking
- anthropic provider: disable HTTP/2 for more reliable streaming
- service.py: log warning instead of crash when stream has no response
- router.py: add event counting and timing logs for SSE debugging
- anthropic.py: add missing logger import
- .stack.env.local.example: document VITE_API_URL and LOCAL_STORAGE_URL_BASE alignment
1 parent e675b7b commit 570dc5a

7 files changed

Lines changed: 99 additions & 32 deletions

File tree

docker/.stack.env.local.example

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ SANDBOX_DB_NAME=ii_sandbox
3737
SANDBOX_DATABASE_URL=postgresql+asyncpg://iiagent:iiagent@postgres:5432/ii_sandbox
3838

3939
# ============================================================================
40-
# REDIS CONFIGURATION
40+
# REDIS CONFIGURATION
4141
# ============================================================================
4242
REDIS_PORT=6379
4343
REDIS_URL=redis://redis:6379/0
@@ -58,6 +58,11 @@ MCP_PORT=6060
5858
# FRONTEND CONFIGURATION
5959
# ============================================================================
6060
FRONTEND_BUILD_MODE=production
61+
62+
# API URL that the frontend uses to reach the backend.
63+
# IMPORTANT: For mobile/remote device access, use your machine's IP address
64+
# (e.g., http://192.168.x.x:8000) instead of localhost.
65+
# This MUST match LOCAL_STORAGE_URL_BASE below (same host) for file uploads to work.
6166
VITE_API_URL=http://localhost:8000
6267

6368
# Disable Google OAuth for local setup (optional - set to enable)
@@ -117,6 +122,20 @@ OPENROUTER_API_KEY=
117122
# ============================================================================
118123
# These are not required for local-only mode
119124

125+
# ============================================================================
126+
# LOCAL FILE STORAGE (for uploads and assets)
127+
# ============================================================================
128+
# URL base for serving uploaded files to browsers.
129+
# IMPORTANT: This MUST use the same host as VITE_API_URL above.
130+
# - Use localhost for local-only access
131+
# - Use your machine's IP (e.g., http://192.168.x.x:8000/files) for mobile/remote access
132+
# If mismatched, file uploads will fail on mobile devices because the browser
133+
# tries to upload to a URL it can't reach.
134+
LOCAL_STORAGE_URL_BASE=http://localhost:8000/files
135+
136+
# Internal URL for container-to-container file access (usually doesn't need changing)
137+
LOCAL_STORAGE_INTERNAL_URL_BASE=http://backend:8000/files
138+
120139
# Image search (Serper)
121140
# SERPER_API_KEY=
122141

frontend/src/contexts/websocket-context.tsx

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,10 @@ export function SocketIOProvider({
6464
handleEventRef.current = handleEvent
6565
isFromNewQuestionRef.current = isFromNewQuestion
6666

67-
// Keep sessionIdRef in sync with activeSessionId (from Redux) or sessionId (from URL params)
68-
// Priority: activeSessionId (for newly created sessions) > sessionId (from URL)
69-
const currentSessionId = activeSessionId || sessionId
67+
// Keep sessionIdRef in sync with sessionId (from URL params) or activeSessionId (from Redux)
68+
// Priority: sessionId (from URL) > activeSessionId (for newly created sessions before URL updates)
69+
// This ensures that when navigating directly to a session URL, it takes precedence over stale Redux state
70+
const currentSessionId = sessionId || activeSessionId
7071

7172
// Reset session initialization flag when sessionId changes or on initial load
7273
if (sessionIdRef.current !== currentSessionId) {

scripts/stack_control.sh

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717
# LIFECYCLE COMMANDS:
1818
# start [service] Start services. No service = start all.
1919
# stop [service] Stop services. No service = stop all.
20-
# restart [service] Restart without rebuilding. No service = restart all.
20+
# restart [service] Recreate container (picks up env changes). No service = restart all.
2121
# rebuild [service] Stop, rebuild image, restart. No service = rebuild all buildable.
22+
# NOTE: For frontend VITE_* changes, use rebuild (baked at build time).
2223
# wake [id] Wake stopped sandbox containers. id = session or sandbox UUID.
2324
# cleanup Remove orphaned sandbox containers (Created/Exited state).
2425
#
@@ -149,8 +150,8 @@ USAGE:
149150
COMMANDS:
150151
start [service] Start services (all if no service specified)
151152
stop [service] Stop services (all if no service specified)
152-
restart [service] Restart without rebuilding
153-
rebuild [service] Rebuild from source and restart
153+
restart [service] Recreate container (picks up env changes, no rebuild)
154+
rebuild [service] Rebuild from source and restart (required for VITE_* changes)
154155
wake [id] Wake stopped sandbox (session ID, sandbox ID, or 'all')
155156
cleanup Remove orphaned sandbox containers (Created/Exited)
156157
status Show running services and URLs
@@ -669,15 +670,15 @@ cmd_restart() {
669670

670671
# If a specific service was requested, just restart that one
671672
if [[ -n "$TARGET_SERVICE" ]]; then
672-
log_info "Restarting $TARGET_SERVICE (keeping existing image)..."
673-
compose restart "$TARGET_SERVICE"
673+
log_info "Restarting $TARGET_SERVICE (recreating container to pick up env changes)..."
674+
compose up -d --force-recreate "$TARGET_SERVICE"
674675
log_success "$TARGET_SERVICE restarted"
675676
show_service_url "$TARGET_SERVICE"
676677
return
677678
fi
678679

679680
# Restart all services
680-
log_info "Restarting all services (keeping existing images)..."
681+
log_info "Restarting all services (recreating containers to pick up env changes)..."
681682
cmd_stop
682683
echo ""
683684
cmd_start
@@ -863,22 +864,22 @@ cmd_logs() {
863864
_resync_sandbox_ports() {
864865
local sandbox_port
865866
sandbox_port=$(get_env_value SANDBOX_SERVER_PORT 8100)
866-
867+
867868
log_info "Syncing port allocations with sandbox-server..."
868-
869+
869870
# Try the rescan endpoint (returns 400 in cloud mode, which is fine)
870871
local response
871872
response=$(curl -fsS -X POST "http://localhost:${sandbox_port}/ports/rescan" 2>&1) && {
872873
log_success "Port allocations synced"
873874
return 0
874875
}
875-
876+
876877
# Check if it's a "not available" error (cloud mode) - that's OK
877878
if echo "$response" | grep -q "not available"; then
878879
log_info "Port management not needed (cloud mode)"
879880
return 0
880881
fi
881-
882+
882883
# Fallback: restart sandbox-server to trigger startup scan
883884
log_warn "Rescan endpoint not available, restarting sandbox-server..."
884885
local sandbox_server_container="${PROJECT_NAME}-sandbox-server-1"
@@ -939,7 +940,7 @@ cmd_wake() {
939940
done
940941
echo ""
941942
log_success "Woke $count sandbox(es)"
942-
943+
943944
# Tell sandbox-server to rescan port allocations
944945
if [[ "$count" -gt 0 ]]; then
945946
_resync_sandbox_ports
@@ -1009,7 +1010,7 @@ cmd_wake() {
10091010
if docker ps --filter "name=$container_name" --format "{{.Status}}" | grep -q "Up"; then
10101011
log_success "Sandbox is now running"
10111012
docker ps --filter "name=$container_name" --format "table {{.Names}}\t{{.Status}}"
1012-
1013+
10131014
# Tell sandbox-server to rescan port allocations
10141015
_resync_sandbox_ports
10151016
else
@@ -1058,7 +1059,7 @@ cmd_cleanup() {
10581059
local container_id container_name
10591060
container_id=$(echo "$line" | awk '{print $1}')
10601061
container_name=$(echo "$line" | awk '{print $2}')
1061-
1062+
10621063
if [[ -n "$container_id" ]]; then
10631064
log_info "Removing $container_name..."
10641065
if docker rm "$container_id" &>/dev/null; then
@@ -1072,15 +1073,15 @@ cmd_cleanup() {
10721073

10731074
echo ""
10741075
log_success "Removed $count orphaned container(s)"
1075-
1076+
10761077
# Tell sandbox-server to clean up its port allocations
10771078
if [[ "$count" -gt 0 ]]; then
10781079
auto_detect_mode
10791080
get_compose_vars
1080-
1081+
10811082
local sandbox_port
10821083
sandbox_port=$(get_env_value SANDBOX_SERVER_PORT 8100)
1083-
1084+
10841085
log_info "Syncing port allocations with sandbox-server..."
10851086
if curl -fsS -X POST "http://localhost:${sandbox_port}/ports/cleanup" &>/dev/null; then
10861087
log_success "Port allocations cleaned"

src/ii_agent/llm/anthropic.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import logging
23
import random
34
import time
45
from typing import Any, Tuple, cast
@@ -60,6 +61,8 @@
6061
RedactedThinkingBlock,
6162
)
6263

64+
logger = logging.getLogger(__name__)
65+
6366

6467
class AnthropicDirectClient(LLMClient):
6568
"""Use Anthropic models via first party API."""

src/ii_agent/server/chat/llm/anthropic/provider.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import anyio
1616
import anthropic
17+
import httpx
1718
from anthropic.types import (
1819
TextBlock,
1920
ToolUseBlock,
@@ -107,9 +108,30 @@ def __init__(self, llm_config: LLMConfig):
107108
)
108109
else:
109110
# Support custom base_url for Anthropic-compatible APIs (e.g., Minimax)
111+
# Use explicit httpx.Timeout for better control over streaming timeouts
112+
# Extended thinking can have long pauses between chunks
113+
stream_timeout = httpx.Timeout(
114+
connect=30.0, # Connection timeout
115+
read=600.0, # Read timeout - 10 minutes for extended thinking
116+
write=30.0, # Write timeout
117+
pool=30.0 # Pool timeout
118+
)
119+
120+
# Create custom httpx client with HTTP/1.1 only (no HTTP/2)
121+
# and explicit connection limits to avoid connection pooling issues
122+
http_client = httpx.AsyncClient(
123+
timeout=stream_timeout,
124+
http2=False, # Disable HTTP/2 for more reliable streaming
125+
limits=httpx.Limits(
126+
max_keepalive_connections=5,
127+
max_connections=10,
128+
keepalive_expiry=30.0, # Close idle connections after 30s
129+
),
130+
)
131+
110132
client_kwargs = {
111133
"api_key": llm_config.api_key.get_secret_value(),
112-
"timeout": 60 * 5,
134+
"http_client": http_client,
113135
"max_retries": 3,
114136
}
115137
if llm_config.base_url:
@@ -663,7 +685,6 @@ async def stream(
663685
content_started = False
664686
current_tool_call_id = None # Track the current tool call being processed
665687

666-
logger.info("Starting Anthropic stream...")
667688
async with self.client.beta.messages.stream(**params, betas=betas) as stream:
668689
async for event in stream:
669690
# Content block start

src/ii_agent/server/chat/router.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ async def event_generator():
273273
import time
274274

275275
start_time = time.time()
276+
event_count = 0
276277
logger.info(f"event_generator started for session {session_id}")
277278

278279
try:
@@ -287,13 +288,15 @@ async def event_generator():
287288
"created_at": session_metadata.created_at,
288289
}
289290
yield f"event: session\ndata: {json.dumps(session_event)}\n\n"
291+
event_count += 1
290292

291293
# Stream response from provider
292294
async for event in ChatService.stream_chat_response(
293295
db_session=db_session,
294296
chat_request=request,
295297
user_id=str(current_user.id),
296298
):
299+
event_count += 1
297300
event_type = event.get("type")
298301

299302
# Content events (start/delta/stop)
@@ -409,6 +412,9 @@ async def event_generator():
409412
}
410413
yield f"event: complete\ndata: {json.dumps(complete_event)}\n\n"
411414

415+
# Stream completed successfully
416+
logger.info(f"event_generator completed successfully for session {session_id} after {event_count} events")
417+
412418
except Exception as e:
413419
logger.error(f"Chat streaming error: {e}", exc_info=True)
414420
error_event = {
@@ -417,6 +423,9 @@ async def event_generator():
417423
"code": "streaming_error",
418424
}
419425
yield f"event: error\ndata: {json.dumps(error_event)}\n\n"
426+
finally:
427+
elapsed = time.time() - start_time
428+
logger.info(f"event_generator finished for session {session_id}: {event_count} events in {elapsed:.2f}s")
420429

421430
return StreamingResponse(
422431
event_generator(),

src/ii_agent/server/chat/service.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -563,33 +563,46 @@ async def stream_chat_response(
563563
# Accumulate parts for this assistant turn
564564
run_response: RunResponseOutput = None
565565
file_parts = []
566+
provider_event_count = 0
566567
# Stream LLM response with tools
567568
async for event in provider.stream(
568569
messages=messages,
569570
tools=tools_to_pass,
570571
is_code_interpreter_enabled=is_code_interpreter_enabled,
571572
session_id=session_id,
572573
):
574+
provider_event_count += 1
573575
# Handle COMPLETE event separately (stores response)
574576
if event.type == EventType.COMPLETE:
575577
run_response = event.response
578+
logger.info(f"Service received COMPLETE event after {provider_event_count} events")
576579
else:
577580
# Convert event to SSE format and yield
578581
sse_event = event.to_sse_event()
579582
if sse_event is not None:
580583
yield sse_event
581584

585+
logger.info(f"Provider stream loop exited after {provider_event_count} events, run_response is {'set' if run_response else 'None'}")
586+
582587
# Yield usage event for this LLM turn
583-
if run_response:
584-
yield {
585-
"type": "usage",
586-
"usage": {
587-
"input_tokens": run_response.usage.prompt_tokens,
588-
"output_tokens": run_response.usage.completion_tokens,
589-
"cache_creation_tokens": run_response.usage.cache_write_tokens,
590-
"cache_read_tokens": run_response.usage.cache_read_tokens,
591-
},
592-
}
588+
if run_response is None:
589+
logger.warning(
590+
"LLM stream completed without a response. "
591+
"This may indicate a provider error or timeout. "
592+
f"provider_event_count={provider_event_count}"
593+
)
594+
# Skip usage reporting and continue - let the loop handle tool use or end
595+
continue
596+
597+
yield {
598+
"type": "usage",
599+
"usage": {
600+
"input_tokens": run_response.usage.prompt_tokens,
601+
"output_tokens": run_response.usage.completion_tokens,
602+
"cache_creation_tokens": run_response.usage.cache_write_tokens,
603+
"cache_read_tokens": run_response.usage.cache_read_tokens,
604+
},
605+
}
593606

594607
if run_response.files:
595608
file_parts.extend(run_response.files)

0 commit comments

Comments (0)