diff --git a/Dockerfile b/Dockerfile
index 3f6cf50..a05b6f7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,7 +26,7 @@ RUN COMMIT="${BUILD_COMMIT:-$(git -C /app rev-parse --short HEAD 2>/dev/null ||
echo "{\"commit\":\"${COMMIT}\",\"branch\":\"${BRANCH}\",\"date\":\"${DATE}\"}" > /app/version.json
# Writable dirs for runtime data
-RUN mkdir -p runtime/uploads runtime/canvas-pages runtime/known_faces runtime/music runtime/generated_music runtime/faces runtime/transcripts
+RUN mkdir -p runtime/uploads runtime/canvas-pages runtime/known_faces runtime/music runtime/generated_music runtime/faces runtime/transcripts runtime/issue-reports
# Run as non-root user
RUN useradd -m -u 1001 appuser && chown -R appuser:appuser /app
diff --git a/deploy/openclaw/Dockerfile b/deploy/openclaw/Dockerfile
index 5c0b5e7..ffd55bc 100644
--- a/deploy/openclaw/Dockerfile
+++ b/deploy/openclaw/Dockerfile
@@ -16,8 +16,8 @@ ENV PATH=$PNPM_HOME:$PATH
RUN mkdir -p $PNPM_HOME
# OpenClaw version — pinned to the version tested with this OpenVoiceUI release.
-# Override at build time: docker compose build --build-arg OPENCLAW_VERSION=2026.5.2
-ARG OPENCLAW_VERSION=2026.5.2
+# Override at build time: docker compose build --build-arg OPENCLAW_VERSION=2026.5.7
+ARG OPENCLAW_VERSION=2026.5.7
RUN pnpm add -g openclaw@${OPENCLAW_VERSION} && pnpm approve-builds -g
# Optional: install a coding CLI so the coding-agent skill is available.
diff --git a/docker-compose.yml b/docker-compose.yml
index e37a515..0ca9045 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,7 +3,7 @@ services:
build:
context: deploy/openclaw
args:
- OPENCLAW_VERSION: ${OPENCLAW_VERSION:-2026.5.2}
+ OPENCLAW_VERSION: ${OPENCLAW_VERSION:-2026.5.7}
CODING_CLI: ${CODING_CLI:-pi}
volumes:
- openclaw-data:/root/.openclaw
diff --git a/routes/conversation.py b/routes/conversation.py
index 4fc189a..b9c6aef 100644
--- a/routes/conversation.py
+++ b/routes/conversation.py
@@ -237,25 +237,14 @@ def _load_voice_system_prompt() -> str:
)
-# Spoken fallbacks for __session_start__ when the LLM returns nothing usable.
-# The current temporary primary model (zai/glm-5-turbo — see
-# memory/glm-primary-temporary-swap) returns empty / bare-"NO" completions on
-# the first turn of a session noticeably more often than MiniMax did. When that
-# happens we substitute one of these so the user always hears a greeting on
-# connect instead of dead air. (A profile-defined conversation.greeting wins
-# over these — see the __session_start__ handling in _conversation_inner.)
-_SESSION_START_FALLBACK_GREETINGS = (
- "Hey — I'm here. What can I do for you?",
- "Hi there. What's on your mind?",
- "I'm listening — what do you need?",
- "Hey, I'm here. What's up?",
- "Ready when you are — what can I help with?",
-)
-
-
-def _pick_session_start_greeting() -> str:
- """Pick a varied generic greeting for the empty-__session_start__ fallback."""
- return random.choice(_SESSION_START_FALLBACK_GREETINGS)
+# NOTE 2026-05-23: hardcoded fallback greetings REMOVED per feedback_no_hardcoded_responses.
+# Previously masked LLM-empty failures on __session_start__ with one of 5 canned
+# greetings. That made the broken state invisible — every connect that produced
+# silence was being papered over, so we couldn't see the failure rate. The
+# right behavior is: if the LLM returns empty, surface the failure clearly
+# (silence + warning log) so the bug is visible. The profile's verbatim
+# conversation.greeting still wins when defined — that's not hardcoded, it's
+# tenant-owned config.
def _is_vision_request(msg: str) -> bool:
@@ -2218,24 +2207,71 @@ def _retry_gateway():
if not full_response or not full_response.strip():
full_response = "I missed that — my brain glitched for a second. Could you say that again?"
- # ── Timeout empty: agent ran but produced nothing in 300s ──
- # This is NOT session poisoning — the session is healthy but the
- # agent ran out of time (long tool chain, image gen, website build).
- # Return a graceful spoken message; do NOT enter recovery.
+ # ── Slow-empty: LLM ran 5s+ and returned empty ──
+ # The fast-empty retry path above only covers <5s empties.
+ # The double-empty branch above only covers post-_retried empties.
+ # That leaves a 5-30s gap: a single non-retried slow empty
+ # would fall straight to text_done(None) → "No response from agent
+ # after recovery" → user sees agent died (observed 2026-05-23
+ # on bhb: 16882ms + 17370ms empties, both fell through).
+ #
+ # Try Z.AI direct (bypasses gateway and any poisoned openclaw
+ # session state) — same code path the double-empty branch uses.
+ # Only fall back to the spoken apology if Z.AI direct also fails.
+ # __session_start__ is handled by the dedicated greeting branch below.
if _is_empty and not getattr(stream_response, '_retried', False) \
- and metrics.get('llm_inference_ms', 0) >= 30000:
- if user_message == '__session_start__':
- full_response = "Hey, give me just a moment — I'm getting started."
- else:
- full_response = (
- "That took a bit longer than expected on my end. "
- "I'm still here — try again and I'll get right to it."
+ and metrics.get('llm_inference_ms', 0) >= 5000:
+ if user_message != '__session_start__':
+ try:
+ import requests as _req
+ _zai_key = os.environ.get('ZAI_API_KEY', '')
+ _fallback_msg = message_with_context if message_with_context else user_message
+ _fallback_system = _load_voice_system_prompt()
+ if _zai_key:
+ _zai_resp = _req.post(
+ 'https://api.z.ai/api/anthropic/v1/messages',
+ headers={
+ 'x-api-key': _zai_key,
+ 'anthropic-version': '2023-06-01',
+ 'content-type': 'application/json',
+ },
+ json={
+ 'model': 'glm-5-turbo',
+ 'max_tokens': 1500,
+ 'system': _fallback_system,
+ 'messages': [{'role': 'user', 'content': _fallback_msg}],
+ },
+ timeout=20,
+ )
+ if _zai_resp.status_code == 200:
+ _zai_data = _zai_resp.json()
+ _zai_text = _zai_data.get('content', [{}])[0].get('text', '')
+ if _zai_text:
+ full_response = _zai_text
+ metrics['fallback_used'] = 1
+ metrics['profile'] = 'zai-direct-slow-empty'
+ logger.info(
+ f"### SLOW-EMPTY Z.AI direct fallback succeeded "
+ f"({metrics['llm_inference_ms']}ms gateway empty → "
+ f"{len(_zai_text)} chars direct)"
+ )
+ except Exception as _fbe:
+ logger.error(f'### Slow-empty Z.AI fallback failed: {_fbe}')
+
+ # Z.AI direct didn't return text either — graceful apology
+ if not full_response or not full_response.strip():
+ if user_message == '__session_start__':
+ full_response = "Hey, give me just a moment — I'm getting started."
+ else:
+ full_response = (
+ "That took a bit longer than expected on my end. "
+ "I'm still here — try again and I'll get right to it."
+ )
+ metrics['fallback_used'] = 1
+ logger.warning(
+ f"### SLOW EMPTY ({metrics['llm_inference_ms']}ms) — "
+ f"Z.AI direct also failed, using apology"
)
- metrics['fallback_used'] = 1
- logger.warning(
- f"### TIMEOUT EMPTY ({metrics['llm_inference_ms']}ms) — "
- f"graceful fallback, no session recovery"
- )
# ── __session_start__ must ALWAYS produce a spoken greeting ──
# GLM-5-turbo (current temporary primary, see
@@ -2258,16 +2294,21 @@ def _retry_gateway():
_gs_norm = _gs.upper().rstrip('.!?')
_gs_tag_only = bool(_gs) and re.match(r'^\s*(\[[^\]]+\]\s*)+$', _gs)
if (not _gs) or _gs_norm in ('NO', 'YES') or _gs_tag_only:
- _fb_greeting = (_profile_greeting or '').strip() or _pick_session_start_greeting()
+ # ONLY use a profile-defined greeting (tenant config, not hardcoded).
+ # If no profile greeting, leave empty — the silence is the diagnostic
+ # signal that the LLM failed on __session_start__.
+ # (feedback_no_hardcoded_responses — 2026-05-23 removal of canned list)
+ _fb_greeting = (_profile_greeting or '').strip()
logger.warning(
f"### SESSION_START produced no usable greeting "
f"(was {full_response!r}, {metrics.get('llm_inference_ms')}ms) "
- f"— substituting fallback greeting: {_fb_greeting!r}"
+ f"— profile_greeting={_fb_greeting!r} (empty = silence by design)"
)
- full_response = _fb_greeting
- metrics['fallback_used'] = 1
+ full_response = _fb_greeting # may be '' — that's the right diagnostic signal
+ metrics['fallback_used'] = 1 if _fb_greeting else 0
+ metrics['llm_empty_session_start'] = 1
# Drop any partial / bare-token TTS buffered from the
- # broken turn so only the fallback greeting is spoken.
+ # broken turn so only the (profile) greeting is spoken, if any.
_tts_buf = ''
_tts_pending.clear()
diff --git a/routes/report_issue.py b/routes/report_issue.py
index f9996b9..1e702a0 100644
--- a/routes/report_issue.py
+++ b/routes/report_issue.py
@@ -72,18 +72,28 @@ def submit_issue():
'ua': request.headers.get('User-Agent', ''),
}
- # Always save locally
- REPORTS_DIR.mkdir(parents=True, exist_ok=True)
+ # Always save locally. Never let a filesystem error bubble up as an HTML 500
+ # page — the frontend does res.json() and an HTML body yields the cryptic
+ # "Unexpected token '<', " dict:
+ history_text = '\n'.join(
+ f"Scene {i+1}: {s.get('title','')} — {s.get('summary','')}"
+ for i, s in enumerate(scene_history)
+ )
+ user_msg = f"""Genre: {genre}
+Tone: {tone}
+Story so far:
+{history_text}
+
+Player chose: "{choice_text}"
+
+Generate the next scene JSON."""
+
+ r = httpx.post(
+ 'https://api.openai.com/v1/chat/completions',
+ json={
+ 'model': 'gpt-4o-mini',
+ 'messages': [
+ {'role': 'system', 'content': SCENE_SYSTEM},
+ {'role': 'user', 'content': user_msg},
+ ],
+ 'temperature': 0.85,
+ 'max_tokens': 1200,
+ 'response_format': {'type': 'json_object'},
+ },
+ headers={'Authorization': f'Bearer {OPENAI_API_KEY}'},
+ timeout=30.0,
+ )
+ r.raise_for_status()
+ content = r.json()['choices'][0]['message']['content']
+ return json.loads(content)
+
+
+# ── Asset generators ──────────────────────────────────────────────────────────
+
+def gen_image(prompt: str, out_path: Path) -> bool:
+    """Render *prompt* with FLUX.1-schnell through the Hugging Face router.
+
+    Writes the returned bytes to *out_path* and returns True only when the
+    API answers 200 with an image content type. Any other response, and any
+    exception raised along the way, is logged and reported as False.
+    """
+    endpoint = 'https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-schnell'
+    try:
+        body = json.dumps({'inputs': prompt}).encode()
+        auth_headers = {'Authorization': f'Bearer {HF_TOKEN}', 'Content-Type': 'application/json'}
+        resp = httpx.post(endpoint, content=body, headers=auth_headers, timeout=60.0)
+        got_image = resp.status_code == 200 and 'image' in resp.headers.get('content-type', '')
+        if not got_image:
+            logger.error(f'[story] image gen failed: {resp.status_code} {resp.text[:200]}')
+            return False
+        out_path.write_bytes(resp.content)
+    except Exception as exc:
+        logger.error(f'[story] image gen error: {exc}')
+        return False
+    return True
+
+
+def gen_suno_sound(prompt: str, loop: bool, key: str, out_path: Path) -> bool:
+    """Generate a sound via sunoapi.org and save the first clip to *out_path*.
+
+    Submits the prompt to the sounds endpoint, then polls the record-info
+    endpoint every 5 seconds (at most 40 polls) until the task reports
+    SUCCESS or FAILED.
+
+    Args:
+        prompt: Text description of the sound.
+        loop: Sent to the API as ``soundLoop``.
+        key: Sent to the API as ``soundKey``.
+        out_path: Destination file for the downloaded audio bytes.
+
+    Returns:
+        True when a clip was downloaded and written to *out_path*; False on
+        any failure. Failures are logged, never raised.
+    """
+    try:
+        r = httpx.post(
+            'https://api.sunoapi.org/api/v1/generate/sounds',
+            json={'prompt': prompt, 'model': 'V5_5', 'soundLoop': loop, 'soundKey': key},
+            headers={'Authorization': f'Bearer {SUNO_API_KEY}', 'Content-Type': 'application/json'},
+            timeout=30.0,
+        )
+        if r.status_code != 200:
+            logger.error(f'[story] suno submit failed: {r.text[:200]}')
+            return False
+
+        # "data" can be null in the reply, so never chain .get() on it directly.
+        task_id = (r.json().get('data') or {}).get('taskId')
+        if not task_id:
+            # Without a task id every poll would ask for "taskId=None" until
+            # the polling window ran out; fail fast instead.
+            logger.error(f'[story] suno submit returned no taskId: {r.text[:200]}')
+            return False
+
+        for _ in range(40):
+            time.sleep(5)
+            poll = httpx.get(
+                'https://api.sunoapi.org/api/v1/generate/record-info',
+                params={'taskId': task_id},
+                headers={'Authorization': f'Bearer {SUNO_API_KEY}'},
+                timeout=15.0,
+            )
+            pdata = poll.json().get('data') or {}
+            status = pdata.get('status')
+            if status == 'FAILED':
+                logger.error('[story] suno generation failed')
+                return False
+            if status != 'SUCCESS':
+                continue
+
+            clips = (pdata.get('response') or {}).get('sunoData') or []
+            if not clips:
+                # SUCCESS without clips: keep polling, as before.
+                continue
+            url = clips[0].get('sourceAudioUrl') or clips[0].get('audioUrl')
+            if not url:
+                logger.error(f'[story] suno clip has no audio URL (task {task_id})')
+                return False
+            dl = httpx.get(url, timeout=30.0, follow_redirects=True)
+            if dl.status_code != 200 or not dl.content:
+                # Never save an error page as audio and report success.
+                logger.error(f'[story] suno download failed: {dl.status_code}')
+                return False
+            out_path.write_bytes(dl.content)
+            return True
+
+        logger.error(f'[story] suno generation timed out (task {task_id})')
+    except Exception as e:
+        logger.error(f'[story] suno error: {e}')
+    return False
+
+
+def gen_tts_line(text: str, char_key: str, out_path: Path) -> bool:
+    """Synthesize one script line with the Resemble streaming endpoint.
+
+    The voice is looked up in STORY_VOICES by *char_key*, falling back to the
+    'narrator' entry. The audio is requested as PCM_16 at 24000 Hz and written
+    to *out_path* only when the reply is a 200 with more than 100 bytes of
+    content. Returns True on success; every failure is logged and returns False.
+    """
+    narrator_voice = STORY_VOICES['narrator']
+    voice = STORY_VOICES.get(char_key, narrator_voice)
+    # NOTE(review): this f-string adds nothing around `text`; if an SSML
+    # wrapper was intended here, confirm against the API and restore it.
+    ssml = f'{text}'
+    try:
+        payload = {
+            'voice_uuid': voice['uuid'],
+            'data': ssml,
+            'precision': 'PCM_16',
+            'sample_rate': 24000,
+        }
+        resp = httpx.post(
+            'https://f.cluster.resemble.ai/stream',
+            json=payload,
+            headers={'Authorization': f'Bearer {RESEMBLE_KEY}', 'Content-Type': 'application/json'},
+            timeout=30.0,
+        )
+        if resp.status_code != 200 or len(resp.content) <= 100:
+            logger.error(f'[story] tts failed: {resp.status_code}')
+            return False
+        out_path.write_bytes(resp.content)
+    except Exception as exc:
+        logger.error(f'[story] tts error: {exc}')
+        return False
+    return True
+
+
+# ── Main endpoint ─────────────────────────────────────────────────────────────
+
+def _safe_story_token(value):
+    """Return *value* as a string if it is safe inside a file name, else None.
+
+    Only letters, digits, '-' and '_' are accepted (max 128 chars), so path
+    separators and '..' can never reach the paths built in generate_scene().
+    """
+    token = str(value) if isinstance(value, (str, int)) else ''
+    if token and len(token) <= 128 and all(ch.isalnum() or ch in '-_' for ch in token):
+        return token
+    return None
+
+
+@story_bp.route('/api/story/generate-scene', methods=['POST'])
+def generate_scene():
+    """Generate the next story scene and its media assets.
+
+    Reads a JSON object with ``choice_text`` (required), ``story_id``,
+    ``scene_history``, ``genre``, ``tone`` and ``scene_index``; asks the LLM
+    for the scene JSON; then generates the image, ambient bed, SFX and TTS
+    lines in parallel threads under ``CANVAS_PAGES_DIR/stories/<story_id>``.
+
+    Returns:
+        ``{'status': 'ready', 'scene': {...}}`` with paths for the asset files
+        that exist on disk (missing image/line audio is reported as None), or
+        a JSON ``{'error': ...}`` with status 400 (bad input) or 500.
+    """
+    # silent=True: a malformed body becomes the JSON 400 below rather than
+    # the framework's default error page.
+    data = request.get_json(force=True, silent=True)
+    if not isinstance(data, dict):
+        return jsonify({'error': 'JSON object body required'}), 400
+
+    choice_text = data.get('choice_text', '')
+    scene_history = data.get('scene_history', [])
+    genre = data.get('genre', 'fantasy')
+    tone = data.get('tone', 'mysterious')
+
+    if not choice_text:
+        return jsonify({'error': 'choice_text required'}), 400
+
+    # story_id becomes a directory name: reject anything that could escape
+    # the stories/ directory.
+    story_id = _safe_story_token(data.get('story_id', 'story-unknown'))
+    if story_id is None:
+        return jsonify({'error': 'story_id may only contain letters, digits, "-" and "_"'}), 400
+
+    # scene_index is formatted with :03d below, which raises on non-integers.
+    try:
+        scene_index = int(data.get('scene_index', 1))  # which scene number this is
+    except (TypeError, ValueError, OverflowError):
+        scene_index = -1
+    if scene_index < 0:
+        return jsonify({'error': 'scene_index must be a non-negative integer'}), 400
+
+    # 1. Generate scene JSON via LLM
+    try:
+        scene_data = generate_scene_json(choice_text, scene_history, genre, tone)
+    except Exception as e:
+        logger.error(f'[story] LLM error: {e}')
+        return jsonify({'error': f'Scene generation failed: {e}'}), 500
+    if not isinstance(scene_data, dict):
+        logger.error('[story] LLM returned a scene that is not a JSON object')
+        return jsonify({'error': 'Scene generation failed: scene is not a JSON object'}), 500
+
+    def _dict_items(key):
+        """Return scene_data[key] as a list of dicts, dropping anything else."""
+        raw = scene_data.get(key)
+        if not isinstance(raw, list):
+            return []
+        return [item for item in raw if isinstance(item, dict)]
+
+    scene_id = f'scene_{scene_index:03d}'
+    story_dir = CANVAS_PAGES_DIR / 'stories' / story_id
+    try:
+        story_dir.mkdir(parents=True, exist_ok=True)
+    except OSError as e:
+        logger.error(f'[story] cannot create {story_dir}: {e}')
+        return jsonify({'error': 'Scene generation failed: cannot create story directory'}), 500
+    try:
+        # NOTE(review): world-writable mode kept from the original; confirm
+        # it is really required by whatever else writes to these dirs.
+        story_dir.chmod(0o777)
+        (CANVAS_PAGES_DIR / 'stories').chmod(0o777)
+    except OSError as e:
+        # Best effort: the directory exists, so generation can continue.
+        logger.warning(f'[story] could not chmod story dirs: {e}')
+
+    # The LLM output is untrusted too: normalise it once, up front.
+    ambient = scene_data.get('ambient')
+    if not isinstance(ambient, dict):
+        ambient = {}
+    script = _dict_items('script')
+    sfx_entries = []  # (safe id, sfx dict, output file)
+    for sfx in _dict_items('sfx'):
+        sfx_id = _safe_story_token(sfx.get('id'))
+        if sfx_id is None:
+            logger.warning(f'[story] skipping sfx with unsafe or missing id: {sfx.get("id")!r}')
+            continue
+        sfx_entries.append((sfx_id, sfx, story_dir / f'{scene_id}_{sfx_id}.mp3'))
+
+    image_file = story_dir / f'{scene_id}_image.jpg'
+    ambient_file = story_dir / f'{scene_id}_ambient.mp3'
+    line_files = [story_dir / f'{scene_id}_line_{i:02d}.wav' for i in range(len(script))]
+
+    # 2. Generate all assets in parallel
+    errors = []  # labels of assets that failed; appended to by the workers
+
+    def _worker(label, func, *args):
+        """Run one asset generator and record *label* if it fails or raises."""
+        try:
+            ok = func(*args)
+        except Exception as e:
+            logger.error(f'[story] {label} worker crashed: {e}')
+            ok = False
+        if not ok:
+            errors.append(label)
+
+    jobs = [
+        ('image', gen_image, scene_data.get('image_prompt', 'dark fantasy scene'), image_file),
+        (
+            'ambient',
+            gen_suno_sound,
+            ambient.get('prompt', 'dark ambient atmosphere'),
+            True,
+            ambient.get('soundKey', 'Am'),
+            ambient_file,
+        ),
+    ]
+    for sfx_id, sfx, sfx_file in sfx_entries:
+        jobs.append((f'sfx:{sfx_id}', gen_suno_sound, sfx.get('prompt', 'sound effect'), False, 'Any', sfx_file))
+    for i, line in enumerate(script):
+        jobs.append((f'line:{i}', gen_tts_line, line.get('text', ''), line.get('character', 'narrator'), line_files[i]))
+
+    threads = [threading.Thread(target=_worker, args=job) for job in jobs]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+    if errors:
+        logger.warning(f'[story] {scene_id}: assets failed: {", ".join(errors)}')
+
+    # 3. Build the scene object the canvas needs (with resolved file paths)
+    base = f'stories/{story_id}'
+
+    # Rebuild sounds array from scene_data + resolved paths
+    sounds = []
+    if ambient_file.exists():
+        sounds.append({
+            'id': 'ambient',
+            'role': 'ambient',
+            'file': f'{base}/{scene_id}_ambient.mp3',
+            'trigger': 'scene_start',
+            'volume': ambient.get('volume', 0.35),
+        })
+    for sfx_id, sfx, sfx_file in sfx_entries:
+        if sfx_file.exists():
+            sounds.append({
+                'id': sfx['id'],
+                'role': 'sfx',
+                'file': f'{base}/{scene_id}_{sfx_id}.mp3',
+                'trigger': sfx.get('trigger', 'after_line_0'),
+                'volume': sfx.get('volume', 0.8),
+                'delay_ms': sfx.get('delay_ms', 0),
+            })
+
+    # Rebuild script with resolved audio paths
+    resolved_script = [
+        {
+            'type': line.get('type', 'narration'),
+            'character': line.get('character', 'narrator'),
+            'text': line.get('text', ''),
+            'audio': f'{base}/{scene_id}_line_{i:02d}.wav' if line_files[i].exists() else None,
+        }
+        for i, line in enumerate(script)
+    ]
+
+    scene_out = {
+        'scene_id': scene_id,
+        'title': scene_data.get('title', 'Unknown'),
+        'image_file': f'{base}/{scene_id}_image.jpg' if image_file.exists() else None,
+        'sounds': sounds,
+        'script': resolved_script,
+        'choices': scene_data.get('choices', []),
+    }
+
+    return jsonify({'status': 'ready', 'scene': scene_out})
diff --git a/routes/suno.py b/routes/suno.py
index 225fb7c..06b1330 100644
--- a/routes/suno.py
+++ b/routes/suno.py
@@ -5,7 +5,7 @@
Generated songs land in generated_music/ and show up in the music player.
Endpoints:
- GET/POST /api/suno (action: generate|status|list|credits)
+ GET/POST /api/suno (action: generate|jingle|sfx|status|list|credits)
POST /api/suno/callback (webhook from sunoapi.org)
GET/POST /api/suno/completed (frontend polls for completed songs)
@@ -40,6 +40,13 @@
GENERATED_MUSIC_DIR.mkdir(parents=True, exist_ok=True)
GENERATED_METADATA_FILE = GENERATED_MUSIC_DIR / 'generated_metadata.json'
+# SFX (action=sfx) land in a dedicated subdir so they are NOT mixed in with the
+# music library/player. Kept under generated_music/ so it's already mounted +
+# web-served (/generated_music/sfx/) with no compose/mount changes. The
+# music list (_action_list) and music metadata/queue intentionally skip these.
+GENERATED_SOUNDS_DIR = GENERATED_MUSIC_DIR / 'sfx'
+GENERATED_SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
+
SUNO_API_KEY = os.environ.get('SUNO_API_KEY', '')
SUNO_API_BASE = 'https://api.sunoapi.org'
SUNO_WEBHOOK_SECRET = os.environ.get('SUNO_WEBHOOK_SECRET', '')
@@ -262,6 +269,9 @@ def handle_suno():
elif action == 'jingle':
return _action_jingle(_q, body)
+ elif action == 'sfx':
+ return _action_sfx(_q, body)
+
elif action == 'list_jingles':
return _action_list_jingles()
@@ -279,7 +289,7 @@ def handle_suno():
return _action_credits()
else:
- return jsonify({'action': 'error', 'response': f"Unknown action '{action}'. Use: generate, jingle, list_jingles, jingle_styles, status, list, credits"})
+ return jsonify({'action': 'error', 'response': f"Unknown action '{action}'. Use: generate, jingle, sfx, list_jingles, jingle_styles, status, list, credits"})
except Exception as exc:
logger.exception('Suno endpoint error')
@@ -407,6 +417,108 @@ def _action_generate(_q, body: dict):
return jsonify({'action': 'error', 'response': f"Couldn't reach Suno API: {exc}"})
+def _action_sfx(_q, body: dict):
+    """Generate a short non-vocal sound effect / ambient stinger.
+
+    Wraps sunoapi.org's "Sounds Generation (V5)" endpoint
+    (POST /api/v1/generate/sounds, 2.5 credits). This is NOT a jingle or a song
+    — it produces game SFX, UI blips, stingers, ambient beds, etc. with no
+    vocals. Returns a taskId in the same shape as /generate, so the normal
+    `action=status` poller downloads + saves the clip when ready.
+
+    Inputs (each read from the query via ``_q`` first, then from ``body``):
+      prompt  — required, what the sound should be (max 500 chars).
+                e.g. "retro 8-bit coin pickup blip", "wooden mallet thwack",
+                "ominous low brass sting", "arcade game-over jingle no vocals"
+      title   — optional label used for the saved filename + metadata.
+      loop    — optional bool; soundLoop (seamless looping bed). Default false.
+      tempo   — optional int 1-300; soundTempo (BPM). Omit for auto.
+      key     — optional musical key (Any, Cm, C#m, ... B). Omit for Any.
+
+    Returns:
+      A JSON response: ``action='generating'`` with job_id/task_id on success,
+      otherwise ``action='error'`` with a human-readable ``response``.
+    """
+    def _text(name):
+        """Return the named input as a stripped string ('' when absent/null)."""
+        # JSON values may be null or non-strings; never call .strip() on them raw.
+        raw = _q(name) or body.get(name) or ''
+        return str(raw).strip()
+
+    prompt = _text('prompt')
+    if not prompt:
+        return jsonify({'action': 'error', 'response': "Need a description of the sound — e.g. 'retro 8-bit coin pickup blip'."})
+    prompt = prompt[:500]
+
+    title = _text('title')
+
+    loop_raw = _q('loop') or body.get('loop', False)
+    if isinstance(loop_raw, bool):
+        sound_loop = loop_raw
+    else:
+        sound_loop = str(loop_raw).lower() in ('true', '1', 'yes')
+
+    request_body = {
+        'prompt': prompt,
+        # V5_5 matches the proven story.py path (routes/story.py gen_suno_sound,
+        # shipped 2026-05-28). sunoapi.org docs say "V5 only" for this endpoint
+        # but production uses V5_5 successfully with better quality — match it.
+        'model': 'V5_5',
+        'soundLoop': sound_loop,
+    }
+
+    # Optional tempo (BPM 1-300); out-of-range or unparsable values are ignored.
+    tempo_raw = _q('tempo') or body.get('tempo', '')
+    if tempo_raw not in (None, ''):
+        try:
+            tempo = int(tempo_raw)
+            if 1 <= tempo <= 300:
+                request_body['soundTempo'] = tempo
+        except (TypeError, ValueError):
+            pass
+
+    # Optional musical key
+    key = _text('key')
+    if key and key.lower() != 'any':
+        request_body['soundKey'] = key
+
+    # NOTE: deliberately NO callBackUrl for SFX. SFX complete via polling
+    # (action=status), which routes them to the sounds subdir and keeps them out
+    # of the music library. The webhook callback path registers results as music,
+    # so skipping it prevents SFX from leaking into the music player.
+
+    logger.info(f'Suno sfx: loop={sound_loop} prompt={prompt[:80]}')
+
+    try:
+        resp = http_requests.post(
+            f'{SUNO_API_BASE}/api/v1/generate/sounds',
+            headers={'Authorization': f'Bearer {SUNO_API_KEY}', 'Content-Type': 'application/json'},
+            json=request_body,
+            timeout=30,
+        )
+    except http_requests.RequestException as exc:
+        return jsonify({'action': 'error', 'response': f"Couldn't reach Suno API: {exc}"})
+
+    logger.info(f'Suno sfx response: {resp.status_code} {resp.text[:300]}')
+
+    if resp.status_code != 200:
+        return jsonify({'action': 'error', 'response': f'Suno SFX HTTP {resp.status_code}: {resp.text[:200]}'})
+
+    try:
+        data = resp.json()
+    except ValueError:
+        # A 200 with a non-JSON body is a server-side problem, not a
+        # connectivity one; report it with the HTTP-error wording.
+        return jsonify({'action': 'error', 'response': f'Suno SFX HTTP {resp.status_code}: {resp.text[:200]}'})
+    if not isinstance(data, dict):
+        data = {}
+
+    # "data" can be null in the reply, so don't chain .get() on it.
+    payload = data.get('data')
+    task_id = payload.get('taskId') if isinstance(payload, dict) else None
+    if data.get('code') != 200 or not task_id:
+        return jsonify({'action': 'error', 'response': f"Suno SFX error: {data.get('msg', 'Unknown error')}"})
+
+    job_id = str(uuid.uuid4())
+    suno_jobs[job_id] = {
+        'status': 'generating',
+        'prompt': prompt,
+        'title': title or prompt[:60],
+        'style': 'sfx',
+        'kind': 'sfx',  # read by the status poller to route the clip to the sfx dir
+        'task_id': task_id,
+        'created_at': time.time(),
+    }
+    return jsonify({
+        'action': 'generating',
+        'job_id': job_id,
+        'task_id': task_id,
+        'kind': 'sfx',
+        'response': f"Generating sound: '{title or prompt[:40]}' — check back in ~20-40 seconds.",
+        'estimated_seconds': 30,
+    })
+
+
def _action_jingle(_q, body: dict):
"""Generate a 10-15 second vocal-logo jingle of a brand name.
@@ -645,7 +757,11 @@ def _action_status(job_id: str):
# Suno returns 2 clips per generation — only take the first one
songs = songs[:1] if songs else []
for song in songs:
- audio_url = song.get('audioUrl') or song.get('audio_url')
+ # sourceAudioUrl is the original/high-quality URL the
+ # sounds endpoint returns (see routes/story.py); kept as
+ # a fallback so SFX jobs download correctly. Additive —
+ # songs/jingles still prefer audioUrl, unchanged.
+ audio_url = song.get('audioUrl') or song.get('audio_url') or song.get('sourceAudioUrl')
if not audio_url:
continue
song_id = song.get('id', task_id)
@@ -657,8 +773,12 @@ def _action_status(job_id: str):
song_title = job.get('title') or job.get('prompt', '')[:60] or 'Generated Track'
duration = song.get('duration', 0)
slug = _slugify_title(song_title)
- filename = _unique_filename(GENERATED_MUSIC_DIR, slug)
- save_path = GENERATED_MUSIC_DIR / filename
+ # SFX go to the dedicated sounds subdir, separate from music.
+ _is_sfx = job.get('kind') == 'sfx'
+ _dir = GENERATED_SOUNDS_DIR if _is_sfx else GENERATED_MUSIC_DIR
+ _url_base = '/generated_music/sfx' if _is_sfx else '/generated_music'
+ filename = _unique_filename(_dir, slug)
+ save_path = _dir / filename
if not save_path.exists():
if not _is_safe_download_url(audio_url):
@@ -684,49 +804,54 @@ def _action_status(job_id: str):
logger.warning(f'Suno download failed: {audio_resp.status_code}')
continue
- # Save metadata — propagate jingle fields if this was a jingle job
kind = job.get('kind', 'song')
- extra = {}
- if kind == 'jingle':
- extra = {
- 'brand': job.get('brand', ''),
- 'style_key': job.get('style_key', ''),
- 'vocal_gender': job.get('vocal_gender', ''),
- 'instrumental': job.get('instrumental', False),
- }
- # Capture lyrics from Suno response if present
song_lyrics = song.get('prompt', '') or song.get('lyrics', '')
- if song_lyrics:
- extra['lyrics'] = song_lyrics
- _add_song_to_metadata(
- filename=filename,
- title=song_title,
- prompt=job.get('prompt', ''),
- style=job.get('style', ''),
- duration=duration,
- song_id=song_id,
- kind=kind,
- extra=extra,
- )
+
+ # SFX are NOT music — keep them out of the music metadata
+ # AND the music player's completed-songs queue so they
+ # don't pollute the music library. The status response
+ # below still returns the URL so the caller gets the clip.
+ if not _is_sfx:
+ extra = {}
+ if kind == 'jingle':
+ extra = {
+ 'brand': job.get('brand', ''),
+ 'style_key': job.get('style_key', ''),
+ 'vocal_gender': job.get('vocal_gender', ''),
+ 'instrumental': job.get('instrumental', False),
+ }
+ if song_lyrics:
+ extra['lyrics'] = song_lyrics
+ _add_song_to_metadata(
+ filename=filename,
+ title=song_title,
+ prompt=job.get('prompt', ''),
+ style=job.get('style', ''),
+ duration=duration,
+ song_id=song_id,
+ kind=kind,
+ extra=extra,
+ )
# Update job
job['status'] = 'complete'
job['song_id'] = song_id
job['title'] = song_title
- job['url'] = f'/generated_music/{filename}'
-
- # Notify frontend poller
- completed_songs_queue.append({
- 'song_id': song_id,
- 'filename': filename,
- 'title': song_title,
- 'job_id': job_id,
- 'kind': kind,
- 'url': f'/generated_music/{filename}',
- 'completed_at': datetime.now().isoformat(),
- 'prompt': job.get('prompt', ''),
- 'lyrics': song_lyrics,
- })
+ job['url'] = f'{_url_base}/{filename}'
+
+ # Notify frontend poller — music only (SFX skip the music queue)
+ if not _is_sfx:
+ completed_songs_queue.append({
+ 'song_id': song_id,
+ 'filename': filename,
+ 'title': song_title,
+ 'job_id': job_id,
+ 'kind': kind,
+ 'url': f'{_url_base}/{filename}',
+ 'completed_at': datetime.now().isoformat(),
+ 'prompt': job.get('prompt', ''),
+ 'lyrics': song_lyrics,
+ })
return jsonify({
'action': 'complete',
@@ -734,7 +859,7 @@ def _action_status(job_id: str):
'job_id': job_id,
'song_id': song_id,
'title': song_title,
- 'url': f'/generated_music/{filename}',
+ 'url': f'{_url_base}/{filename}',
'response': f"Done! '{song_title}' is ready to spin!",
})
diff --git a/server.py b/server.py
index 953ce2e..e7dd8f8 100644
--- a/server.py
+++ b/server.py
@@ -166,6 +166,9 @@ def _extract_page_version(path, max_lines=5):
from routes.suno import suno_bp
app.register_blueprint(suno_bp)
+from routes.story import story_bp
+app.register_blueprint(story_bp)
+
from routes.airadio_bridge import airadio_bp
app.register_blueprint(airadio_bp)
diff --git a/services/gateways/compat.py b/services/gateways/compat.py
index ee0d97f..b707d17 100644
--- a/services/gateways/compat.py
+++ b/services/gateways/compat.py
@@ -24,7 +24,7 @@
PROTOCOL_MAX = 5 # forward-compatible — OpenClaw ignores unsupported maxes
# Version this code was tested against (for warning logs).
-OPENCLAW_TESTED_VERSION = "2026.5.2"
+OPENCLAW_TESTED_VERSION = "2026.5.7"
OPENCLAW_MIN_VERSION = "2026.3.1"
diff --git a/setup-sudo.sh b/setup-sudo.sh
index b907445..af32f44 100755
--- a/setup-sudo.sh
+++ b/setup-sudo.sh
@@ -14,7 +14,7 @@ EMAIL="your@email.com" # ← EDIT: for Let's Encrypt notifications
SERVICE_NAME="openvoiceui"
RUN_USER="${SUDO_USER:-$(whoami)}"
WWW_DIR="/var/www/${SERVICE_NAME}" # canvas pages + any web assets
-OPENCLAW_TESTED_VERSION="2026.5.2" # pinned: the openclaw version tested with this release
+OPENCLAW_TESTED_VERSION="2026.5.7" # pinned: the openclaw version tested with this release
# ────────────────────────────────────────────────────────────────────────────
# Guard: refuse to run with placeholder values
diff --git a/src/app.js b/src/app.js
index a9afde8..3eb2a85 100644
--- a/src/app.js
+++ b/src/app.js
@@ -578,8 +578,11 @@ connectAiradio();
}
this.currentMood = mood;
- // Propagate mood to BigHeadFace if active
+ // Propagate mood to BigHeadFace + HaloSmokeFace if active.
+ // HaloSmoke collapses non-'thinking' moods to its idle state,
+ // which is how it clears the dots animation when a turn ends.
window.BigHeadFace?.setMood(mood);
+ window.HaloSmokeFace?.setMood(mood);
},
blink() {
@@ -2732,7 +2735,19 @@ connectAiradio();
context: this._gatherContext(),
}),
});
- const data = await res.json();
+ // Guard: a non-JSON body (e.g. an HTML error/login page) would
+ // make res.json() throw the cryptic "Unexpected token '<'" error.
+ // Surface a clean message based on status instead.
+ let data;
+ try {
+ data = await res.json();
+ } catch (_) {
+ throw new Error(
+ res.status === 401 || res.status === 403
+ ? 'Not signed in — please sign in and try again.'
+ : `Server returned ${res.status}. Please try again.`
+ );
+ }
if (data.ok) {
if (this._statusEl) { this._statusEl.textContent = '✓ Report submitted. Thank you!'; this._statusEl.className = 'irm-status success'; }
setTimeout(() => this.close(), 1800);
@@ -3782,6 +3797,7 @@ connectAiradio();
if (this._ttsGuardTimer) { clearTimeout(this._ttsGuardTimer); this._ttsGuardTimer = null; }
// Abort any in-flight fetch so streaming stops immediately
if (this._fetchAbortController) {
+ this._abortReason = 'stop';
this._fetchAbortController.abort();
this._fetchAbortController = null;
// Tell server to abort the openclaw run (fire-and-forget)
@@ -3854,6 +3870,7 @@ connectAiradio();
// instead: abort the tail, then fall through to the normal
// sendMessage path.
console.warn(`↩ POST-TEXT_DONE message — treating as fresh request: "${text.substring(0,30)}"`);
+ this._abortReason = 'user';
this._fetchAbortController.abort();
this._fetchAbortController = null;
fetch(`${this.config.serverUrl}${convPath('abort')}`, {
@@ -3864,6 +3881,7 @@ connectAiradio();
this.stopAudio();
} else if (this._ttsPlaying) {
// Agent already responded, TTS playing → ABORT
+ this._abortReason = 'user';
this._fetchAbortController.abort();
this._fetchAbortController = null;
console.warn(`⛔ ABORT source: ClawdbotMode.sendMessage (TTS playing, new msg: "${text.substring(0,30)}")`);
@@ -3987,14 +4005,20 @@ connectAiradio();
const decoder = new TextDecoder();
let buffer = '';
- // Inactivity timeout: abort if no data received for 60s
- // (heartbeats arrive every 10-15s during tool execution)
+ // Inactivity timeout: abort if no data received for this long.
+ // MUST be >= the server-side run budget (openclaw gateway
+ // timeout = 300s) so the client never gives up before the
+ // server does — otherwise long silent work (subagent spawns,
+ // batch ops) gets cut at the client and shows a false
+ // "stream timed out". Heartbeats normally arrive every 5-10s
+ // and reset this; 300s is the hard backstop matching the server.
// _inactivityTimer declared in outer scope so finally{} can clear it
- const INACTIVITY_TIMEOUT_MS = 60000;
+ const INACTIVITY_TIMEOUT_MS = 300000;
const _resetInactivity = () => {
if (_inactivityTimer) clearTimeout(_inactivityTimer);
_inactivityTimer = setTimeout(() => {
console.warn('[Stream] No data for 60s — aborting');
+ this._abortReason = 'inactivity';
this._fetchAbortController?.abort();
}, INACTIVITY_TIMEOUT_MS);
};
@@ -4593,11 +4617,25 @@ connectAiradio();
FaceModule.setMood('neutral');
StatusModule.update('idle', 'READY');
TranscriptPanel.removeThinking();
- // If agent was mid-task (had heartbeats), note the redirect
- if (this._wasAgentic) {
- this._wasAgentic = false;
+ // Label the abort by its ACTUAL cause — only an explicit
+ // user interrupt is "redirected by user". An inactivity
+ // timeout (agent went silent during long work) or a call
+ // stop is NOT a user redirect, and mislabeling it confused
+ // users ("why does it say redirected when I didn't?").
+ const _reason = this._abortReason;
+ this._abortReason = null;
+ this._wasAgentic = false;
+ if (_reason === 'user') {
TranscriptPanel.finalizeStreaming('🔀 Redirected.');
ActionConsole.addEntry('system', 'Task redirected by user');
+ } else if (_reason === 'inactivity') {
+ // The wording must track INACTIVITY_TIMEOUT_MS (300000 ms = 300s).
+ TranscriptPanel.finalizeStreaming('⏳ Agent went quiet — stream timed out.');
+ ActionConsole.addEntry('system', 'Stream timed out (agent silent 300s) — not a user action');
} else {
+ // Call stop or unknown-source abort (agentic or not) — don't blame
+ // the user; just close the stream quietly.
TranscriptPanel.finalizeStreaming(null);
}
@@ -8318,6 +8356,13 @@ ${meta.artwork ? `
` : ''}
addMessage(role, text, opts = {}) {
if (!this.messages || !text) return;
+ // Remove any stale thinking bubble before appending an assistant
+ // response. The streaming path (startStreaming/finalizeStreaming)
+ // already does this, but the non-streaming response path
+ // (data.response → addMessage) was leaving the dots floating
+ // above the rendered reply.
+ if (role === 'assistant') this.removeThinking();
+
const msg = document.createElement('div');
msg.className = `tp-msg ${role === 'user' ? 'user' : 'assistant'}`;
diff --git a/src/face/HaloSmokeFace.js b/src/face/HaloSmokeFace.js
index 3387ba7..5177673 100644
--- a/src/face/HaloSmokeFace.js
+++ b/src/face/HaloSmokeFace.js
@@ -505,7 +505,16 @@ window.HaloSmokeFace = (function () {
function setThinking(v) { _thinking = !!v; }
- return { start, stop, setThinking };
+ // BaseFace mood contract for the halo face. Halo has only two visual
+ // states: 'thinking' shows the animated dots and every other mood is idle.
+ // FaceModule.setMood('neutral') at the end of a turn needs this entry
+ // point to reach halo; without it the dots animation would stay on
+ // screen indefinitely.
+ function setMood(mood) {
+     const isThinking = mood === 'thinking';
+     setThinking(isThinking);
+ }
+
+ return { start, stop, setThinking, setMood };
})();
// Self-register with FaceRenderer plugin system
diff --git a/src/face/manifest.json b/src/face/manifest.json
index 9bef8f6..5f6824d 100644
--- a/src/face/manifest.json
+++ b/src/face/manifest.json
@@ -18,7 +18,8 @@
"description": "Halo frequency ring with wispy smoke core — calm at rest, reacts to TTS speech",
"module": "/src/face/HaloSmokeFace.js",
"preview": "/src/face/previews/orb.svg",
- "moods": [],
+ "moods": ["neutral", "happy", "sad", "angry", "thinking", "surprised", "listening"],
+ "moods_visual_collapse": "Halo only renders two visual states — 'thinking' = animated dots; any other mood = idle smoke. The full mood vocab is accepted so FaceModule.setMood routing works uniformly.",
"features": ["audio-reactive", "smoke", "halo", "speech-reactive"],
"configurable": false
}