diff --git a/Dockerfile b/Dockerfile index 3f6cf50..a05b6f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,7 @@ RUN COMMIT="${BUILD_COMMIT:-$(git -C /app rev-parse --short HEAD 2>/dev/null || echo "{\"commit\":\"${COMMIT}\",\"branch\":\"${BRANCH}\",\"date\":\"${DATE}\"}" > /app/version.json # Writable dirs for runtime data -RUN mkdir -p runtime/uploads runtime/canvas-pages runtime/known_faces runtime/music runtime/generated_music runtime/faces runtime/transcripts +RUN mkdir -p runtime/uploads runtime/canvas-pages runtime/known_faces runtime/music runtime/generated_music runtime/faces runtime/transcripts runtime/issue-reports # Run as non-root user RUN useradd -m -u 1001 appuser && chown -R appuser:appuser /app diff --git a/deploy/openclaw/Dockerfile b/deploy/openclaw/Dockerfile index 5c0b5e7..ffd55bc 100644 --- a/deploy/openclaw/Dockerfile +++ b/deploy/openclaw/Dockerfile @@ -16,8 +16,8 @@ ENV PATH=$PNPM_HOME:$PATH RUN mkdir -p $PNPM_HOME # OpenClaw version — pinned to the version tested with this OpenVoiceUI release. -# Override at build time: docker compose build --build-arg OPENCLAW_VERSION=2026.5.2 -ARG OPENCLAW_VERSION=2026.5.2 +# Override at build time: docker compose build --build-arg OPENCLAW_VERSION=2026.5.7 +ARG OPENCLAW_VERSION=2026.5.7 RUN pnpm add -g openclaw@${OPENCLAW_VERSION} && pnpm approve-builds -g # Optional: install a coding CLI so the coding-agent skill is available. 
diff --git a/docker-compose.yml b/docker-compose.yml index e37a515..0ca9045 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ services: build: context: deploy/openclaw args: - OPENCLAW_VERSION: ${OPENCLAW_VERSION:-2026.5.2} + OPENCLAW_VERSION: ${OPENCLAW_VERSION:-2026.5.7} CODING_CLI: ${CODING_CLI:-pi} volumes: - openclaw-data:/root/.openclaw diff --git a/routes/conversation.py b/routes/conversation.py index 4fc189a..b9c6aef 100644 --- a/routes/conversation.py +++ b/routes/conversation.py @@ -237,25 +237,14 @@ def _load_voice_system_prompt() -> str: ) -# Spoken fallbacks for __session_start__ when the LLM returns nothing usable. -# The current temporary primary model (zai/glm-5-turbo — see -# memory/glm-primary-temporary-swap) returns empty / bare-"NO" completions on -# the first turn of a session noticeably more often than MiniMax did. When that -# happens we substitute one of these so the user always hears a greeting on -# connect instead of dead air. (A profile-defined conversation.greeting wins -# over these — see the __session_start__ handling in _conversation_inner.) -_SESSION_START_FALLBACK_GREETINGS = ( - "Hey — I'm here. What can I do for you?", - "Hi there. What's on your mind?", - "I'm listening — what do you need?", - "Hey, I'm here. What's up?", - "Ready when you are — what can I help with?", -) - - -def _pick_session_start_greeting() -> str: - """Pick a varied generic greeting for the empty-__session_start__ fallback.""" - return random.choice(_SESSION_START_FALLBACK_GREETINGS) +# NOTE 2026-05-23: hardcoded fallback greetings REMOVED per feedback_no_hardcoded_responses. +# Previously masked LLM-empty failures on __session_start__ with one of 5 canned +# greetings. That made the broken state invisible — every connect that produced +# silence was being papered over, so we couldn't see the failure rate. The +# right behavior is: if the LLM returns empty, surface the failure clearly +# (silence + warning log) so the bug is visible. 
The profile's verbatim
+# conversation.greeting still wins when defined — that's not hardcoded, it's
+# tenant-owned config.
 
 
 def _is_vision_request(msg: str) -> bool:
@@ -2218,24 +2207,71 @@ def _retry_gateway():
                 if not full_response or not full_response.strip():
                     full_response = "I missed that — my brain glitched for a second. Could you say that again?"
 
-            # ── Timeout empty: agent ran but produced nothing in 300s ──
-            # This is NOT session poisoning — the session is healthy but the
-            # agent ran out of time (long tool chain, image gen, website build).
-            # Return a graceful spoken message; do NOT enter recovery.
+            # ── Slow-empty: LLM ran 5s+ and returned empty ──
+            # The fast-empty retry path above only covers <5s empties.
+            # The double-empty branch above only covers post-_retried empties.
+            # That leaves a 5-30s gap: a single non-retried slow empty
+            # would fall straight to text_done(None) → "No response from agent
+            # after recovery" → user sees agent died (observed 2026-05-23
+            # on bhb: 16882ms + 17370ms empties, both fell through).
+            #
+            # Try Z.AI direct (bypasses gateway and any poisoned openclaw
+            # session state) — same code path the double-empty branch uses.
+            # Only fall back to the spoken apology if Z.AI direct also fails.
+            # __session_start__ is handled by the dedicated greeting branch below.
             if _is_empty and not getattr(stream_response, '_retried', False) \
-                    and metrics.get('llm_inference_ms', 0) >= 30000:
-                if user_message == '__session_start__':
-                    full_response = "Hey, give me just a moment — I'm getting started."
-                else:
-                    full_response = (
-                        "That took a bit longer than expected on my end. "
-                        "I'm still here — try again and I'll get right to it."
+                    and metrics.get('llm_inference_ms', 0) >= 5000:
+                if user_message != '__session_start__':
+                    try:
+                        import requests as _req
+                        _zai_key = os.environ.get('ZAI_API_KEY', '')
+                        _fallback_msg = message_with_context if message_with_context else user_message
+                        _fallback_system = _load_voice_system_prompt()
+                        if _zai_key:
+                            _zai_resp = _req.post(
+                                'https://api.z.ai/api/anthropic/v1/messages',
+                                headers={
+                                    'x-api-key': _zai_key,
+                                    'anthropic-version': '2023-06-01',
+                                    'content-type': 'application/json',
+                                },
+                                json={
+                                    'model': 'glm-5-turbo',
+                                    'max_tokens': 1500,
+                                    'system': _fallback_system,
+                                    'messages': [{'role': 'user', 'content': _fallback_msg}],
+                                },
+                                timeout=20,
+                            )
+                            if _zai_resp.status_code == 200:
+                                _zai_data = _zai_resp.json()
+                                _zai_text = (_zai_data.get('content') or [{}])[0].get('text', '')
+                                if _zai_text:
+                                    full_response = _zai_text
+                                    metrics['fallback_used'] = 1
+                                    metrics['profile'] = 'zai-direct-slow-empty'
+                                    logger.info(
+                                        f"### SLOW-EMPTY Z.AI direct fallback succeeded "
+                                        f"({metrics['llm_inference_ms']}ms gateway empty → "
+                                        f"{len(_zai_text)} chars direct)"
+                                    )
+                    except Exception as _fbe:
+                        logger.error(f'### Slow-empty Z.AI fallback failed: {_fbe}')
+
+                # Z.AI direct didn't return text either — graceful apology.
+                # __session_start__ is left empty on purpose so the greeting
+                # branch below applies the profile greeting (or silence).
+                if user_message != '__session_start__' \
+                        and not (full_response and full_response.strip()):
+                    full_response = (
+                        "That took a bit longer than expected on my end. "
+                        "I'm still here — try again and I'll get right to it."
+                    )
+                    metrics['fallback_used'] = 1
+                    logger.warning(
+                        f"### SLOW EMPTY ({metrics['llm_inference_ms']}ms) — "
+                        f"Z.AI direct also failed, using apology"
                     )
-                metrics['fallback_used'] = 1
-                logger.warning(
-                    f"### TIMEOUT EMPTY ({metrics['llm_inference_ms']}ms) — "
-                    f"graceful fallback, no session recovery"
-                )
 
             # ── __session_start__ must ALWAYS produce a spoken greeting ──
             # GLM-5-turbo (current temporary primary, see
@@ -2258,16 +2294,21 @@ def _retry_gateway():
                 _gs_norm = _gs.upper().rstrip('.!?')
                 _gs_tag_only = bool(_gs) and re.match(r'^\s*(\[[^\]]+\]\s*)+$', _gs)
                 if (not _gs) or _gs_norm in ('NO', 'YES') or _gs_tag_only:
-                    _fb_greeting = (_profile_greeting or '').strip() or _pick_session_start_greeting()
+                    # ONLY use a profile-defined greeting (tenant config, not hardcoded).
+                    # If no profile greeting, leave empty — the silence is the diagnostic
+                    # signal that the LLM failed on __session_start__.
+                    # (feedback_no_hardcoded_responses — 2026-05-23 removal of canned list)
+                    _fb_greeting = (_profile_greeting or '').strip()
                     logger.warning(
                         f"### SESSION_START produced no usable greeting "
                         f"(was {full_response!r}, {metrics.get('llm_inference_ms')}ms) "
-                        f"— substituting fallback greeting: {_fb_greeting!r}"
+                        f"— profile_greeting={_fb_greeting!r} (empty = silence by design)"
                     )
-                    full_response = _fb_greeting
-                    metrics['fallback_used'] = 1
+                    full_response = _fb_greeting  # may be '' — that's the right diagnostic signal
+                    metrics['fallback_used'] = 1 if _fb_greeting else 0
+                    metrics['llm_empty_session_start'] = 1
                     # Drop any partial / bare-token TTS buffered from the
-                    # broken turn so only the fallback greeting is spoken.
+                    # broken turn so only the (profile) greeting is spoken, if any.
_tts_buf = '' _tts_pending.clear() diff --git a/routes/report_issue.py b/routes/report_issue.py index f9996b9..1e702a0 100644 --- a/routes/report_issue.py +++ b/routes/report_issue.py @@ -72,18 +72,28 @@ def submit_issue(): 'ua': request.headers.get('User-Agent', ''), } - # Always save locally - REPORTS_DIR.mkdir(parents=True, exist_ok=True) + # Always save locally. Never let a filesystem error bubble up as an HTML 500 + # page — the frontend does res.json() and an HTML body yields the cryptic + # "Unexpected token '<', " dict: + history_text = '\n'.join( + f"Scene {i+1}: {s.get('title','')} — {s.get('summary','')}" + for i, s in enumerate(scene_history) + ) + user_msg = f"""Genre: {genre} +Tone: {tone} +Story so far: +{history_text} + +Player chose: "{choice_text}" + +Generate the next scene JSON.""" + + r = httpx.post( + 'https://api.openai.com/v1/chat/completions', + json={ + 'model': 'gpt-4o-mini', + 'messages': [ + {'role': 'system', 'content': SCENE_SYSTEM}, + {'role': 'user', 'content': user_msg}, + ], + 'temperature': 0.85, + 'max_tokens': 1200, + 'response_format': {'type': 'json_object'}, + }, + headers={'Authorization': f'Bearer {OPENAI_API_KEY}'}, + timeout=30.0, + ) + r.raise_for_status() + content = r.json()['choices'][0]['message']['content'] + return json.loads(content) + + +# ── Asset generators ────────────────────────────────────────────────────────── + +def gen_image(prompt: str, out_path: Path) -> bool: + try: + r = httpx.post( + 'https://router.huggingface.co/hf-inference/models/black-forest-labs/FLUX.1-schnell', + content=json.dumps({'inputs': prompt}).encode(), + headers={'Authorization': f'Bearer {HF_TOKEN}', 'Content-Type': 'application/json'}, + timeout=60.0, + ) + if r.status_code == 200 and 'image' in r.headers.get('content-type', ''): + out_path.write_bytes(r.content) + return True + logger.error(f'[story] image gen failed: {r.status_code} {r.text[:200]}') + except Exception as e: + logger.error(f'[story] image gen error: {e}') + 
return False
+
+
+def gen_suno_sound(prompt: str, loop: bool, key: str, out_path: Path) -> bool:
+    """Generate one sound clip via sunoapi.org and save it to *out_path*.
+
+    Submits the job, then polls ``record-info`` every 5 s (at most 40 polls,
+    roughly 200 s) until the task succeeds or fails.  Returns ``True`` only
+    when audio bytes were actually written to *out_path*; every failure is
+    logged and reported as ``False`` — this function never raises.
+    """
+    auth = {'Authorization': f'Bearer {SUNO_API_KEY}'}
+    try:
+        r = httpx.post(
+            'https://api.sunoapi.org/api/v1/generate/sounds',
+            json={'prompt': prompt, 'model': 'V5_5', 'soundLoop': loop, 'soundKey': key},
+            headers={**auth, 'Content-Type': 'application/json'},
+            timeout=30.0,
+        )
+        if r.status_code != 200:
+            logger.error(f'[story] suno submit failed: {r.text[:200]}')
+            return False
+        # An HTTP 200 can still carry an API-level error with "data": null.
+        task_id = (r.json().get('data') or {}).get('taskId')
+        if not task_id:
+            # Without a task id every poll would ask for taskId=None and this
+            # thread would block for the whole polling window for nothing.
+            logger.error(f'[story] suno submit returned no taskId: {r.text[:200]}')
+            return False
+        for _ in range(40):
+            time.sleep(5)
+            poll = httpx.get(
+                'https://api.sunoapi.org/api/v1/generate/record-info',
+                params={'taskId': task_id},  # let httpx do the URL encoding
+                headers=auth,
+                timeout=15.0,
+            )
+            if poll.status_code != 200:
+                continue  # transient poll failure — keep waiting, don't abort the job
+            pdata = poll.json().get('data') or {}
+            status = str(pdata.get('status') or '')
+            if status == 'SUCCESS':
+                clips = (pdata.get('response') or {}).get('sunoData') or []
+                if clips:
+                    url = clips[0].get('sourceAudioUrl') or clips[0].get('audioUrl')
+                    if not url:
+                        logger.error('[story] suno clip has no audio URL')
+                        return False
+                    dl = httpx.get(url, timeout=30.0, follow_redirects=True)
+                    # Never write an error page to disk: callers treat the mere
+                    # existence of the file as "asset ready".
+                    if dl.status_code != 200 or not dl.content:
+                        logger.error(f'[story] suno download failed: {dl.status_code}')
+                        return False
+                    out_path.write_bytes(dl.content)
+                    return True
+            # NOTE(review): sunoapi.org appears to report failures with suffixed
+            # states (e.g. CREATE_TASK_FAILED, SENSITIVE_WORD_ERROR) rather than a
+            # bare FAILED — confirm against the current API docs.
+            elif status == 'FAILED' or status.endswith(('_FAILED', '_ERROR', '_EXCEPTION')):
+                logger.error(f'[story] suno generation failed: {status}')
+                return False
+        logger.error(f'[story] suno generation timed out (task {task_id})')
+    except Exception as e:
+        logger.error(f'[story] suno error: {e}')
+    return False
+
+
+def gen_tts_line(text: str, char_key: str, out_path: Path) -> bool:
+    """Synthesize one script line with the Resemble streaming endpoint.
+
+    The voice is looked up in ``STORY_VOICES`` by *char_key*, falling back to
+    the ``narrator`` voice.  The response body is written to *out_path* only
+    when the request returns 200 with more than 100 bytes.  Returns ``True``
+    on success and ``False`` (after logging) otherwise; never raises.
+    """
+    voice = STORY_VOICES.get(char_key, STORY_VOICES['narrator'])
+    ssml = f'{text}'
+    try:
+        r = httpx.post(
+            'https://f.cluster.resemble.ai/stream',
+            json={'voice_uuid': voice['uuid'], 'data': ssml, 'precision': 'PCM_16', 'sample_rate': 24000},
+            headers={'Authorization': f'Bearer {RESEMBLE_KEY}', 'Content-Type': 'application/json'},
+            timeout=30.0,
+        )
+        if r.status_code == 200 and len(r.content) > 100:
+            out_path.write_bytes(r.content)
+            return True
+        logger.error(f'[story] tts failed: {r.status_code}')
+    except Exception as e:
+        logger.error(f'[story] tts error: {e}')
+    return False
+
+
+# ── Main endpoint ─────────────────────────────────────────────────────────────
+
+@story_bp.route('/api/story/generate-scene', methods=['POST'])
+def generate_scene():
+    """Generate the next story scene plus its image, audio and TTS assets.
+
+    Expects a JSON object with ``choice_text`` (required) and optional
+    ``story_id``, ``scene_history``, ``genre``, ``tone`` and ``scene_index``.
+    The scene JSON comes from ``generate_scene_json``; each asset is then
+    generated on its own thread and written under
+    ``CANVAS_PAGES_DIR/stories/<story_id>/``.  Assets that could not be
+    generated are left out of the response (``None`` / omitted) and logged,
+    so a partial scene is still returned with ``status: ready``.
+
+    Returns 400 for a malformed body, a missing ``choice_text`` or a
+    non-integer ``scene_index``; 500 when the LLM call fails.
+    """
+    import re  # local import: the module's import block is outside this change
+
+    def _safe_token(value, default):
+        """Reduce *value* to [A-Za-z0-9_-] so it is safe inside a file path."""
+        token = re.sub(r'[^A-Za-z0-9_-]', '', str(value or ''))[:64]
+        return token or default
+
+    # silent=True: malformed JSON gets the JSON 400 below, not an HTML error page.
+    data = request.get_json(force=True, silent=True)
+    if not isinstance(data, dict):
+        return jsonify({'error': 'JSON object body required'}), 400
+
+    # story_id is client-supplied and becomes a directory name — sanitize it so
+    # '../..' or an absolute path cannot escape CANVAS_PAGES_DIR/stories.
+    story_id = _safe_token(data.get('story_id'), 'story-unknown')
+    choice_text = data.get('choice_text', '')
+    scene_history = data.get('scene_history', [])
+    genre = data.get('genre', 'fantasy')
+    tone = data.get('tone', 'mysterious')
+    try:
+        scene_index = int(data.get('scene_index', 1))  # which scene number this is
+    except (TypeError, ValueError):
+        # Reject early: the ':03d' format below would otherwise crash after
+        # the (slow, paid) LLM call had already been made.
+        return jsonify({'error': 'scene_index must be an integer'}), 400
+
+    if not choice_text:
+        return jsonify({'error': 'choice_text required'}), 400
+
+    # 1. Generate scene JSON via LLM
+    try:
+        scene_data = generate_scene_json(choice_text, scene_history, genre, tone)
+    except Exception as e:
+        logger.error(f'[story] LLM error: {e}')
+        return jsonify({'error': f'Scene generation failed: {e}'}), 500
+    if not isinstance(scene_data, dict):
+        logger.error('[story] LLM returned non-object scene JSON')
+        return jsonify({'error': 'Scene generation failed: malformed scene JSON'}), 500
+
+    scene_id = f'scene_{scene_index:03d}'
+    story_dir = CANVAS_PAGES_DIR / 'stories' / story_id
+    story_dir.mkdir(parents=True, exist_ok=True)
+    try:
+        # Best effort. NOTE(review): 0o777 is world-writable — presumably needed
+        # so another container user can serve these files; confirm.
+        story_dir.chmod(0o777)
+        (CANVAS_PAGES_DIR / 'stories').chmod(0o777)
+    except OSError:
+        pass
+
+    # 2. Generate all assets in parallel
+    threads = []
+    errors = []  # labels of assets that failed to generate (logged below)
+
+    # Image
+    image_file = story_dir / f'{scene_id}_image.jpg'
+    def do_image():
+        if not gen_image(scene_data.get('image_prompt', 'dark fantasy scene'), image_file):
+            errors.append('image')
+    threads.append(threading.Thread(target=do_image))
+
+    # Ambient sound
+    ambient = scene_data.get('ambient')
+    if not isinstance(ambient, dict):
+        ambient = {}
+    ambient_file = story_dir / f'{scene_id}_ambient.mp3'
+    def do_ambient():
+        if not gen_suno_sound(
+            ambient.get('prompt', 'dark ambient atmosphere'),
+            True,
+            ambient.get('soundKey', 'Am'),
+            ambient_file
+        ):
+            errors.append('ambient')
+    threads.append(threading.Thread(target=do_ambient))
+
+    # SFX sounds — ids come from the LLM and end up in file names, so each one
+    # gets a sanitized token (falling back to its position when missing).
+    sfx_list = [s for s in (scene_data.get('sfx') or []) if isinstance(s, dict)]
+    sfx_names = [_safe_token(s.get('id'), f'sfx{n}') for n, s in enumerate(sfx_list)]
+    for name, sfx in zip(sfx_names, sfx_list):
+        sfx_file = story_dir / f'{scene_id}_{name}.mp3'
+        def do_sfx(s=sfx, f=sfx_file, label=name):
+            if not gen_suno_sound(s.get('prompt', 'sound effect'), False, 'Any', f):
+                errors.append(f'sfx:{label}')
+        threads.append(threading.Thread(target=do_sfx))
+
+    # TTS lines
+    script = [ln for ln in (scene_data.get('script') or []) if isinstance(ln, dict)]
+    for i, line in enumerate(script):
+        line_file = story_dir / f'{scene_id}_line_{i:02d}.wav'
+        def do_tts(l=line, lf=line_file, idx=i):
+            if not gen_tts_line(l.get('text', ''), l.get('character', 'narrator'), lf):
+                errors.append(f'line:{idx}')
+        threads.append(threading.Thread(target=do_tts))
+
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+    if errors:
+        failed = ', '.join(errors)
+        logger.warning(f'[story] {scene_id}: assets failed: {failed}')
+
+    # 3. Build the scene object the canvas needs (with resolved file paths)
+    base = f'stories/{story_id}'
+
+    # Rebuild sounds array from scene_data + resolved paths
+    sounds = []
+    if ambient_file.exists():
+        sounds.append({
+            'id': 'ambient',
+            'role': 'ambient',
+            'file': f'{base}/{scene_id}_ambient.mp3',
+            'trigger': 'scene_start',
+            'volume': ambient.get('volume', 0.35),
+        })
+    for name, sfx in zip(sfx_names, sfx_list):
+        sfx_file = story_dir / f'{scene_id}_{name}.mp3'
+        if sfx_file.exists():
+            sounds.append({
+                'id': sfx.get('id', name),
+                'role': 'sfx',
+                'file': f'{base}/{scene_id}_{name}.mp3',
+                'trigger': sfx.get('trigger', 'after_line_0'),
+                'volume': sfx.get('volume', 0.8),
+                'delay_ms': sfx.get('delay_ms', 0),
+            })
+
+    # Rebuild script with resolved audio paths
+    resolved_script = []
+    for i, line in enumerate(script):
+        line_file = story_dir / f'{scene_id}_line_{i:02d}.wav'
+        resolved_script.append({
+            'type': line.get('type', 'narration'),
+            'character': line.get('character', 'narrator'),
+            'text': line.get('text', ''),
+            'audio': f'{base}/{scene_id}_line_{i:02d}.wav' if line_file.exists() else None,
+        })
+
+    scene_out = {
+        'scene_id': scene_id,
+        'title': scene_data.get('title', 'Unknown'),
+        'image_file': f'{base}/{scene_id}_image.jpg' if image_file.exists() else None,
+        'sounds': sounds,
+        'script': resolved_script,
+        'choices': scene_data.get('choices', []),
+    }
+
+    return jsonify({'status': 'ready', 'scene': scene_out})
diff --git a/routes/suno.py b/routes/suno.py
index 225fb7c..06b1330 100644
--- a/routes/suno.py
+++ b/routes/suno.py
@@ -5,7 +5,7 @@ Generated songs land in generated_music/ and show up in the music player.
 
Endpoints:
-    GET/POST /api/suno (action: generate|status|list|credits)
+    GET/POST /api/suno (action: generate|jingle|sfx|status|list|credits)
     POST /api/suno/callback (webhook from sunoapi.org)
     GET/POST /api/suno/completed (frontend polls for completed songs)
 
@@ -40,6 +40,13 @@ GENERATED_MUSIC_DIR.mkdir(parents=True, exist_ok=True)
 
 GENERATED_METADATA_FILE = GENERATED_MUSIC_DIR / 'generated_metadata.json'
 
+# SFX (action=sfx) land in a dedicated subdir so they are NOT mixed in with the
+# music library/player. Kept under generated_music/ so it's already mounted +
+# web-served (/generated_music/sfx/) with no compose/mount changes. The
+# music list (_action_list) and music metadata/queue intentionally skip these.
+GENERATED_SOUNDS_DIR = GENERATED_MUSIC_DIR / 'sfx'
+GENERATED_SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
+
 SUNO_API_KEY = os.environ.get('SUNO_API_KEY', '')
 SUNO_API_BASE = 'https://api.sunoapi.org'
 SUNO_WEBHOOK_SECRET = os.environ.get('SUNO_WEBHOOK_SECRET', '')
@@ -262,6 +269,9 @@ def handle_suno():
         elif action == 'jingle':
             return _action_jingle(_q, body)
 
+        elif action == 'sfx':
+            return _action_sfx(_q, body)
+
         elif action == 'list_jingles':
             return _action_list_jingles()
 
@@ -279,7 +289,7 @@ def handle_suno():
             return _action_credits()
 
         else:
-            return jsonify({'action': 'error', 'response': f"Unknown action '{action}'. Use: generate, jingle, list_jingles, jingle_styles, status, list, credits"})
+            return jsonify({'action': 'error', 'response': f"Unknown action '{action}'. Use: generate, jingle, sfx, list_jingles, jingle_styles, status, list, credits"})
 
     except Exception as exc:
         logger.exception('Suno endpoint error')
@@ -407,6 +417,108 @@ def _action_generate(_q, body: dict):
         return jsonify({'action': 'error', 'response': f"Couldn't reach Suno API: {exc}"})
 
 
+def _action_sfx(_q, body: dict):
+    """Generate a short non-vocal sound effect / ambient stinger.
+
+    Wraps sunoapi.org's "Sounds Generation (V5)" endpoint
+    (POST /api/v1/generate/sounds, 2.5 credits). This is NOT a jingle or a song
+    — it produces game SFX, UI blips, stingers, ambient beds, etc. with no
+    vocals. Returns a taskId in the same shape as /generate, so the normal
+    `action=status` poller downloads + saves the clip when ready.
+
+    Inputs:
+      prompt — required, what the sound should be (max 500 chars).
+               e.g. "retro 8-bit coin pickup blip", "wooden mallet thwack",
+               "ominous low brass sting", "arcade game-over jingle no vocals"
+      title  — optional label used for the saved filename + metadata.
+      loop   — optional bool; soundLoop (seamless looping bed). Default false.
+      tempo  — optional int 1-300; soundTempo (BPM). Omit for auto.
+      key    — optional musical key (Any, Cm, C#m, ... B). Omit for Any.
+    """
+    prompt = str(_q('prompt') or body.get('prompt') or '').strip()
+    if not prompt:
+        return jsonify({'action': 'error', 'response': "Need a description of the sound — e.g. 'retro 8-bit coin pickup blip'."})
+    prompt = prompt[:500]
+
+    title = str(_q('title') or body.get('title') or '').strip()
+
+    loop_raw = _q('loop') or body.get('loop', False)
+    if isinstance(loop_raw, bool):
+        sound_loop = loop_raw
+    else:
+        sound_loop = str(loop_raw).lower() in ('true', '1', 'yes')
+
+    request_body = {
+        'prompt': prompt,
+        # V5_5 matches the proven story.py path (routes/story.py gen_suno_sound,
+        # shipped 2026-05-28). sunoapi.org docs say "V5 only" for this endpoint
+        # but production uses V5_5 successfully with better quality — match it.
+        'model': 'V5_5',
+        'soundLoop': sound_loop,
+    }
+
+    # Optional tempo (BPM 1-300)
+    tempo_raw = _q('tempo') or body.get('tempo', '')
+    if tempo_raw not in (None, ''):
+        try:
+            tempo = int(tempo_raw)
+            if 1 <= tempo <= 300:
+                request_body['soundTempo'] = tempo
+        except (TypeError, ValueError):
+            pass
+
+    # Optional musical key
+    key = str(_q('key') or body.get('key') or '').strip()
+    if key and key.lower() != 'any':
+        request_body['soundKey'] = key
+
+    # NOTE: deliberately NO callBackUrl for SFX. SFX complete via polling
+    # (action=status), which routes them to the sounds subdir and keeps them out
+    # of the music library. The webhook callback path registers results as music,
+    # so skipping it prevents SFX from leaking into the music player.
+
+    logger.info(f'Suno sfx: loop={sound_loop} prompt={prompt[:80]}')
+
+    try:
+        resp = http_requests.post(
+            f'{SUNO_API_BASE}/api/v1/generate/sounds',
+            headers={'Authorization': f'Bearer {SUNO_API_KEY}', 'Content-Type': 'application/json'},
+            json=request_body,
+            timeout=30,
+        )
+        logger.info(f'Suno sfx response: {resp.status_code} {resp.text[:300]}')
+
+        if resp.status_code == 200:
+            data = resp.json()
+            if data.get('code') == 200 and data.get('data', {}).get('taskId'):
+                task_id = data['data']['taskId']
+                job_id = str(uuid.uuid4())
+                suno_jobs[job_id] = {
+                    'status': 'generating',
+                    'prompt': prompt,
+                    'title': title or prompt[:60],
+                    'style': 'sfx',
+                    'kind': 'sfx',
+                    'task_id': task_id,
+                    'created_at': time.time(),
+                }
+                return jsonify({
+                    'action': 'generating',
+                    'job_id': job_id,
+                    'task_id': task_id,
+                    'kind': 'sfx',
+                    'response': f"Generating sound: '{title or prompt[:40]}' — check back in ~20-40 seconds.",
+                    'estimated_seconds': 30,
+                })
+            else:
+                return jsonify({'action': 'error', 'response': f"Suno SFX error: {data.get('msg', 'Unknown error')}"})
+        else:
+            return jsonify({'action': 'error', 'response': f'Suno SFX HTTP {resp.status_code}: {resp.text[:200]}'})
+
+    except (http_requests.RequestException, ValueError) as exc:
+        return jsonify({'action': 'error', 'response': f"Couldn't reach Suno API: {exc}"})
+
+
 def _action_jingle(_q, body: dict):
     """Generate a 10-15 second vocal-logo jingle of a brand name.
 
@@ -645,7 +757,11 @@ def _action_status(job_id: str): # Suno returns 2 clips per generation — only take the first one songs = songs[:1] if songs else [] for song in songs: - audio_url = song.get('audioUrl') or song.get('audio_url') + # sourceAudioUrl is the original/high-quality URL the + # sounds endpoint returns (see routes/story.py); kept as + # a fallback so SFX jobs download correctly. Additive — + # songs/jingles still prefer audioUrl, unchanged. + audio_url = song.get('audioUrl') or song.get('audio_url') or song.get('sourceAudioUrl') if not audio_url: continue song_id = song.get('id', task_id) @@ -657,8 +773,12 @@ def _action_status(job_id: str): song_title = job.get('title') or job.get('prompt', '')[:60] or 'Generated Track' duration = song.get('duration', 0) slug = _slugify_title(song_title) - filename = _unique_filename(GENERATED_MUSIC_DIR, slug) - save_path = GENERATED_MUSIC_DIR / filename + # SFX go to the dedicated sounds subdir, separate from music. + _is_sfx = job.get('kind') == 'sfx' + _dir = GENERATED_SOUNDS_DIR if _is_sfx else GENERATED_MUSIC_DIR + _url_base = '/generated_music/sfx' if _is_sfx else '/generated_music' + filename = _unique_filename(_dir, slug) + save_path = _dir / filename if not save_path.exists(): if not _is_safe_download_url(audio_url): @@ -684,49 +804,54 @@ def _action_status(job_id: str): logger.warning(f'Suno download failed: {audio_resp.status_code}') continue - # Save metadata — propagate jingle fields if this was a jingle job kind = job.get('kind', 'song') - extra = {} - if kind == 'jingle': - extra = { - 'brand': job.get('brand', ''), - 'style_key': job.get('style_key', ''), - 'vocal_gender': job.get('vocal_gender', ''), - 'instrumental': job.get('instrumental', False), - } - # Capture lyrics from Suno response if present song_lyrics = song.get('prompt', '') or song.get('lyrics', '') - if song_lyrics: - extra['lyrics'] = song_lyrics - _add_song_to_metadata( - filename=filename, - title=song_title, - 
prompt=job.get('prompt', ''), - style=job.get('style', ''), - duration=duration, - song_id=song_id, - kind=kind, - extra=extra, - ) + + # SFX are NOT music — keep them out of the music metadata + # AND the music player's completed-songs queue so they + # don't pollute the music library. The status response + # below still returns the URL so the caller gets the clip. + if not _is_sfx: + extra = {} + if kind == 'jingle': + extra = { + 'brand': job.get('brand', ''), + 'style_key': job.get('style_key', ''), + 'vocal_gender': job.get('vocal_gender', ''), + 'instrumental': job.get('instrumental', False), + } + if song_lyrics: + extra['lyrics'] = song_lyrics + _add_song_to_metadata( + filename=filename, + title=song_title, + prompt=job.get('prompt', ''), + style=job.get('style', ''), + duration=duration, + song_id=song_id, + kind=kind, + extra=extra, + ) # Update job job['status'] = 'complete' job['song_id'] = song_id job['title'] = song_title - job['url'] = f'/generated_music/{filename}' - - # Notify frontend poller - completed_songs_queue.append({ - 'song_id': song_id, - 'filename': filename, - 'title': song_title, - 'job_id': job_id, - 'kind': kind, - 'url': f'/generated_music/{filename}', - 'completed_at': datetime.now().isoformat(), - 'prompt': job.get('prompt', ''), - 'lyrics': song_lyrics, - }) + job['url'] = f'{_url_base}/{filename}' + + # Notify frontend poller — music only (SFX skip the music queue) + if not _is_sfx: + completed_songs_queue.append({ + 'song_id': song_id, + 'filename': filename, + 'title': song_title, + 'job_id': job_id, + 'kind': kind, + 'url': f'{_url_base}/{filename}', + 'completed_at': datetime.now().isoformat(), + 'prompt': job.get('prompt', ''), + 'lyrics': song_lyrics, + }) return jsonify({ 'action': 'complete', @@ -734,7 +859,7 @@ def _action_status(job_id: str): 'job_id': job_id, 'song_id': song_id, 'title': song_title, - 'url': f'/generated_music/{filename}', + 'url': f'{_url_base}/{filename}', 'response': f"Done! 
'{song_title}' is ready to spin!", }) diff --git a/server.py b/server.py index 953ce2e..e7dd8f8 100644 --- a/server.py +++ b/server.py @@ -166,6 +166,9 @@ def _extract_page_version(path, max_lines=5): from routes.suno import suno_bp app.register_blueprint(suno_bp) +from routes.story import story_bp +app.register_blueprint(story_bp) + from routes.airadio_bridge import airadio_bp app.register_blueprint(airadio_bp) diff --git a/services/gateways/compat.py b/services/gateways/compat.py index ee0d97f..b707d17 100644 --- a/services/gateways/compat.py +++ b/services/gateways/compat.py @@ -24,7 +24,7 @@ PROTOCOL_MAX = 5 # forward-compatible — OpenClaw ignores unsupported maxes # Version this code was tested against (for warning logs). -OPENCLAW_TESTED_VERSION = "2026.5.2" +OPENCLAW_TESTED_VERSION = "2026.5.7" OPENCLAW_MIN_VERSION = "2026.3.1" diff --git a/setup-sudo.sh b/setup-sudo.sh index b907445..af32f44 100755 --- a/setup-sudo.sh +++ b/setup-sudo.sh @@ -14,7 +14,7 @@ EMAIL="your@email.com" # ← EDIT: for Let's Encrypt notifications SERVICE_NAME="openvoiceui" RUN_USER="${SUDO_USER:-$(whoami)}" WWW_DIR="/var/www/${SERVICE_NAME}" # canvas pages + any web assets -OPENCLAW_TESTED_VERSION="2026.5.2" # pinned: the openclaw version tested with this release +OPENCLAW_TESTED_VERSION="2026.5.7" # pinned: the openclaw version tested with this release # ──────────────────────────────────────────────────────────────────────────── # Guard: refuse to run with placeholder values diff --git a/src/app.js b/src/app.js index a9afde8..3eb2a85 100644 --- a/src/app.js +++ b/src/app.js @@ -578,8 +578,11 @@ connectAiradio(); } this.currentMood = mood; - // Propagate mood to BigHeadFace if active + // Propagate mood to BigHeadFace + HaloSmokeFace if active. + // HaloSmoke collapses non-'thinking' moods to its idle state, + // which is how it clears the dots animation when a turn ends. 
window.BigHeadFace?.setMood(mood); + window.HaloSmokeFace?.setMood(mood); }, blink() { @@ -2732,7 +2735,19 @@ connectAiradio(); context: this._gatherContext(), }), }); - const data = await res.json(); + // Guard: a non-JSON body (e.g. an HTML error/login page) would + // make res.json() throw the cryptic "Unexpected token '<'" error. + // Surface a clean message based on status instead. + let data; + try { + data = await res.json(); + } catch (_) { + throw new Error( + res.status === 401 || res.status === 403 + ? 'Not signed in — please sign in and try again.' + : `Server returned ${res.status}. Please try again.` + ); + } if (data.ok) { if (this._statusEl) { this._statusEl.textContent = '✓ Report submitted. Thank you!'; this._statusEl.className = 'irm-status success'; } setTimeout(() => this.close(), 1800); @@ -3782,6 +3797,7 @@ connectAiradio(); if (this._ttsGuardTimer) { clearTimeout(this._ttsGuardTimer); this._ttsGuardTimer = null; } // Abort any in-flight fetch so streaming stops immediately if (this._fetchAbortController) { + this._abortReason = 'stop'; this._fetchAbortController.abort(); this._fetchAbortController = null; // Tell server to abort the openclaw run (fire-and-forget) @@ -3854,6 +3870,7 @@ connectAiradio(); // instead: abort the tail, then fall through to the normal // sendMessage path. 
console.warn(`↩ POST-TEXT_DONE message — treating as fresh request: "${text.substring(0,30)}"`); + this._abortReason = 'user'; this._fetchAbortController.abort(); this._fetchAbortController = null; fetch(`${this.config.serverUrl}${convPath('abort')}`, { @@ -3864,6 +3881,7 @@ connectAiradio(); this.stopAudio(); } else if (this._ttsPlaying) { // Agent already responded, TTS playing → ABORT + this._abortReason = 'user'; this._fetchAbortController.abort(); this._fetchAbortController = null; console.warn(`⛔ ABORT source: ClawdbotMode.sendMessage (TTS playing, new msg: "${text.substring(0,30)}")`); @@ -3987,14 +4005,20 @@ connectAiradio(); const decoder = new TextDecoder(); let buffer = ''; - // Inactivity timeout: abort if no data received for 60s - // (heartbeats arrive every 10-15s during tool execution) + // Inactivity timeout: abort if no data received for this long. + // MUST be >= the server-side run budget (openclaw gateway + // timeout = 300s) so the client never gives up before the + // server does — otherwise long silent work (subagent spawns, + // batch ops) gets cut at the client and shows a false + // "stream timed out". Heartbeats normally arrive every 5-10s + // and reset this; 300s is the hard backstop matching the server. 
// _inactivityTimer declared in outer scope so finally{} can clear it
-                const INACTIVITY_TIMEOUT_MS = 60000;
+                const INACTIVITY_TIMEOUT_MS = 300000;
                 const _resetInactivity = () => {
                     if (_inactivityTimer) clearTimeout(_inactivityTimer);
                     _inactivityTimer = setTimeout(() => {
-                        console.warn('[Stream] No data for 60s — aborting');
+                        console.warn(`[Stream] No data for ${INACTIVITY_TIMEOUT_MS / 1000}s — aborting`);
+                        this._abortReason = 'inactivity';
                         this._fetchAbortController?.abort();
                     }, INACTIVITY_TIMEOUT_MS);
                 };
@@ -4593,11 +4617,25 @@ connectAiradio();
                     FaceModule.setMood('neutral');
                     StatusModule.update('idle', 'READY');
                     TranscriptPanel.removeThinking();
-                    // If agent was mid-task (had heartbeats), note the redirect
-                    if (this._wasAgentic) {
-                        this._wasAgentic = false;
+                    // Label the abort by its ACTUAL cause — only an explicit
+                    // user interrupt is "redirected by user". An inactivity
+                    // timeout (agent went silent during long work) or a call
+                    // stop is NOT a user redirect, and mislabeling it confused
+                    // users ("why does it say redirected when I didn't?").
+                    const _reason = this._abortReason;
+                    this._abortReason = null;
+                    const _wasAgentic = this._wasAgentic;
+                    this._wasAgentic = false;
+                    if (_reason === 'user') {
                         TranscriptPanel.finalizeStreaming('🔀 Redirected.');
                         ActionConsole.addEntry('system', 'Task redirected by user');
+                    } else if (_reason === 'inactivity') {
+                        TranscriptPanel.finalizeStreaming('⏳ Agent went quiet — stream timed out.');
+                        ActionConsole.addEntry('system', 'Stream timed out (agent silent 300s) — not a user action');
+                    } else if (_wasAgentic && !_reason) {
+                        // Unknown-source abort during agentic work — don't blame
+                        // the user; just close the stream quietly.
+                        TranscriptPanel.finalizeStreaming(null);
                     } else {
                         TranscriptPanel.finalizeStreaming(null);
                     }
@@ -8318,6 +8356,13 @@ ${meta.artwork ? `` : ''}
         addMessage(role, text, opts = {}) {
             if (!this.messages || !text) return;
 
+            // Remove any stale thinking bubble before appending an assistant
+            // response.
The streaming path (startStreaming/finalizeStreaming) + // already does this, but the non-streaming response path + // (data.response → addMessage) was leaving the dots floating + // above the rendered reply. + if (role === 'assistant') this.removeThinking(); + const msg = document.createElement('div'); msg.className = `tp-msg ${role === 'user' ? 'user' : 'assistant'}`; diff --git a/src/face/HaloSmokeFace.js b/src/face/HaloSmokeFace.js index 3387ba7..5177673 100644 --- a/src/face/HaloSmokeFace.js +++ b/src/face/HaloSmokeFace.js @@ -505,7 +505,16 @@ window.HaloSmokeFace = (function () { function setThinking(v) { _thinking = !!v; } - return { start, stop, setThinking }; + // Implement the BaseFace mood contract — halo visually collapses all + // moods to two states: 'thinking' → animated dots, anything else → idle. + // Without this, FaceModule.setMood('neutral') after a turn couldn't reach + // halo (it only propagated to BigHeadFace), so the dots animation would + // stay on screen indefinitely. + function setMood(mood) { + _thinking = (mood === 'thinking'); + } + + return { start, stop, setThinking, setMood }; })(); // Self-register with FaceRenderer plugin system diff --git a/src/face/manifest.json b/src/face/manifest.json index 9bef8f6..5f6824d 100644 --- a/src/face/manifest.json +++ b/src/face/manifest.json @@ -18,7 +18,8 @@ "description": "Halo frequency ring with wispy smoke core — calm at rest, reacts to TTS speech", "module": "/src/face/HaloSmokeFace.js", "preview": "/src/face/previews/orb.svg", - "moods": [], + "moods": ["neutral", "happy", "sad", "angry", "thinking", "surprised", "listening"], + "moods_visual_collapse": "Halo only renders two visual states — 'thinking' = animated dots; any other mood = idle smoke. The full mood vocab is accepted so FaceModule.setMood routing works uniformly.", "features": ["audio-reactive", "smoke", "halo", "speech-reactive"], "configurable": false }