diff --git a/apps/backend/core/auth.py b/apps/backend/core/auth.py index 78faac550e..a5eb13908d 100644 --- a/apps/backend/core/auth.py +++ b/apps/backend/core/auth.py @@ -965,6 +965,14 @@ def get_sdk_env_vars() -> dict[str, str]: if bash_path: env["CLAUDE_CODE_GIT_BASH_PATH"] = bash_path + # Explicitly unset CLAUDECODE in SDK subprocess environment. + # + # Claude Code CLI refuses to start when this marker env var is present, + # treating it as a nested Claude Code session. Auto-Claude can run inside + # a Claude Code terminal during debugging, so we must clear this marker for + # child SDK/CLI processes. + env["CLAUDECODE"] = "" + # Explicitly unset PYTHONPATH in SDK subprocess environment to prevent # pollution of agent subprocess environments. This fixes ACS-251 where # external projects with different Python versions would fail due to diff --git a/apps/backend/core/client.py b/apps/backend/core/client.py index a21e395920..99430e793b 100644 --- a/apps/backend/core/client.py +++ b/apps/backend/core/client.py @@ -18,6 +18,7 @@ import os import threading import time +from collections.abc import Callable from pathlib import Path from typing import Any @@ -536,6 +537,7 @@ def create_client( betas: list[str] | None = None, effort_level: str | None = None, fast_mode: bool = False, + stderr_callback: Callable[[str], None] | None = None, ) -> ClaudeSDKClient: """ Create a Claude Agent SDK client with multi-layered security. @@ -571,6 +573,8 @@ def create_client( the "user" setting source so the CLI reads fastMode from ~/.claude/settings.json. Requires extra usage enabled on Claude subscription; falls back to standard speed automatically. + stderr_callback: Optional callback invoked with each stderr line emitted + by the Claude CLI process. Returns: Configured ClaudeSDKClient @@ -959,6 +963,9 @@ def create_client( "enable_file_checkpointing": True, } + if stderr_callback: + options_kwargs["stderr"] = stderr_callback + # Fast mode: enable user setting source so CLI reads fastMode from # ~/.claude/settings.json. Without this, the SDK's default --setting-sources "" # blocks all filesystem settings and the CLI never sees fastMode: true. diff --git a/apps/backend/prompts/spec_quick.md b/apps/backend/prompts/spec_quick.md index a9050b7024..629f3430d2 100644 --- a/apps/backend/prompts/spec_quick.md +++ b/apps/backend/prompts/spec_quick.md @@ -31,46 +31,46 @@ That's it. No deep analysis needed. ## PHASE 2: CREATE MINIMAL SPEC -Create a concise `spec.md`: +Create a concise `spec.md` that includes the required validator sections: ```bash cat > spec.md << 'EOF' -# Quick Spec: [Task Name] +# Specification: [Task Name] -## Task -[One sentence description] +## Overview +[One paragraph describing what is being changed and why] -## Files to Modify +## Workflow Type +**Type**: simple + +## Task Scope +### Files to Modify - `[path/to/file]` - [what to change] -## Change Details +### Change Details [Brief description of the change - a few sentences max] -## Verification +## Success Criteria - [ ] [How to verify the change works] - -## Notes -[Any gotchas or considerations - optional] EOF ``` -**Keep it short!** A simple spec should be 20-50 lines, not 200+. +**Keep it short!** A simple spec should be concise and focused. --- ## PHASE 3: CREATE SIMPLE PLAN -Create `implementation_plan.json`: +Create `implementation_plan.json` with current schema fields: ```bash cat > implementation_plan.json << 'EOF' { - "spec_name": "[spec-name]", + "feature": "[task description]", "workflow_type": "simple", - "total_phases": 1, - "recommended_workers": 1, "phases": [ { + "id": "phase-1", "phase": 1, "name": "Implementation", "description": "[task description]", @@ -86,17 +86,19 @@ cat > implementation_plan.json << 'EOF' "patterns_from": [], "verification": { "type": "manual", - "run": "[verification step]" + "instructions": "[verification step]" } } ] } ], - "metadata": { - "created_at": "[timestamp]", - "complexity": "simple", - "estimated_sessions": 1 - } + "summary": { + "total_phases": 1, + "total_subtasks": 1, + "recommended_workers": 1 + }, + "created_at": "[timestamp]", + "updated_at": "[timestamp]" } EOF ``` @@ -146,18 +148,22 @@ Ready for implementation. **spec.md**: ```markdown -# Quick Spec: Button Color Change +# Specification: Button Color Change -## Task +## Overview Update primary button color from blue (#3B82F6) to green (#22C55E). -## Files to Modify +## Workflow Type +**Type**: simple + +## Task Scope +### Files to Modify - `src/components/Button.tsx` - Update color constant -## Change Details +### Change Details Change the `primaryColor` variable from `#3B82F6` to `#22C55E`. -## Verification +## Success Criteria - [ ] Buttons appear green in the UI - [ ] No console errors ``` @@ -168,18 +174,22 @@ Change the `primaryColor` variable from `#3B82F6` to `#22C55E`. **spec.md**: ```markdown -# Quick Spec: Fix Welcome Typo +# Specification: Fix Welcome Typo -## Task +## Overview Correct spelling of "recieve" to "receive" in welcome message. -## Files to Modify +## Workflow Type +**Type**: simple + +## Task Scope +### Files to Modify - `src/pages/Home.tsx` - Fix typo on line 42 -## Change Details +### Change Details Find "You will recieve" and change to "You will receive". -## Verification +## Success Criteria - [ ] Welcome message displays correctly ``` diff --git a/apps/backend/spec/phases/spec_phases.py b/apps/backend/spec/phases/spec_phases.py index afb5e1a29e..2a705159b2 100644 --- a/apps/backend/spec/phases/spec_phases.py +++ b/apps/backend/spec/phases/spec_phases.py @@ -60,9 +60,16 @@ async def phase_quick_spec(self) -> PhaseResult: plan_file = self.spec_dir / "implementation_plan.json" if spec_file.exists() and plan_file.exists(): - self.ui.print_status("Quick spec already exists", "success") - return PhaseResult( - "quick_spec", True, [str(spec_file), str(plan_file)], [], 0 + spec_valid = self.spec_validator.validate_spec_document().valid + plan_valid = self.spec_validator.validate_implementation_plan().valid + if spec_valid and plan_valid: + self.ui.print_status("Quick spec already exists", "success") + return PhaseResult( + "quick_spec", True, [str(spec_file), str(plan_file)], [], 0 + ) + + self.ui.print_status( + "Quick spec files exist but are invalid, regenerating...", "warning" ) is_greenfield = self._check_and_log_greenfield() @@ -91,15 +98,46 @@ async def phase_quick_spec(self) -> PhaseResult: phase_name="quick_spec", ) - if success and spec_file.exists(): + if success: # Create minimal plan if agent didn't if not plan_file.exists(): writer.create_minimal_plan(self.spec_dir, self.task_description) - self.ui.print_status("Quick spec created", "success") - return PhaseResult( - "quick_spec", True, [str(spec_file), str(plan_file)], [], attempt + spec_valid = ( + spec_file.exists() + and self.spec_validator.validate_spec_document().valid + ) + plan_valid = ( + plan_file.exists() + and self.spec_validator.validate_implementation_plan().valid + ) + + if not plan_valid and plan_file.exists(): + from ..validate_pkg.auto_fix import auto_fix_plan + + if auto_fix_plan(self.spec_dir): + plan_valid = ( + self.spec_validator.validate_implementation_plan().valid + ) + + if spec_valid and plan_valid: + self.ui.print_status("Quick spec created", "success") + return PhaseResult( + "quick_spec", + True, + [str(spec_file), str(plan_file)], + [], + attempt, + ) + + errors.append( + f"Attempt {attempt + 1}: Quick spec output invalid " + f"(spec_valid={spec_valid}, plan_valid={plan_valid})" + ) + self.ui.print_status( + "Quick spec created files but validation failed", "error" ) + continue errors.append(f"Attempt {attempt + 1}: Quick spec agent failed") diff --git a/apps/backend/spec/pipeline/agent_runner.py b/apps/backend/spec/pipeline/agent_runner.py index 4ebe0ff6c1..e4a9eb906a 100644 --- a/apps/backend/spec/pipeline/agent_runner.py +++ b/apps/backend/spec/pipeline/agent_runner.py @@ -5,8 +5,11 @@ Handles the execution of AI agents for the spec creation pipeline. """ +import logging from pathlib import Path +from agents.tools_pkg.models import AGENT_CONFIGS + # Configure safe encoding before any output (fixes Windows encoding errors) from ui.capabilities import configure_safe_encoding @@ -25,10 +28,36 @@ # The import chain: spec.pipeline -> agent_runner -> core.client -> agents.tools_pkg -> spec.validate_pkg # By deferring the import, we break the circular dependency. +logger = logging.getLogger(__name__) + class AgentRunner: """Manages agent execution with logging and error handling.""" + PHASE_AGENT_MAP = { + "discovery": "spec_discovery", + "requirements": "spec_gatherer", + "historical_context": "spec_gatherer", + "research": "spec_researcher", + "context": "spec_context", + "spec_writing": "spec_writer", + "quick_spec": "spec_writer", + "self_critique": "spec_critic", + "planning": "planner", + "validation": "spec_validation", + "complexity_assessment": "spec_gatherer", + } + + PROMPT_AGENT_MAP = { + "spec_researcher.md": "spec_researcher", + "spec_writer.md": "spec_writer", + "spec_quick.md": "spec_writer", + "spec_critic.md": "spec_critic", + "planner.md": "planner", + "validation_fixer.md": "spec_validation", + "complexity_assessor.md": "spec_gatherer", + } + def __init__( self, project_dir: Path, @@ -57,6 +86,7 @@ async def run_agent( thinking_budget: int | None = None, thinking_level: str = "medium", prior_phase_summaries: str | None = None, + phase_name: str | None = None, ) -> tuple[bool, str]: """Run an agent with the given prompt. @@ -67,6 +97,7 @@ async def run_agent( thinking_budget: Token budget for extended thinking (None = disabled) thinking_level: Thinking level string (low, medium, high) prior_phase_summaries: Summaries from previous phases for context + phase_name: Optional phase name used to select agent profile Returns: Tuple of (success, response_text) @@ -131,6 +162,20 @@ async def run_agent( resolve_model_id, ) + agent_type = self._resolve_agent_type(prompt_file, phase_name) + if agent_type not in AGENT_CONFIGS: + debug_error( + "agent_runner", + f"Unknown agent type resolved for prompt {prompt_file}", + phase_name=phase_name, + agent_type=agent_type, + ) + return ( + False, + f"Unknown spec agent type '{agent_type}' for prompt {prompt_file}" + + (f" (phase: {phase_name})" if phase_name else ""), + ) + betas = get_model_betas(self.model) fast_mode = get_fast_mode(self.spec_dir) debug( @@ -142,12 +187,24 @@ async def run_agent( resolved_model, thinking_level or "medium" ) + captured_stderr_lines: list[str] = [] + + def _capture_stderr_line(line: str) -> None: + if not line: + return + captured_stderr_lines.append(line) + # Keep only the most recent lines to prevent unbounded memory growth + if len(captured_stderr_lines) > 200: + del captured_stderr_lines[:-200] + client = create_client( self.project_dir, self.spec_dir, resolved_model, + agent_type=agent_type, betas=betas, fast_mode=fast_mode, + stderr_callback=_capture_stderr_line, **thinking_kwargs, ) @@ -255,14 +312,58 @@ async def run_agent( return True, response_text except Exception as e: + error_text = str(e) + stderr_text = getattr(e, "stderr", None) + if ( + stderr_text + and stderr_text.strip() == "Check stderr output for details" + and captured_stderr_lines + ): + stderr_text = "\n".join(captured_stderr_lines) + + if stderr_text: + logger.error( + "Spec agent stderr [prompt=%s phase=%s]:\n%s", + prompt_file, + phase_name or "-", + stderr_text, + ) + error_text = f"{error_text}\n--- STDERR START ---\n{stderr_text}\n--- STDERR END ---" + + detailed_error = ( + f"{type(e).__name__}: {error_text}" + f" [prompt={prompt_file}]" + + (f" [phase={phase_name}]" if phase_name else "") + ) debug_error( "agent_runner", - f"Agent session error: {e}", + f"Agent session error: {detailed_error}", exception_type=type(e).__name__, ) if self.task_logger: - self.task_logger.log_error(f"Agent error: {e}", LogPhase.PLANNING) - return False, str(e) + self.task_logger.log_error( + f"Agent error ({prompt_file}{f'/{phase_name}' if phase_name else ''}): {detailed_error}", + LogPhase.PLANNING, + ) + return False, detailed_error + + @classmethod + def _resolve_agent_type( + cls, + prompt_file: str, + phase_name: str | None, + ) -> str: + """Resolve the spec pipeline agent type for the current prompt/phase.""" + normalized_prompt = (prompt_file or "").strip() + normalized_phase = (phase_name or "").strip().lower() + + if normalized_prompt in cls.PROMPT_AGENT_MAP: + return cls.PROMPT_AGENT_MAP[normalized_prompt] + + if normalized_phase in cls.PHASE_AGENT_MAP: + return cls.PHASE_AGENT_MAP[normalized_phase] + + return "spec_writer" @staticmethod def _extract_tool_input_display(inp: dict) -> str | None: diff --git a/apps/backend/spec/pipeline/orchestrator.py b/apps/backend/spec/pipeline/orchestrator.py index 3f6a567cd0..bb07abe719 100644 --- a/apps/backend/spec/pipeline/orchestrator.py +++ b/apps/backend/spec/pipeline/orchestrator.py @@ -162,6 +162,7 @@ async def _run_agent( thinking_budget=thinking_budget, thinking_level=self.thinking_level, prior_phase_summaries=prior_summaries if prior_summaries else None, + phase_name=phase_name, ) async def _store_phase_summary(self, phase_name: str) -> None: diff --git a/apps/backend/spec/writer.py b/apps/backend/spec/writer.py index 6f59934dae..f642c2ca8a 100644 --- a/apps/backend/spec/writer.py +++ b/apps/backend/spec/writer.py @@ -12,13 +12,13 @@ def create_minimal_plan(spec_dir: Path, task_description: str) -> Path: """Create a minimal implementation plan for simple tasks.""" + timestamp = datetime.now().isoformat() plan = { - "spec_name": spec_dir.name, + "feature": task_description or spec_dir.name, "workflow_type": "simple", - "total_phases": 1, - "recommended_workers": 1, "phases": [ { + "id": "phase-1", "phase": 1, "name": "Implementation", "description": task_description or "Simple implementation", @@ -34,17 +34,19 @@ def create_minimal_plan(spec_dir: Path, task_description: str) -> Path: "patterns_from": [], "verification": { "type": "manual", - "run": "Verify the change works as expected", + "instructions": "Verify the change works as expected", }, } ], } ], - "metadata": { - "created_at": datetime.now().isoformat(), - "complexity": "simple", - "estimated_sessions": 1, + "summary": { + "total_phases": 1, + "total_subtasks": 1, + "recommended_workers": 1, }, + "created_at": timestamp, + "updated_at": timestamp, } plan_file = spec_dir / "implementation_plan.json" diff --git a/apps/frontend/src/main/__tests__/insights-config.test.ts b/apps/frontend/src/main/__tests__/insights-config.test.ts index c7b75195d9..86e9c00c39 100644 --- a/apps/frontend/src/main/__tests__/insights-config.test.ts +++ b/apps/frontend/src/main/__tests__/insights-config.test.ts @@ -84,6 +84,18 @@ describe('InsightsConfig', () => { expect(env.ANTHROPIC_BASE_URL).toBe(''); }); + it('should clear CLAUDECODE to avoid nested session errors', async () => { + const config = new InsightsConfig(); + process.env = { + ...originalEnv, + CLAUDECODE: '1' + }; + + const env = await config.getProcessEnv(); + + expect(env.CLAUDECODE).toBe(''); + }); + it('should set PYTHONPATH only to auto-build path when python env has none', async () => { const config = new InsightsConfig(); mockGetPythonEnv.mockReturnValue({}); diff --git a/apps/frontend/src/main/__tests__/task-state-manager.test.ts b/apps/frontend/src/main/__tests__/task-state-manager.test.ts index eadbbb731e..f0f1ef1b32 100644 --- a/apps/frontend/src/main/__tests__/task-state-manager.test.ts +++ b/apps/frontend/src/main/__tests__/task-state-manager.test.ts @@ -470,6 +470,25 @@ describe('TaskStateManager', () => { }); describe('actor state restoration', () => { + it('should transition from backlog on late QA_STARTED events', () => { + const lateQaEvent = { + type: 'QA_STARTED', + taskId: mockTask.id, + specId: mockTask.specId, + projectId: mockProject.id, + timestamp: new Date().toISOString(), + eventId: 'evt-late-qa', + sequence: 0, + iteration: 1, + maxIterations: 3 + }; + + const accepted = manager.handleTaskEvent(mockTask.id, lateQaEvent, mockTask, mockProject); + + expect(accepted).toBe(true); + expect(manager.getCurrentState(mockTask.id)).toBe('qa_review'); + }); + it('should restore actor state from task with in_progress status', () => { const taskInProgress = createMockTask({ status: 'in_progress', diff --git a/apps/frontend/src/main/agent/agent-manager.ts b/apps/frontend/src/main/agent/agent-manager.ts index 38b2138a1d..8d3fe36796 100644 --- a/apps/frontend/src/main/agent/agent-manager.ts +++ b/apps/frontend/src/main/agent/agent-manager.ts @@ -15,6 +15,7 @@ import { } from './types'; import type { IdeationConfig } from '../../shared/types'; import { resetStuckSubtasks } from '../ipc-handlers/task/plan-file-utils'; +import { getAPIProfileEnv } from '../services/profile'; import { AUTO_BUILD_PATHS, getSpecsDir, sanitizeThinkingLevel } from '../../shared/constants'; import { projectStore } from '../project-store'; @@ -22,6 +23,19 @@ import { projectStore } from '../project-store'; * Main AgentManager - orchestrates agent process lifecycle * This is a slim facade that delegates to focused modules */ +async function hasRunnableAuth(profileManager: ClaudeProfileManager): Promise { + try { + const apiProfileEnv = await getAPIProfileEnv(); + if (Object.keys(apiProfileEnv).length > 0) { + return true; + } + } catch (error) { + console.warn('[AgentManager] Failed to load API profile env, falling back to OAuth auth check:', error); + } + + return profileManager.hasValidAuth(); +} + export class AgentManager extends EventEmitter { private state: AgentState; private events: AgentEvents; @@ -240,7 +254,7 @@ export class AgentManager extends EventEmitter { this.emit('error', taskId, 'Failed to initialize profile manager. Please check file permissions and disk space.'); return; } - if (!profileManager.hasValidAuth()) { + if (!await hasRunnableAuth(profileManager)) { this.emit('error', taskId, 'Claude authentication required. Please authenticate in Settings > Claude Profiles before starting tasks.'); return; } @@ -354,7 +368,7 @@ export class AgentManager extends EventEmitter { this.emit('error', taskId, 'Failed to initialize profile manager. Please check file permissions and disk space.'); return; } - if (!profileManager.hasValidAuth()) { + if (!await hasRunnableAuth(profileManager)) { this.emit('error', taskId, 'Claude authentication required. Please authenticate in Settings > Claude Profiles before starting tasks.'); return; } diff --git a/apps/frontend/src/main/app-updater.ts b/apps/frontend/src/main/app-updater.ts index 42905f0638..47d57d11aa 100644 --- a/apps/frontend/src/main/app-updater.ts +++ b/apps/frontend/src/main/app-updater.ts @@ -35,9 +35,13 @@ const GITHUB_REPO = 'Auto-Claude'; // Debug mode - DEBUG_UPDATER=true or development mode const DEBUG_UPDATER = process.env.DEBUG_UPDATER === 'true' || process.env.NODE_ENV === 'development'; +function isUpdaterOperational(): boolean { + return app.isPackaged || process.env.DEBUG_UPDATER === 'true'; +} + // Configure electron-updater autoUpdater.autoDownload = false; // We control downloads manually to prevent downgrades -autoUpdater.autoInstallOnAppQuit = true; // Automatically install on app quit +autoUpdater.autoInstallOnAppQuit = app.isPackaged; // Only auto-install on quit in packaged builds // Update channels: 'latest' for stable, 'beta' for pre-release type UpdateChannel = 'latest' | 'beta'; @@ -348,6 +352,10 @@ export function initializeAppUpdater(window: BrowserWindow, betaUpdates = false) * Called from IPC handler when user requests manual check */ export async function checkForUpdates(): Promise { + if (!isUpdaterOperational()) { + return null; + } + try { console.warn('[app-updater] Manual update check requested'); const result = await autoUpdater.checkForUpdates(); @@ -430,6 +438,12 @@ function isRunningFromReadOnlyVolume(): boolean { * Returns false if running from a read-only volume (update cannot proceed) */ export function quitAndInstall(): boolean { + // Guard: only install in production or when updater is explicitly enabled + if (!isUpdaterOperational()) { + console.warn('[app-updater] quitAndInstall blocked: not in packaged mode'); + return false; + } + // Check if running from read-only volume before attempting install if (isRunningFromReadOnlyVolume()) { console.warn('[app-updater] Cannot install: running from read-only volume'); diff --git a/apps/frontend/src/main/index.ts b/apps/frontend/src/main/index.ts index c8644ed8a9..13272b8d18 100644 --- a/apps/frontend/src/main/index.ts +++ b/apps/frontend/src/main/index.ts @@ -609,7 +609,9 @@ app.whenReady().then(() => { // Quit when all windows are closed (except on macOS) app.on('window-all-closed', () => { + appLog.warn('[main] window-all-closed event received'); if (!isMacOS()) { + appLog.warn('[main] Triggering app.quit() from window-all-closed (non-macOS)'); app.quit(); } }); @@ -618,13 +620,17 @@ app.on('window-all-closed', () => { // before the JS environment tears down. Without this, pty.node's native // ThreadSafeFunction callbacks fire after teardown, causing SIGABRT (GitHub #1469). app.on('before-quit', (event) => { + appLog.warn(`[main] before-quit event received (isQuitting=${String(isQuitting)})`); + // Re-entrancy guard: the second app.quit() call (after cleanup) must pass through if (isQuitting) { + appLog.warn('[main] before-quit re-entry detected, allowing quit to continue'); return; } isQuitting = true; // Pause quit to perform async cleanup + appLog.warn('[main] before-quit prevented for async cleanup'); event.preventDefault(); // Stop synchronous services immediately diff --git a/apps/frontend/src/main/insights/config.ts b/apps/frontend/src/main/insights/config.ts index a7b8d8c72a..25fcf28b8b 100644 --- a/apps/frontend/src/main/insights/config.ts +++ b/apps/frontend/src/main/insights/config.ts @@ -150,6 +150,8 @@ export class InsightsConfig { ...oauthModeClearVars, ...profileEnv, ...apiProfileEnv, + // Prevent nested Claude Code session detection in child SDK/CLI process. + CLAUDECODE: '', PYTHONUNBUFFERED: '1', PYTHONIOENCODING: 'utf-8', PYTHONUTF8: '1', diff --git a/apps/frontend/src/main/ipc-handlers/task/execution-handlers.ts b/apps/frontend/src/main/ipc-handlers/task/execution-handlers.ts index 1011f95ff9..dc821400ac 100644 --- a/apps/frontend/src/main/ipc-handlers/task/execution-handlers.ts +++ b/apps/frontend/src/main/ipc-handlers/task/execution-handlers.ts @@ -10,6 +10,7 @@ import { fileWatcher } from '../../file-watcher'; import { findTaskAndProject } from './shared'; import { checkGitStatus } from '../../project-initializer'; import { initializeClaudeProfileManager, type ClaudeProfileManager } from '../../claude-profile-manager'; +import { getAPIProfileEnv } from '../../services/profile'; import { taskStateManager } from '../../task-state-manager'; import { getPlanPath, @@ -83,6 +84,25 @@ async function ensureProfileManagerInitialized(): Promise< } } +/** + * Check whether any runnable auth method is available. + * Accepts either: + * - Active API profile credentials, or + * - Active OAuth profile credentials + */ +async function hasRunnableAuth(profileManager: ClaudeProfileManager): Promise { + try { + const apiProfileEnv = await getAPIProfileEnv(); + if (Object.keys(apiProfileEnv).length > 0) { + return true; + } + } catch (error) { + console.warn('[AuthCheck] Failed to load API profile env, falling back to OAuth auth check:', error); + } + + return profileManager.hasValidAuth(); +} + /** * Get the spec directory for file watching, preferring the worktree path if it exists. * When a task runs in a worktree, implementation_plan.json is written there, @@ -172,7 +192,7 @@ export function registerTaskExecutionHandlers( } // Check authentication - Claude requires valid auth to run tasks - if (!profileManager.hasValidAuth()) { + if (!await hasRunnableAuth(profileManager)) { console.warn('[TASK_START] No valid authentication for active profile'); mainWindow.webContents.send( IPC_CHANNELS.TASK_ERROR, @@ -747,7 +767,7 @@ export function registerTaskExecutionHandlers( return { success: false, error: initResult.error }; } const profileManager = initResult.profileManager; - if (!profileManager.hasValidAuth()) { + if (!await hasRunnableAuth(profileManager)) { console.warn('[TASK_UPDATE_STATUS] No valid authentication for active profile'); if (mainWindow) { mainWindow.webContents.send( @@ -977,6 +997,19 @@ export function registerTaskExecutionHandlers( return { success: false, error: 'Task not found' }; } + // Helper to notify renderer of task status changes. + const sendStatusChange = (status: TaskStatus) => { + const mainWindow = getMainWindow(); + if (mainWindow) { + mainWindow.webContents.send( + IPC_CHANNELS.TASK_STATUS_CHANGE, + taskId, + status, + project.id + ); + } + }; + // Get the spec directory - use task.specsPath if available (handles worktree vs main) // This is critical: task might exist in worktree, and getTasks() prefers worktree version. // If we write to main project but task is in worktree, the worktree's old status takes precedence on refresh. @@ -1092,6 +1125,7 @@ export function registerTaskExecutionHandlers( // CRITICAL: Invalidate cache AFTER file writes complete // This ensures getTasks() returns fresh data reflecting the recovery projectStore.invalidateTasksCache(project.id); + sendStatusChange('human_review'); return { success: true, @@ -1204,6 +1238,7 @@ export function registerTaskExecutionHandlers( if (!gitStatusForRestart.isGitRepo || !gitStatusForRestart.hasCommits) { console.warn('[Recovery] Git check failed, cannot auto-restart task'); // Recovery succeeded but we can't restart without git + sendStatusChange(newStatus); return { success: true, data: { @@ -1221,6 +1256,7 @@ export function registerTaskExecutionHandlers( const initResult = await ensureProfileManagerInitialized(); if (!initResult.success) { // Recovery succeeded but we can't restart without profile manager + sendStatusChange(newStatus); return { success: true, data: { @@ -1233,9 +1269,10 @@ export function registerTaskExecutionHandlers( }; } const profileManager = initResult.profileManager; - if (!profileManager.hasValidAuth()) { + if (!await hasRunnableAuth(profileManager)) { console.warn('[Recovery] Auth check failed, cannot auto-restart task'); // Recovery succeeded but we can't restart without auth + sendStatusChange(newStatus); return { success: true, data: { @@ -1327,15 +1364,7 @@ export function registerTaskExecutionHandlers( } // Notify renderer of status change - const mainWindow = getMainWindow(); - if (mainWindow) { - mainWindow.webContents.send( - IPC_CHANNELS.TASK_STATUS_CHANGE, - taskId, - newStatus, - project.id - ); - } + sendStatusChange(newStatus); return { success: true, diff --git a/apps/frontend/src/main/services/profile/profile-service.test.ts b/apps/frontend/src/main/services/profile/profile-service.test.ts index c5f9ad9926..6b581cdc03 100644 --- a/apps/frontend/src/main/services/profile/profile-service.test.ts +++ b/apps/frontend/src/main/services/profile/profile-service.test.ts @@ -607,7 +607,7 @@ describe('profile-service', () => { { id: 'profile-1', name: 'Test Profile', - baseUrl: '', + baseUrl: 'https://api.example.com', apiKey: 'sk-test-key-12345678', models: { default: 'claude-sonnet-4-5-20250929', @@ -627,14 +627,38 @@ describe('profile-service', () => { const result = await getAPIProfileEnv(); - expect(result).not.toHaveProperty('ANTHROPIC_BASE_URL'); expect(result).not.toHaveProperty('ANTHROPIC_DEFAULT_HAIKU_MODEL'); expect(result).not.toHaveProperty('ANTHROPIC_DEFAULT_SONNET_MODEL'); expect(result).toEqual({ + ANTHROPIC_BASE_URL: 'https://api.example.com', ANTHROPIC_AUTH_TOKEN: 'sk-test-key-12345678', ANTHROPIC_MODEL: 'claude-sonnet-4-5-20250929' }); }); + + it('should return empty object when active API profile is incomplete', async () => { + const mockFile: ProfilesFile = { + profiles: [ + { + id: 'profile-1', + name: 'Broken API Profile', + baseUrl: '', + apiKey: 'sk-test-key-12345678', + createdAt: Date.now(), + updatedAt: Date.now() + } + ], + activeProfileId: 'profile-1', + version: 1 + }; + + const { loadProfilesFile } = await import('./profile-manager'); + vi.mocked(loadProfilesFile).mockResolvedValue(mockFile); + + const result = await getAPIProfileEnv(); + + expect(result).toEqual({}); + }); }); describe('testConnection', () => { diff --git a/apps/frontend/src/main/services/profile/profile-service.ts b/apps/frontend/src/main/services/profile/profile-service.ts index c57f60ce25..624154008c 100644 --- a/apps/frontend/src/main/services/profile/profile-service.ts +++ b/apps/frontend/src/main/services/profile/profile-service.ts @@ -13,6 +13,7 @@ import Anthropic, { APIConnectionTimeoutError } from '@anthropic-ai/sdk'; +import { isAPIProfileAuthenticated } from '../../claude-profile/profile-utils'; import { loadProfilesFile, generateProfileId, atomicModifyProfiles } from './profile-manager'; import type { APIProfile, TestConnectionResult, ModelInfo, DiscoverModelsResult } from '@shared/types/profile'; @@ -272,6 +273,11 @@ export async function getAPIProfileEnv(): Promise> { return {}; } + // Return empty if active API profile is incomplete/invalid + if (!isAPIProfileAuthenticated(profile)) { + return {}; + } + // Map profile fields to SDK env vars const envVars: Record = { ANTHROPIC_BASE_URL: profile.baseUrl || '', diff --git a/apps/frontend/src/renderer/components/UsageIndicator.test.tsx b/apps/frontend/src/renderer/components/UsageIndicator.test.tsx new file mode 100644 index 0000000000..12a0edaf90 --- /dev/null +++ b/apps/frontend/src/renderer/components/UsageIndicator.test.tsx @@ -0,0 +1,144 @@ +/** + * @vitest-environment jsdom + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import '@testing-library/jest-dom/vitest'; +import { act, render, screen, waitFor } from '@testing-library/react'; +import type { ReactNode } from 'react'; +import { UsageIndicator } from './UsageIndicator'; +import { useSettingsStore } from '@/stores/settings-store'; + +vi.mock('@/stores/settings-store', () => ({ + useSettingsStore: vi.fn() +})); + +vi.mock('./ui/tooltip', () => ({ + TooltipProvider: ({ children }: { children: ReactNode }) => <>{children}, + Tooltip: ({ children }: { children: ReactNode }) => <>{children}, + TooltipTrigger: ({ children }: { children: ReactNode }) => <>{children}, + TooltipContent: ({ children }: { children: ReactNode }) => <>{children} +})); + +vi.mock('./ui/popover', () => ({ + Popover: ({ children }: { children: ReactNode }) => <>{children}, + PopoverTrigger: ({ children }: { children: ReactNode }) => <>{children}, + PopoverContent: ({ children }: { children: ReactNode }) => <>{children} +})); + +vi.mock('react-i18next', () => ({ + useTranslation: vi.fn(() => ({ + t: (key: string) => { + const translations: Record = { + 'common:usage.loading': 'Loading usage', + 'common:usage.reauthRequired': 'Re-authentication required', + 'common:usage.reauthRequiredDescription': 'Your session has expired. Re-authenticate to continue.', + 'common:usage.clickToOpenSettings': 'Open settings', + 'common:usage.dataUnavailable': 'Usage data unavailable', + 'common:usage.dataUnavailableDescription': 'Usage data is not currently available.', + 'common:usage.notAvailable': 'N/A' + }; + return translations[key] || key; + }, + i18n: { language: 'en' } + })) +})); + +let mockActiveProfileId: string | null = null; +let onAllProfilesUsageUpdatedCallback: ((allProfilesUsage: AllProfilesUsagePayload) => void) | undefined; +// Coupled to the payload shape returned by buildAllProfilesUsageResponse. +type AllProfilesUsagePayload = ReturnType['data']; + +function buildAllProfilesUsageResponse(needsReauthentication: boolean) { + return { + success: true, + data: { + activeProfile: { + sessionPercent: 0, + weeklyPercent: 0, + profileId: 'oauth-profile-1', + profileName: 'OAuth Profile 1', + fetchedAt: new Date(), + needsReauthentication + }, + allProfiles: [ + { + profileId: 'oauth-profile-1', + profileName: 'OAuth Profile 1', + sessionPercent: 0, + weeklyPercent: 0, + isAuthenticated: true, + isRateLimited: false, + availabilityScore: 100, + isActive: true, + needsReauthentication + } + ], + fetchedAt: new Date() + } + }; +} + +describe('UsageIndicator re-auth handling by auth mode', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockActiveProfileId = null; + onAllProfilesUsageUpdatedCallback = undefined; + + vi.mocked(useSettingsStore).mockImplementation((selector) => { + // Minimal store slice required by UsageIndicator. + const state = { activeProfileId: mockActiveProfileId } satisfies { activeProfileId: string | null }; + return selector(state as any); + }); + + (window as any).electronAPI = { + onUsageUpdated: vi.fn(() => vi.fn()), + onAllProfilesUsageUpdated: vi.fn((callback: (allProfilesUsage: AllProfilesUsagePayload) => void) => { + onAllProfilesUsageUpdatedCallback = callback; + return vi.fn(); + }), + requestUsageUpdate: vi.fn().mockResolvedValue({ success: false, data: null }), + requestAllProfilesUsage: vi.fn().mockResolvedValue(buildAllProfilesUsageResponse(true)) + }; + }); + + it('does not show OAuth re-auth UI in API profile mode', async () => { + mockActiveProfileId = 'api-profile-1'; + + render(); + + await waitFor(() => { + expect(screen.getByRole('button', { name: 'Usage data unavailable' })).toBeInTheDocument(); + }); + + expect(screen.queryByText('Re-authentication required')).not.toBeInTheDocument(); + }); + + it('shows re-auth UI in OAuth mode when active profile needs re-authentication', async () => { + render(); + + await waitFor(() => { + expect(screen.getByRole('button', { name: 'Re-authentication required' })).toBeInTheDocument(); + }); + }); + + it('ignores re-auth updates from usage events in API profile mode', async () => { + mockActiveProfileId = 'api-profile-1'; + + render(); + + await waitFor(() => { + expect(onAllProfilesUsageUpdatedCallback).toBeDefined(); + }); + + await act(async () => { + onAllProfilesUsageUpdatedCallback?.(buildAllProfilesUsageResponse(true).data); + }); + + await waitFor(() => { + expect(screen.getByRole('button', { name: 'Usage data unavailable' })).toBeInTheDocument(); + }); + + expect(screen.queryByText('Re-authentication required')).not.toBeInTheDocument(); + }); +}); diff --git a/apps/frontend/src/renderer/components/UsageIndicator.tsx b/apps/frontend/src/renderer/components/UsageIndicator.tsx index 048beb525d..ddbc78ec27 100644 --- a/apps/frontend/src/renderer/components/UsageIndicator.tsx +++ b/apps/frontend/src/renderer/components/UsageIndicator.tsx @@ -22,6 +22,7 @@ import { import { useTranslation } from 'react-i18next'; import { formatTimeRemaining, localizeUsageWindowLabel, hasHardcodedText } from '../../shared/utils/format-time'; import type { ClaudeUsageSnapshot, ProfileUsageSummary } from '../../shared/types/agent'; +import { useSettingsStore } from '@/stores/settings-store'; import type { AppSection } from './settings/AppSettings'; /** @@ -82,6 +83,8 @@ export function UsageIndicator() { const [isOpen, setIsOpen] = useState(false); const [isPinned, setIsPinned] = useState(false); const hoverTimeoutRef = useRef(null); + const activeApiProfileId = useSettingsStore((state) => state.activeProfileId); + const isUsingApiProfile = Boolean(activeApiProfileId); /** * Helper function to get initials from a profile name @@ -309,6 +312,10 @@ export function UsageIndicator() { (hasHardcodedText(usage?.weeklyResetTime) ? undefined : usage?.weeklyResetTime)) : (hasHardcodedText(usage?.weeklyResetTime) ? undefined : usage?.weeklyResetTime); + // Re-authentication is only relevant for OAuth mode, never for API profiles + const showReauthForActiveAccount = !isUsingApiProfile && Boolean(usage?.needsReauthentication); + const showUnavailableReauth = !isUsingApiProfile && activeProfileNeedsReauth; + useEffect(() => { // Listen for usage updates from main process const unsubscribe = window.electronAPI.onUsageUpdated((snapshot: ClaudeUsageSnapshot) => { @@ -322,9 +329,9 @@ export function UsageIndicator() { // Filter out the active profile - we only want to show "other" profiles const nonActiveProfiles = allProfilesUsage.allProfiles.filter(p => !p.isActive); setOtherProfiles(nonActiveProfiles); - // Track if active profile needs re-auth + // Track if active profile needs re-auth (OAuth mode only) const activeProfile = allProfilesUsage.allProfiles.find(p => p.isActive); - setActiveProfileNeedsReauth(activeProfile?.needsReauthentication ?? false); + setActiveProfileNeedsReauth(!isUsingApiProfile && Boolean(activeProfile?.needsReauthentication)); }); // Request initial usage on mount @@ -347,11 +354,9 @@ export function UsageIndicator() { if (result.success && result.data) { const nonActiveProfiles = result.data.allProfiles.filter(p => !p.isActive); setOtherProfiles(nonActiveProfiles); - // Track if active profile needs re-auth (even if main usage is unavailable) + // Track if active profile needs re-auth (OAuth mode only) const activeProfile = result.data.allProfiles.find(p => p.isActive); - if (activeProfile?.needsReauthentication) { - setActiveProfileNeedsReauth(true); - } + setActiveProfileNeedsReauth(!isUsingApiProfile && Boolean(activeProfile?.needsReauthentication)); } }).catch((error) => { console.warn('[UsageIndicator] Failed to fetch all profiles usage:', error); @@ -361,7 +366,7 @@ export function UsageIndicator() { unsubscribe(); unsubscribeAllProfiles?.(); }; - }, []); + }, [isUsingApiProfile]); // Show loading state if (isLoading) { @@ -376,7 +381,7 @@ export function UsageIndicator() { // Show unavailable state - with better messaging based on cause if (!isAvailable || !usage) { // Check if it's a re-auth issue (better UX than generic "not supported") - const needsReauth = activeProfileNeedsReauth; + const needsReauth = showUnavailableReauth; return ( @@ -440,7 +445,7 @@ export function UsageIndicator() { // Badge color based on the limiting (higher) percentage // Override to red/destructive when re-auth is needed - const badgeColorClasses = usage.needsReauthentication + const badgeColorClasses = showReauthForActiveAccount ? 'text-red-500 bg-red-500/10 border-red-500/20' : getBadgeColorClasses(limitingPercent); @@ -461,7 +466,7 @@ export function UsageIndicator() { const maxUsage = Math.max(usage.sessionPercent, usage.weeklyPercent); // Show AlertCircle when re-auth needed or high usage - const Icon = usage.needsReauthentication ? AlertCircle : + const Icon = showReauthForActiveAccount ? AlertCircle : maxUsage >= THRESHOLD_WARNING ? AlertCircle : maxUsage >= THRESHOLD_ELEVATED ? TrendingUp : Activity; @@ -478,7 +483,7 @@ export function UsageIndicator() { > {/* Show "!" when re-auth needed, otherwise dual usage display */} - {usage.needsReauthentication ? ( + {showReauthForActiveAccount ? ( ! @@ -510,7 +515,7 @@ export function UsageIndicator() { {/* Re-auth required prompt - shown when active profile needs re-authentication */} - {usage.needsReauthentication ? ( + {showReauthForActiveAccount ? (
@@ -615,16 +620,16 @@ export function UsageIndicator() { {/* Initials Avatar with warning indicator for re-auth needed */}
{getInitials(usage.profileName)}
{/* Status dot for re-auth needed */} - {usage.needsReauthentication && ( + {showReauthForActiveAccount && (
)}
@@ -635,14 +640,14 @@ export function UsageIndicator() { {t('common:usage.activeAccount')} - {usage.needsReauthentication && ( + {showReauthForActiveAccount && ( {t('common:usage.needsReauth')} )}
{usage.profileEmail || usage.profileName}
diff --git a/apps/frontend/src/shared/state-machines/__tests__/task-machine.test.ts b/apps/frontend/src/shared/state-machines/__tests__/task-machine.test.ts index 61f45525b5..1f25ee40c8 100644 --- a/apps/frontend/src/shared/state-machines/__tests__/task-machine.test.ts +++ b/apps/frontend/src/shared/state-machines/__tests__/task-machine.test.ts @@ -381,6 +381,15 @@ describe('taskMachine', () => { expect(snapshot.value).toBe('coding'); }); + it('should allow QA_STARTED from backlog (late event after actor restore)', () => { + const events: TaskEvent[] = [ + { type: 'QA_STARTED', iteration: 1, maxIterations: 3 } + ]; + + const snapshot = runEvents(events); + expect(snapshot.value).toBe('qa_review'); + }); + it('should allow CODING_STARTED from planning (skipped PLANNING_COMPLETE)', () => { const events: TaskEvent[] = [ { type: 'PLANNING_STARTED' }, @@ -401,6 +410,15 @@ describe('taskMachine', () => { expect(snapshot.value).toBe('qa_review'); }); + it('should allow ALL_SUBTASKS_DONE from backlog (late event after actor restore)', () => { + const events: TaskEvent[] = [ + { type: 'ALL_SUBTASKS_DONE', totalCount: 1 } + ]; + + const snapshot = runEvents(events); + expect(snapshot.value).toBe('qa_review'); + }); + it('should allow QA_STARTED from planning (missed coding events)', () => { const events: TaskEvent[] = [ { type: 'PLANNING_STARTED' }, @@ -433,6 +451,16 @@ describe('taskMachine', () => { expect(snapshot.value).toBe('human_review'); expect(snapshot.context.reviewReason).toBe('completed'); }); + + it('should allow QA_PASSED from backlog (late event after actor restore)', () => { + const events: TaskEvent[] = [ + { type: 'QA_PASSED', iteration: 1, testsRun: {} } + ]; + + const snapshot = runEvents(events); + expect(snapshot.value).toBe('human_review'); + expect(snapshot.context.reviewReason).toBe('completed'); + }); }); describe('qa_rejected flow', () => { diff --git a/apps/frontend/src/shared/state-machines/task-machine.ts b/apps/frontend/src/shared/state-machines/task-machine.ts index 91fb3b4282..9e96625313 100644 --- a/apps/frontend/src/shared/state-machines/task-machine.ts +++ b/apps/frontend/src/shared/state-machines/task-machine.ts @@ -52,6 +52,11 @@ export const taskMachine = createMachine( PLANNING_STARTED: 'planning', // Fallback: if coding starts from backlog (e.g., resumed task), go to coding CODING_STARTED: 'coding', + // Fallback: resumed tasks can emit late-stage events before state restoration catches up + ALL_SUBTASKS_DONE: 'qa_review', + QA_STARTED: 'qa_review', + QA_FAILED: 'qa_fixing', + QA_PASSED: { target: 'human_review', actions: 'setReviewReasonCompleted' }, USER_STOPPED: 'backlog' } }, diff --git a/apps/frontend/vitest.config.ts b/apps/frontend/vitest.config.ts index 199ca6efc4..42ba523acf 100644 --- a/apps/frontend/vitest.config.ts +++ b/apps/frontend/vitest.config.ts @@ -24,7 +24,7 @@ export default defineConfig({ }, resolve: { alias: { - '@': resolve(__dirname, 'src'), + '@': resolve(__dirname, 'src/renderer'), '@main': resolve(__dirname, 'src/main'), '@renderer': resolve(__dirname, 'src/renderer'), '@shared': resolve(__dirname, 'src/shared') diff --git a/package-lock.json b/package-lock.json index 31ab465ad8..c9dc697958 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "auto-claude", - "version": "2.7.6-beta.6", + "version": "2.7.6", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "auto-claude", - "version": "2.7.6-beta.6", + "version": "2.7.6", "license": "AGPL-3.0", "workspaces": [ "apps/*", @@ -25,7 +25,7 @@ }, "apps/frontend": { "name": "auto-claude-ui", - "version": "2.7.6-beta.6", + "version": "2.7.6", "hasInstallScript": true, "license": "AGPL-3.0", "dependencies": { diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py new file mode 100644 index 0000000000..fcd4d9dbce --- /dev/null +++ b/tests/test_agent_runner.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 +""" +Tests for spec.pipeline.agent_runner +""" + +import pytest +import sys +from pathlib import Path +from unittest.mock import MagicMock, patch + +# Add auto-claude backend to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / "apps" / "backend")) + +from spec.pipeline.agent_runner import AgentRunner + + +class TestAgentRunnerResolveAgentType: + """Tests for AgentRunner._resolve_agent_type.""" + + @pytest.mark.parametrize( + "prompt_file,phase_name,expected", + [ + ("spec_quick.md", None, "spec_writer"), + ("unknown.md", "quick_spec", "spec_writer"), + ("unknown.md", "planning", "planner"), + ("unknown.md", "validation", "spec_validation"), + ("unknown.md", "historical_context", "spec_gatherer"), + ("unknown.md", None, "spec_writer"), + ], + ) + def test_resolve_agent_type(self, prompt_file: str, phase_name: str | None, expected: str): + """Resolves agent type by prompt first, then phase, then default.""" + assert AgentRunner._resolve_agent_type(prompt_file, phase_name) == expected + + +class TestAgentRunnerUnknownAgent: + """Tests behavior for unknown resolved agent type.""" + + @pytest.mark.asyncio + async def test_run_agent_returns_error_on_unknown_agent_type(self, tmp_path: Path, monkeypatch): + """run_agent should fail early when resolved agent type is not configured.""" + prompt_dir = Path(__file__).parent.parent / "apps" / "backend" / "prompts" + prompt_file = "tmp_agent_runner_test_prompt.md" + prompt_path = prompt_dir / prompt_file + prompt_path.write_text("temporary prompt", encoding="utf-8") + + runner = AgentRunner(project_dir=tmp_path, spec_dir=tmp_path, model="sonnet") + + monkeypatch.setattr( + AgentRunner, + "_resolve_agent_type", + classmethod(lambda cls, prompt, phase: "not_a_real_agent"), + ) + + try: + success, output = await runner.run_agent(prompt_file, phase_name="quick_spec") + assert success is False + assert "Unknown spec agent type 'not_a_real_agent'" in output + assert prompt_file in output + assert "phase: quick_spec" in output + finally: + prompt_path.unlink(missing_ok=True) + + +class TestAgentRunnerErrorHandling: + """Tests for exception handling and stderr surfacing.""" + + @pytest.mark.asyncio + async def test_run_agent_includes_process_stderr_in_error_output( + self, tmp_path: Path, monkeypatch + ): + """run_agent should include ProcessError stderr text in returned error.""" + prompt_dir = Path(__file__).parent.parent / "apps" / "backend" / "prompts" + prompt_file = "tmp_agent_runner_stderr_prompt.md" + prompt_path = prompt_dir / prompt_file + prompt_path.write_text("temporary prompt", encoding="utf-8") + + runner = AgentRunner(project_dir=tmp_path, spec_dir=tmp_path, model="sonnet") + + class ProcessError(Exception): + def __init__(self, message: str, *, exit_code: int, stderr: str): + super().__init__( + f"{message} (exit code: {exit_code})\n" + "Error output: Check stderr output for details" + ) + self.exit_code = exit_code + self.stderr = stderr + + class _FakeClient: + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + async def query(self, _prompt): + raise ProcessError( + "Command failed with exit code 1", + exit_code=1, + stderr="fatal: synthetic stderr for test", + ) + + async def receive_response(self): + if False: # pragma: no cover + yield + + monkeypatch.setattr( + AgentRunner, + "_resolve_agent_type", + classmethod(lambda cls, prompt, phase: "spec_writer"), + ) + + with patch("core.client.create_client", return_value=_FakeClient()): + success, output = await runner.run_agent(prompt_file, phase_name="quick_spec") + + try: + assert success is False + assert "ProcessError:" in output + assert "--- STDERR START ---" in output + assert "fatal: synthetic stderr for test" in output + assert "--- STDERR END ---" in output + assert "[prompt=tmp_agent_runner_stderr_prompt.md]" in output + assert "[phase=quick_spec]" in output + finally: + prompt_path.unlink(missing_ok=True) + + @pytest.mark.asyncio + async def test_run_agent_uses_captured_cli_stderr_when_process_error_stderr_is_placeholder( + self, tmp_path: Path, monkeypatch + ): + """run_agent should include captured CLI stderr when ProcessError stderr is generic.""" + prompt_dir = Path(__file__).parent.parent / "apps" / "backend" / "prompts" + prompt_file = "tmp_agent_runner_stderr_capture_prompt.md" + prompt_path = prompt_dir / prompt_file + prompt_path.write_text("temporary prompt", encoding="utf-8") + + runner = AgentRunner(project_dir=tmp_path, spec_dir=tmp_path, model="sonnet") + + class ProcessError(Exception): + def __init__(self, message: str, *, exit_code: int, stderr: str): + super().__init__( + f"{message} (exit code: {exit_code})\n" + "Error output: Check stderr output for details" + ) + self.exit_code = exit_code + self.stderr = stderr + + def _fake_create_client(*args, **kwargs): + stderr_cb = kwargs["stderr_callback"] + stderr_cb("fatal: synthetic stderr line 1") + stderr_cb("fatal: synthetic stderr line 2") + + class _FakeClient: + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + async def query(self, _prompt): + raise ProcessError( + "Command failed with exit code 1", + exit_code=1, + stderr="Check stderr output for details", + ) + + async def receive_response(self): + if False: # pragma: no cover + yield + + return _FakeClient() + + monkeypatch.setattr( + AgentRunner, + "_resolve_agent_type", + classmethod(lambda cls, prompt, phase: "spec_writer"), + ) + + with patch("core.client.create_client", side_effect=_fake_create_client): + success, output = await runner.run_agent(prompt_file, phase_name="quick_spec") + + try: + assert success is False + assert "ProcessError:" in output + assert "--- STDERR START ---" in output + assert "fatal: synthetic stderr line 1" in output + assert "fatal: synthetic stderr line 2" in output + assert "--- STDERR END ---" in output + assert "[prompt=tmp_agent_runner_stderr_capture_prompt.md]" in output + assert "[phase=quick_spec]" in output + finally: + prompt_path.unlink(missing_ok=True) diff --git a/tests/test_auth.py b/tests/test_auth.py index 33faf03d05..7e53a3ed94 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -583,6 +583,8 @@ def test_returns_non_empty_vars(self, monkeypatch): assert "ANTHROPIC_MODEL" not in env # Empty value excluded assert "DISABLE_TELEMETRY" in env assert env["DISABLE_TELEMETRY"] == "1" + assert "CLAUDECODE" in env + assert env["CLAUDECODE"] == "" def test_includes_claude_git_bash_on_windows(self, monkeypatch): """Auto-detects git-bash path on Windows.""" diff --git a/tests/test_spec_phases.py b/tests/test_spec_phases.py index 3bebb29c03..447d1224e9 100644 --- a/tests/test_spec_phases.py +++ b/tests/test_spec_phases.py @@ -475,7 +475,7 @@ async def test_quick_spec_files_exist( mock_ui_module, mock_spec_validator, ): - """Quick spec phase returns early if files exist.""" + """Quick spec phase returns early if files exist and validate.""" (spec_dir / "spec.md").write_text("# Test Spec") (spec_dir / "implementation_plan.json").write_text(json.dumps({"phases": []})) @@ -483,7 +483,7 @@ async def test_quick_spec_files_exist( project_dir=temp_dir, spec_dir=spec_dir, task_description="Test task", - spec_validator=mock_spec_validator(), + spec_validator=mock_spec_validator(spec_valid=True, plan_valid=True), run_agent_fn=mock_run_agent_fn(), task_logger=mock_task_logger, ui_module=mock_ui_module, @@ -517,7 +517,7 @@ async def agent_side_effect(*args, **kwargs): project_dir=temp_dir, spec_dir=spec_dir, task_description="Test task", - spec_validator=mock_spec_validator(), + spec_validator=mock_spec_validator(spec_valid=True, plan_valid=True), run_agent_fn=agent_fn, task_logger=mock_task_logger, ui_module=mock_ui_module, @@ -528,6 +528,110 @@ async def agent_side_effect(*args, **kwargs): assert result.success is True assert agent_fn.called + @pytest.mark.asyncio + async def test_quick_spec_regenerates_when_existing_files_invalid( + self, + temp_dir: Path, + spec_dir: Path, + mock_task_logger, + mock_ui_module, + mock_spec_validator, + ): + """Quick spec reruns agent when existing files fail validation.""" + (spec_dir / "spec.md").write_text("# Invalid Existing Spec") + (spec_dir / "implementation_plan.json").write_text(json.dumps({"phases": []})) + + async def agent_side_effect(*args, **kwargs): + (spec_dir / "spec.md").write_text("# Regenerated Spec") + (spec_dir / "implementation_plan.json").write_text( + json.dumps( + { + "feature": "Test task", + "workflow_type": "simple", + "phases": [ + { + "phase": 1, + "name": "Implementation", + "subtasks": [ + { + "id": "subtask-1-1", + "description": "Do thing", + "status": "pending", + } + ], + } + ], + } + ) + ) + return (True, "Done") + + agent_fn = AsyncMock(side_effect=agent_side_effect) + validator = mock_spec_validator(spec_valid=False, plan_valid=False) + validator.validate_spec_document.side_effect = [ + type("Result", (), {"valid": False})(), + type("Result", (), {"valid": True})(), + ] + validator.validate_implementation_plan.side_effect = [ + type("Result", (), {"valid": False})(), + type("Result", (), {"valid": True})(), + ] + + executor = PhaseExecutor( + project_dir=temp_dir, + spec_dir=spec_dir, + task_description="Test task", + spec_validator=validator, + run_agent_fn=agent_fn, + task_logger=mock_task_logger, + ui_module=mock_ui_module, + ) + + result = await executor.phase_quick_spec() + + assert result.success is True + assert agent_fn.await_count == 1 + + @pytest.mark.asyncio + async def test_quick_spec_applies_plan_autofix_before_retry( + self, + temp_dir: Path, + spec_dir: Path, + mock_task_logger, + mock_ui_module, + mock_spec_validator, + ): + """Quick spec applies plan autofix when plan validation initially fails.""" + + async def agent_side_effect(*args, **kwargs): + (spec_dir / "spec.md").write_text("# Generated Spec") + (spec_dir / "implementation_plan.json").write_text(json.dumps({"spec_name": "legacy"})) + return (True, "Done") + + agent_fn = AsyncMock(side_effect=agent_side_effect) + validator = mock_spec_validator(spec_valid=True, plan_valid=False) + validator.validate_implementation_plan.side_effect = [ + type("Result", (), {"valid": False})(), + type("Result", (), {"valid": True})(), + ] + + executor = PhaseExecutor( + project_dir=temp_dir, + spec_dir=spec_dir, + task_description="Test task", + spec_validator=validator, + run_agent_fn=agent_fn, + task_logger=mock_task_logger, + ui_module=mock_ui_module, + ) + + with patch("spec.validate_pkg.auto_fix.auto_fix_plan", return_value=True) as mock_auto_fix: + result = await executor.phase_quick_spec() + + assert result.success is True + assert agent_fn.await_count == 1 + mock_auto_fix.assert_called_once_with(spec_dir) + class TestPhaseResearch: """Tests for phase_research method.""" diff --git a/tests/test_spec_pipeline.py b/tests/test_spec_pipeline.py index 878f43855b..3b08f1aedd 100644 --- a/tests/test_spec_pipeline.py +++ b/tests/test_spec_pipeline.py @@ -15,7 +15,7 @@ import sys import time from pathlib import Path -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch # Add auto-claude directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent / "apps" / "backend")) @@ -575,6 +575,35 @@ def test_creates_validator(self, temp_dir: Path): assert orchestrator.validator is not None +class TestSpecOrchestratorRunAgent: + """Tests for _run_agent wiring into AgentRunner.""" + + @pytest.mark.asyncio + async def test_run_agent_passes_phase_name_to_runner(self, temp_dir: Path): + """_run_agent forwards phase_name to AgentRunner.run_agent.""" + with patch('spec.pipeline.init_auto_claude_dir') as mock_init: + mock_init.return_value = (temp_dir / ".auto-claude", False) + specs_dir = temp_dir / ".auto-claude" / "specs" + specs_dir.mkdir(parents=True, exist_ok=True) + + orchestrator = SpecOrchestrator(project_dir=temp_dir) + + mock_runner = MagicMock() + mock_runner.run_agent = AsyncMock(return_value=(True, "ok")) + orchestrator._agent_runner = mock_runner + + result = await orchestrator._run_agent( + "spec_quick.md", + additional_context="ctx", + phase_name="quick_spec", + ) + + assert result == (True, "ok") + mock_runner.run_agent.assert_awaited_once() + _, kwargs = mock_runner.run_agent.call_args + assert kwargs["phase_name"] == "quick_spec" + + class TestSpecOrchestratorAssessment: """Tests for complexity assessment state."""