16 changes: 8 additions & 8 deletions apps/backend/spec/pipeline/models.py
@@ -203,27 +203,27 @@ def generate_spec_name(task_description: str) -> str:
     return "-".join(name_parts) if name_parts else "spec"


-def rename_spec_dir_from_requirements(spec_dir: Path) -> bool:
+def rename_spec_dir_from_requirements(spec_dir: Path) -> Path:
     """Rename spec directory based on requirements.json task description.

     Args:
         spec_dir: The current spec directory

     Returns:
-        Tuple of (success, new_spec_dir). If success is False, new_spec_dir is the original.
+        The new spec directory path (or the original if no rename was needed/possible).
     """
     requirements_file = spec_dir / "requirements.json"

     if not requirements_file.exists():
-        return False
+        return spec_dir

     try:
         with open(requirements_file, encoding="utf-8") as f:
             req = json.load(f)

         task_desc = req.get("task_description", "")
         if not task_desc:
-            return False
+            return spec_dir

         # Generate new name
         new_name = generate_spec_name(task_desc)
@@ -240,11 +240,11 @@ def rename_spec_dir_from_requirements(spec_dir: Path) -> bool:

         # Don't rename if it's already a good name (not "pending")
         if "pending" not in current_name:
-            return True
+            return spec_dir

         # Don't rename if target already exists
         if new_spec_dir.exists():
-            return True
+            return spec_dir

         # Rename the directory
         shutil.move(str(spec_dir), str(new_spec_dir))
@@ -253,11 +253,11 @@ def rename_spec_dir_from_requirements(spec_dir: Path) -> bool:
         update_task_logger_path(new_spec_dir)

         print_status(f"Spec folder: {highlight(new_dir_name)}", "success")
-        return True
+        return new_spec_dir

     except (json.JSONDecodeError, OSError) as e:
         print_status(f"Could not rename spec folder: {e}", "warning")
-        return False
+        return spec_dir


# Phase display configuration
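The return-type change is easiest to read from the call side. A minimal sketch of the new calling convention (hypothetical caller; the example path and the `SpecValidator` consumer are illustrative, mirroring the orchestrator change below, not code from this PR):

from pathlib import Path

# Hypothetical caller. Before this PR the function returned a bool and
# callers had to re-discover the directory after a rename; now every
# branch returns a usable Path.
spec_dir = Path("specs/001-pending")  # example path, not from the PR
spec_dir = rename_spec_dir_from_requirements(spec_dir)

# The result is the renamed directory on success, or the original
# directory when no rename was needed/possible - safe to use either way.
validator = SpecValidator(spec_dir)  # illustrative consumer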
85 changes: 70 additions & 15 deletions apps/backend/spec/pipeline/orchestrator.py
@@ -238,6 +238,42 @@ async def run(self, interactive: bool = True, auto_approve: bool = False) -> bool:
         task_logger.start_phase(LogPhase.PLANNING, "Starting spec creation process")
         TaskEventEmitter.from_spec_dir(self.spec_dir).emit("PLANNING_STARTED")

+        try:
+            return await self._run_phases(interactive, auto_approve, task_logger, ui)
+        except Exception as e:
+            # Emit PLANNING_FAILED so the frontend XState machine transitions to error state
+            # instead of leaving the task stuck in "planning" forever
+            try:
+                task_emitter = TaskEventEmitter.from_spec_dir(self.spec_dir)
+                task_emitter.emit(
+                    "PLANNING_FAILED",
+                    {"error": str(e), "recoverable": True},
+                )
+            except Exception:
+                pass  # Don't mask the original error
+            try:
+                task_logger.end_phase(
+                    LogPhase.PLANNING,
+                    success=False,
+                    message=f"Spec creation crashed: {e}",
+                )
+            except Exception:
+                pass  # Best effort - don't mask the original error when logging fails
+            raise
Comment on lines +241 to +262
🛠️ Refactor suggestion | 🟠 Major

Use _emit_planning_failed helper instead of inlining duplicate logic.

Lines 246–251 replicate exactly what _emit_planning_failed (line 690) already does, including the protective try/except. Replace the inline block with a single call to keep the emission logic centralized.

♻️ Proposed fix
         try:
             return await self._run_phases(interactive, auto_approve, task_logger, ui)
         except Exception as e:
             # Emit PLANNING_FAILED so the frontend XState machine transitions to error state
             # instead of leaving the task stuck in "planning" forever
-            try:
-                task_emitter = TaskEventEmitter.from_spec_dir(self.spec_dir)
-                task_emitter.emit(
-                    "PLANNING_FAILED",
-                    {"error": str(e), "recoverable": True},
-                )
-            except Exception:
-                pass  # Don't mask the original error
+            self._emit_planning_failed(f"Spec creation crashed: {e}")
             try:
                 task_logger.end_phase(
                     LogPhase.PLANNING,
🤖 Prompt for AI Agents
In `@apps/backend/spec/pipeline/orchestrator.py` around lines 241-262, replace
the inline emit/try block that duplicates the emission helper with a call to the
centralized helper: remove the try/except that creates TaskEventEmitter and
emits "PLANNING_FAILED", and instead call self._emit_planning_failed(...) with a
message built from the caught exception e so the emission logic stays
centralized; keep the subsequent task_logger.end_phase(...) handling as-is.


+    async def _run_phases(
+        self,
+        interactive: bool,
+        auto_approve: bool,
+        task_logger,
+        ui,
+    ) -> bool:
Comment on lines +264 to +270
🧹 Nitpick | 🔵 Trivial

Missing type hints on _run_phases parameters.

task_logger and ui lack type annotations. Per coding guidelines, backend Python should use type hints.

Proposed fix
+    from types import ModuleType
+    from task_logger import TaskLogger
+
     async def _run_phases(
         self,
         interactive: bool,
         auto_approve: bool,
-        task_logger,
-        ui,
+        task_logger: TaskLogger,
+        ui: ModuleType,
     ) -> bool:
🤖 Prompt for AI Agents
In `@apps/backend/spec/pipeline/orchestrator.py` around lines 264 - 270, The
_run_phases method is missing type hints for the task_logger and ui parameters;
update the signature of async def _run_phases(self, interactive: bool,
auto_approve: bool, task_logger, ui) -> bool to add precise type annotations for
task_logger and ui (e.g., TaskLogger or logging.Logger and UIInterface or the
concrete UI class used in this module) consistent with the codebase typing
conventions, and import any required types or use forward references if
necessary so the function signature becomes fully typed.

"""Internal method that runs all spec creation phases.

Separated from run() so that run() can wrap this in a try/except
to emit PLANNING_FAILED on unhandled exceptions.
"""

print(
box(
f"Spec Directory: {self.spec_dir}\n"
@@ -294,6 +330,7 @@ def run_phase(name: str, phase_fn: Callable) -> phases.PhaseResult:
            task_logger.end_phase(
                LogPhase.PLANNING, success=False, message="Discovery failed"
            )
+           self._emit_planning_failed("Discovery phase failed")
            return False
        # Store summary for subsequent phases (compaction)
        await self._store_phase_summary("discovery")
@@ -310,12 +347,17 @@ def run_phase(name: str, phase_fn: Callable) -> phases.PhaseResult:
                success=False,
                message="Requirements gathering failed",
            )
+           self._emit_planning_failed("Requirements gathering failed")
            return False
        # Store summary for subsequent phases (compaction)
        await self._store_phase_summary("requirements")

        # Rename spec folder with better name from requirements
-       rename_spec_dir_from_requirements(self.spec_dir)
+       # IMPORTANT: Update self.spec_dir after rename so subsequent phases use the correct path
+       new_spec_dir = rename_spec_dir_from_requirements(self.spec_dir)
+       if new_spec_dir != self.spec_dir:
+           self.spec_dir = new_spec_dir
+           self.validator = SpecValidator(self.spec_dir)

        # Update task description from requirements
        req = requirements.load_requirements(self.spec_dir)
@@ -338,6 +380,7 @@ def run_phase(name: str, phase_fn: Callable) -> phases.PhaseResult:
            task_logger.end_phase(
                LogPhase.PLANNING, success=False, message="Complexity assessment failed"
            )
+           self._emit_planning_failed("Complexity assessment failed")
            return False

        # Map of all available phases
@@ -401,6 +444,9 @@ def run_phase(name: str, phase_fn: Callable) -> phases.PhaseResult:
                success=False,
                message=f"Phase {phase_name} failed",
            )
+           self._emit_planning_failed(
+               f"Phase '{phase_name}' failed: {'; '.join(result.errors)}"
+           )
            return False

        # Summary
@@ -638,6 +684,25 @@ def _print_completion_summary(
            )
        )

+    def _emit_planning_failed(self, error: str) -> None:
+        """Emit PLANNING_FAILED event so the frontend transitions to error state.
+
+        Without this, the task stays stuck in 'planning' / 'in_progress' forever
+        when spec creation fails, because the XState machine never receives a
+        terminal event.
+
+        Args:
+            error: Human-readable error description
+        """
+        try:
+            task_emitter = TaskEventEmitter.from_spec_dir(self.spec_dir)
+            task_emitter.emit(
+                "PLANNING_FAILED",
+                {"error": error, "recoverable": True},
+            )
+        except Exception:
+            pass  # Best effort - don't mask the original failure
Comment on lines +690 to +707
🧹 Nitpick | 🔵 Trivial

Clean helper; consider logging the swallowed exception for debuggability.

The bare pass on lines 703–704 silences all failures, including unexpected ones (e.g., self.spec_dir is None). A debug-level log would help troubleshoot without masking the original error.

Suggested improvement
         except Exception:
-            pass  # Best effort - don't mask the original failure
+            # Best effort - don't mask the original failure.
+            # Logging here could itself fail, so keep it minimal.
+            import sys
+            try:
+                sys.stderr.write("[orchestrator] _emit_planning_failed failed\n")
+            except Exception:
+                pass
🤖 Prompt for AI Agents
In `@apps/backend/spec/pipeline/orchestrator.py` around lines 687 - 704, The
except block in _emit_planning_failed currently swallows all exceptions; change
it to capture the exception (except Exception as e) and emit a debug-level log
so failures building or emitting the PLANNING_FAILED event are visible for
debugging. Use the module/class logger (e.g., logger.debug or self.logger.debug
if available) to log a short message including the exception and context (e.g.,
spec_dir and that TaskEventEmitter.from_spec_dir or task_emitter.emit failed)
while still not raising the error so the original failure isn’t masked; reference
_emit_planning_failed, TaskEventEmitter.from_spec_dir, and task_emitter.emit
when making the change.
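
For reference, the logger-based variant this prompt describes might look like the sketch below. The standalone-function shape, the module-level `logger`, and the assumption that `TaskEventEmitter` is importable from the surrounding module are illustrative choices, not the committed code:

import logging
from pathlib import Path

logger = logging.getLogger(__name__)


def emit_planning_failed(spec_dir: Path, error: str) -> None:
    """Best-effort PLANNING_FAILED emission with a visible failure trail."""
    try:
        task_emitter = TaskEventEmitter.from_spec_dir(spec_dir)
        task_emitter.emit("PLANNING_FAILED", {"error": error, "recoverable": True})
    except Exception as exc:
        # Swallow the error so the original failure being reported is not
        # masked, but leave a debug-level trace instead of a bare pass.
        logger.debug("PLANNING_FAILED emission failed for %s: %s", spec_dir, exc)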


    def _run_review_checkpoint(self, auto_approve: bool) -> bool:
        """Run the human review checkpoint.

@@ -698,17 +763,7 @@ def _rename_spec_dir_from_requirements(self) -> bool:
        Returns:
            True if successful or not needed, False on error
        """
-       result = rename_spec_dir_from_requirements(self.spec_dir)
-       # Update self.spec_dir if it was renamed
-       if result and self.spec_dir.name.endswith("-pending"):
-           # Find the renamed directory
-           parent = self.spec_dir.parent
-           prefix = self.spec_dir.name[:4]  # e.g., "001-"
-           for candidate in parent.iterdir():
-               if (
-                   candidate.name.startswith(prefix)
-                   and "pending" not in candidate.name
-               ):
-                   self.spec_dir = candidate
-                   break
-       return result
+       new_spec_dir = rename_spec_dir_from_requirements(self.spec_dir)
+       if new_spec_dir != self.spec_dir:
+           self.spec_dir = new_spec_dir
+       return new_spec_dir.exists()
⚠️ Potential issue | 🟡 Minor

Backward-compat method now correctly updates self.spec_dir, but return semantics have subtly changed.

Previously this method presumably returned True on success / False on error. Now it returns new_spec_dir.exists(). If the rename succeeds but the new directory is somehow immediately deleted (race condition), this would return False even though the rename succeeded. This is an unlikely edge case, but worth noting for callers that rely on the return value.

🤖 Prompt for AI Agents
In `@apps/backend/spec/pipeline/orchestrator.py` around lines 766 - 769, The
current change updates self.spec_dir but flips return semantics by returning
new_spec_dir.exists() which can be False even when the rename succeeded; to
preserve previous success/error semantics, detect whether a rename actually
occurred (compare new_spec_dir with the old value), set self.spec_dir
accordingly (as you already do), and return True when a rename happened
(indicating success); otherwise fall back to checking existence of the existing
spec dir (e.g., return self.spec_dir.exists()). Reference:
rename_spec_dir_from_requirements, self.spec_dir, new_spec_dir.exists().
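
Spelled out, the suggested semantics might look like the following method body; this is a sketch of the reviewer's proposal under its own assumptions, not the committed change:

def _rename_spec_dir_from_requirements(self) -> bool:
    """Rename the spec dir from requirements and report success."""
    new_spec_dir = rename_spec_dir_from_requirements(self.spec_dir)
    if new_spec_dir != self.spec_dir:
        # A rename actually happened: update state and report success,
        # regardless of any later filesystem race on the new directory.
        self.spec_dir = new_spec_dir
        return True
    # No rename occurred - fall back to checking the existing directory.
    return self.spec_dir.exists()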

72 changes: 51 additions & 21 deletions apps/frontend/src/main/ipc-handlers/task/execution-handlers.ts
@@ -161,6 +161,34 @@ export function registerTaskExecutionHandlers(

    console.warn('[TASK_START] Found task:', task.specId, 'status:', task.status, 'reviewReason:', task.reviewReason, 'subtasks:', task.subtasks.length);

+    // Check if implementation_plan.json has valid subtasks BEFORE XState handling.
+    // This is more reliable than task.subtasks.length which may not be loaded yet.
+    const specsBaseDir = getSpecsDir(project.autoBuildPath);
+    const specDir = path.join(
+      project.path,
+      specsBaseDir,
+      task.specId
+    );
+    const planFilePath = path.join(specDir, AUTO_BUILD_PATHS.IMPLEMENTATION_PLAN);
+    let planHasSubtasks = false;
+    if (existsSync(planFilePath)) {
+      try {
+        const planContent = readFileSync(planFilePath, 'utf-8');
+        const plan = JSON.parse(planContent);
+        const phases = plan?.phases;
+        if (Array.isArray(phases)) {
+          const subtaskCount = phases.reduce(
+            (sum: number, phase: { subtasks?: unknown[] }) =>
+              sum + (Array.isArray(phase.subtasks) ? phase.subtasks.length : 0),
+            0
+          );
+          planHasSubtasks = subtaskCount > 0;
+        }
+      } catch {
+        // Invalid/corrupt plan file - treat as no subtasks
+      }
+    }
+
    // Immediately mark as started so the UI moves the card to In Progress.
    // Use XState actor state as source of truth (if actor exists), with task data as fallback.
    // - plan_review: User approved the plan, send PLAN_APPROVED to transition to coding
@@ -173,6 +201,11 @@ export function registerTaskExecutionHandlers(
      // XState says plan_review - send PLAN_APPROVED
      console.warn('[TASK_START] XState: plan_review -> coding via PLAN_APPROVED');
      taskStateManager.handleUiEvent(taskId, { type: 'PLAN_APPROVED' }, task, project);
+    } else if (currentXState === 'error' && !planHasSubtasks) {
+      // FIX (#1562): Task crashed during planning (no subtasks yet).
+      // Uses planHasSubtasks from implementation_plan.json (more reliable than task.subtasks.length).
+      console.warn('[TASK_START] XState: error with no plan subtasks -> planning via PLANNING_STARTED');
+      taskStateManager.handleUiEvent(taskId, { type: 'PLANNING_STARTED' }, task, project);
    } else if (currentXState === 'human_review' || currentXState === 'error') {
      // XState says human_review or error - send USER_RESUMED
      console.warn('[TASK_START] XState:', currentXState, '-> coding via USER_RESUMED');
@@ -186,6 +219,11 @@ export function registerTaskExecutionHandlers(
      // No XState actor - fallback to task data (e.g., after app restart)
      console.warn('[TASK_START] No XState actor, task data: plan_review -> coding via PLAN_APPROVED');
      taskStateManager.handleUiEvent(taskId, { type: 'PLAN_APPROVED' }, task, project);
+    } else if (task.status === 'error' && !planHasSubtasks) {
+      // FIX (#1562): No XState actor, task crashed during planning (no subtasks).
+      // Uses planHasSubtasks from implementation_plan.json (more reliable than task.subtasks.length).
+      console.warn('[TASK_START] No XState actor, error with no plan subtasks -> planning via PLANNING_STARTED');
+      taskStateManager.handleUiEvent(taskId, { type: 'PLANNING_STARTED' }, task, project);
    } else if (task.status === 'human_review' || task.status === 'error') {
      // No XState actor - fallback to task data for resuming
      console.warn('[TASK_START] No XState actor, task data:', task.status, '-> coding via USER_RESUMED');
@@ -204,13 +242,7 @@ export function registerTaskExecutionHandlers(
      console.warn(`[TASK_START] Reset ${resetResult.resetCount} stuck subtask(s) before starting`);
    }

-    // Start file watcher for this task
-    const specsBaseDir = getSpecsDir(project.autoBuildPath);
-    const specDir = path.join(
-      project.path,
-      specsBaseDir,
-      task.specId
-    );
+    // Start file watcher for this task (specsBaseDir and specDir already computed above)
    fileWatcher.watch(taskId, specDir);

    // Check if spec.md exists (indicates spec creation was already done or in progress)
@@ -220,9 +252,13 @@ export function registerTaskExecutionHandlers(
    // Check if this task needs spec creation first (no spec file = not yet created)
    // OR if it has a spec but no implementation plan subtasks (spec created, needs planning/building)
    const needsSpecCreation = !hasSpec;
-    const needsImplementation = hasSpec && task.subtasks.length === 0;
+    // FIX (#1562): Check actual plan file for subtasks, not just task.subtasks.length.
+    // When a task crashes during planning, it may have spec.md but an empty/missing
+    // implementation_plan.json. Previously, this path would call startTaskExecution
+    // (run.py) which expects subtasks to exist. Now we check the actual plan file.
+    const needsImplementation = hasSpec && !planHasSubtasks;

-    console.warn('[TASK_START] hasSpec:', hasSpec, 'needsSpecCreation:', needsSpecCreation, 'needsImplementation:', needsImplementation);
+    console.warn('[TASK_START] hasSpec:', hasSpec, 'planHasSubtasks:', planHasSubtasks, 'needsSpecCreation:', needsSpecCreation, 'needsImplementation:', needsImplementation);

    // Get base branch: task-level override takes precedence over project settings
    const baseBranch = task.metadata?.baseBranch || project.settings?.mainBranch;
@@ -237,18 +273,12 @@ export function registerTaskExecutionHandlers(
      // Also pass baseBranch so worktrees are created from the correct branch
      agentManager.startSpecCreation(taskId, project.path, taskDescription, specDir, task.metadata, baseBranch, project.id);
    } else if (needsImplementation) {
-      // Spec exists but no subtasks - run run.py to create implementation plan and execute
-      // Read the spec.md to get the task description
-      const _taskDescription = task.description || task.title;
-      try {
-        readFileSync(specFilePath, 'utf-8');
-      } catch {
-        // Use default description
-      }
-
-      console.warn('[TASK_START] Starting task execution (no subtasks) for:', task.specId);
-      // Start task execution which will create the implementation plan
-      // Note: No parallel mode for planning phase - parallel only makes sense with multiple subtasks
+      // Spec exists but no valid subtasks in implementation plan
+      // FIX (#1562): Use startTaskExecution (run.py) which will create the planner
+      // agent session to generate the implementation plan. run.py handles the case
+      // where implementation_plan.json is missing or has no subtasks - the planner
+      // agent will generate the plan before the coder starts.
+      console.warn('[TASK_START] Starting task execution (no valid subtasks in plan) for:', task.specId);
      agentManager.startTaskExecution(
        taskId,
        project.path,
6 changes: 5 additions & 1 deletion apps/frontend/src/shared/state-machines/task-machine.ts
@@ -126,12 +126,16 @@ export const taskMachine = createMachine(
        on: {
          CREATE_PR: 'creating_pr',
          MARK_DONE: 'done',
-         USER_RESUMED: { target: 'coding', actions: 'clearReviewReason' }
+         USER_RESUMED: { target: 'coding', actions: 'clearReviewReason' },
+         // Allow restarting planning from human_review (e.g., incomplete task with no subtasks)
+         PLANNING_STARTED: { target: 'planning', actions: 'clearReviewReason' }
        }
      },
      error: {
        on: {
          USER_RESUMED: { target: 'coding', actions: 'clearReviewReason' },
+         // Allow restarting from error back to planning (e.g., spec creation crashed)
+         PLANNING_STARTED: { target: 'planning', actions: 'clearReviewReason' },
          MARK_DONE: 'done'
        }
      },