16 changes: 8 additions & 8 deletions apps/backend/spec/pipeline/models.py
@@ -203,27 +203,27 @@ def generate_spec_name(task_description: str) -> str:
return "-".join(name_parts) if name_parts else "spec"


-def rename_spec_dir_from_requirements(spec_dir: Path) -> bool:
+def rename_spec_dir_from_requirements(spec_dir: Path) -> Path:
"""Rename spec directory based on requirements.json task description.

Args:
spec_dir: The current spec directory

Returns:
-Tuple of (success, new_spec_dir). If success is False, new_spec_dir is the original.
+The new spec directory path (or the original if no rename was needed/possible).
"""
requirements_file = spec_dir / "requirements.json"

if not requirements_file.exists():
-return False
+return spec_dir

try:
with open(requirements_file, encoding="utf-8") as f:
req = json.load(f)

task_desc = req.get("task_description", "")
if not task_desc:
-return False
+return spec_dir

# Generate new name
new_name = generate_spec_name(task_desc)
@@ -240,11 +240,11 @@ def rename_spec_dir_from_requirements(spec_dir: Path) -> bool:

# Don't rename if it's already a good name (not "pending")
if "pending" not in current_name:
-return True
+return spec_dir

# Don't rename if target already exists
if new_spec_dir.exists():
-return True
+return spec_dir

# Rename the directory
shutil.move(str(spec_dir), str(new_spec_dir))
@@ -253,11 +253,11 @@ def rename_spec_dir_from_requirements(spec_dir: Path) -> bool:
update_task_logger_path(new_spec_dir)

print_status(f"Spec folder: {highlight(new_dir_name)}", "success")
-return True
+return new_spec_dir

except (json.JSONDecodeError, OSError) as e:
print_status(f"Could not rename spec folder: {e}", "warning")
-return False
+return spec_dir


# Phase display configuration
106 changes: 90 additions & 16 deletions apps/backend/spec/pipeline/orchestrator.py
@@ -238,6 +238,42 @@ async def run(self, interactive: bool = True, auto_approve: bool = False) -> bool
task_logger.start_phase(LogPhase.PLANNING, "Starting spec creation process")
TaskEventEmitter.from_spec_dir(self.spec_dir).emit("PLANNING_STARTED")

+try:
+return await self._run_phases(interactive, auto_approve, task_logger, ui)
+except Exception as e:
+# Emit PLANNING_FAILED so the frontend XState machine transitions to error state
+# instead of leaving the task stuck in "planning" forever
+try:
+task_emitter = TaskEventEmitter.from_spec_dir(self.spec_dir)
+task_emitter.emit(
+"PLANNING_FAILED",
+{"error": str(e), "recoverable": True},
+)
+except Exception:
+pass # Don't mask the original error
+try:
+task_logger.end_phase(
+LogPhase.PLANNING,
+success=False,
+message=f"Spec creation crashed: {e}",
+)
+except Exception:
+pass # Best effort - don't mask the original error when logging fails
+raise
Comment on lines +241 to +262

🛠️ Refactor suggestion | 🟠 Major

Use _emit_planning_failed helper instead of inlining duplicate logic.

Lines 246–251 replicate exactly what _emit_planning_failed (line 690) already does, including the protective try/except. Replace the inline block with a single call to keep the emission logic centralized.

♻️ Proposed fix
         try:
             return await self._run_phases(interactive, auto_approve, task_logger, ui)
         except Exception as e:
             # Emit PLANNING_FAILED so the frontend XState machine transitions to error state
             # instead of leaving the task stuck in "planning" forever
-            try:
-                task_emitter = TaskEventEmitter.from_spec_dir(self.spec_dir)
-                task_emitter.emit(
-                    "PLANNING_FAILED",
-                    {"error": str(e), "recoverable": True},
-                )
-            except Exception:
-                pass  # Don't mask the original error
+            self._emit_planning_failed(f"Spec creation crashed: {e}")
             try:
                 task_logger.end_phase(
                     LogPhase.PLANNING,
🤖 Prompt for AI Agents
In `@apps/backend/spec/pipeline/orchestrator.py` around lines 241 - 262, Replace
the inline emit/try block that duplicates the emission helper with a call to the
centralized helper: remove the try/except that creates TaskEventEmitter and
emits "PLANNING_FAILED" and instead call self._emit_planning_failed(...) passing
the current task_logger (and the caught exception e) so the emission logic is
centralized; keep the subsequent task_logger.end_phase(...) handling as-is.


+async def _run_phases(
+self,
+interactive: bool,
+auto_approve: bool,
+task_logger,
+ui,
+) -> bool:
Comment on lines +264 to +270

🧹 Nitpick | 🔵 Trivial

Missing type hints on _run_phases parameters.

task_logger and ui lack type annotations. Per coding guidelines, backend Python should use type hints.

Proposed fix
+    from types import ModuleType
+    from task_logger import TaskLogger
+
     async def _run_phases(
         self,
         interactive: bool,
         auto_approve: bool,
-        task_logger,
-        ui,
+        task_logger: TaskLogger,
+        ui: ModuleType,
     ) -> bool:
🤖 Prompt for AI Agents
In `@apps/backend/spec/pipeline/orchestrator.py` around lines 264 - 270, The
_run_phases method is missing type hints for the task_logger and ui parameters;
update the signature of async def _run_phases(self, interactive: bool,
auto_approve: bool, task_logger, ui) -> bool to add precise type annotations for
task_logger and ui (e.g., TaskLogger or logging.Logger and UIInterface or the
concrete UI class used in this module) consistent with the codebase typing
conventions, and import any required types or use forward references if
necessary so the function signature becomes fully typed.

"""Internal method that runs all spec creation phases.

Separated from run() so that run() can wrap this in a try/except
to emit PLANNING_FAILED on unhandled exceptions.
"""

print(
box(
f"Spec Directory: {self.spec_dir}\n"
@@ -294,6 +330,7 @@ def run_phase(name: str, phase_fn: Callable) -> phases.PhaseResult:
task_logger.end_phase(
LogPhase.PLANNING, success=False, message="Discovery failed"
)
self._emit_planning_failed("Discovery phase failed")
return False
# Store summary for subsequent phases (compaction)
await self._store_phase_summary("discovery")
@@ -310,12 +347,20 @@ def run_phase(name: str, phase_fn: Callable) -> phases.PhaseResult:
success=False,
message="Requirements gathering failed",
)
self._emit_planning_failed("Requirements gathering failed")
return False
# Store summary for subsequent phases (compaction)
await self._store_phase_summary("requirements")

# Rename spec folder with better name from requirements
-rename_spec_dir_from_requirements(self.spec_dir)
+# IMPORTANT: Update self.spec_dir after rename so subsequent phases use the correct path
+new_spec_dir = rename_spec_dir_from_requirements(self.spec_dir)
+if new_spec_dir != self.spec_dir:
+self.spec_dir = new_spec_dir
+self.validator = SpecValidator(self.spec_dir)

+# Update phase executor to use the renamed directory
+phase_executor.spec_dir = self.spec_dir
+phase_executor.spec_validator = self.validator

# Update task description from requirements
req = requirements.load_requirements(self.spec_dir)
@@ -338,6 +383,7 @@ def run_phase(name: str, phase_fn: Callable) -> phases.PhaseResult:
task_logger.end_phase(
LogPhase.PLANNING, success=False, message="Complexity assessment failed"
)
self._emit_planning_failed("Complexity assessment failed")
return False

# Map of all available phases
@@ -401,6 +447,9 @@ def run_phase(name: str, phase_fn: Callable) -> phases.PhaseResult:
success=False,
message=f"Phase {phase_name} failed",
)
+self._emit_planning_failed(
+f"Phase '{phase_name}' failed: {'; '.join(result.errors)}"
+)
return False

# Summary
@@ -638,6 +687,25 @@ def _print_completion_summary(
)
)

+def _emit_planning_failed(self, error: str) -> None:
+"""Emit PLANNING_FAILED event so the frontend transitions to error state.

+Without this, the task stays stuck in 'planning' / 'in_progress' forever
+when spec creation fails, because the XState machine never receives a
+terminal event.

+Args:
+error: Human-readable error description
+"""
+try:
+task_emitter = TaskEventEmitter.from_spec_dir(self.spec_dir)
+task_emitter.emit(
+"PLANNING_FAILED",
+{"error": error, "recoverable": True},
+)
+except Exception:
+pass # Best effort - don't mask the original failure
Comment on lines +690 to +707

🧹 Nitpick | 🔵 Trivial

Clean helper; consider logging the swallowed exception for debuggability.

The bare pass on lines 703–704 silences all failures, including unexpected ones (e.g., self.spec_dir is None). A debug-level log would help troubleshoot without masking the original error.

Suggested improvement
         except Exception:
-            pass  # Best effort - don't mask the original failure
+            # Best effort - don't mask the original failure.
+            # Logging here could itself fail, so keep it minimal.
+            import sys
+            try:
+                sys.stderr.write("[orchestrator] _emit_planning_failed failed\n")
+            except Exception:
+                pass
🤖 Prompt for AI Agents
In `@apps/backend/spec/pipeline/orchestrator.py` around lines 687 - 704, The
except block in _emit_planning_failed currently swallows all exceptions; change
it to capture the exception (except Exception as e) and emit a debug-level log
so failures building or emitting the PLANNING_FAILED event are visible for
debugging. Use the module/class logger (e.g., logger.debug or self.logger.debug
if available) to log a short message including the exception and context (e.g.,
spec_dir and that TaskEventEmitter.from_spec_dir or task_emitter.emit failed)
while still not raising the error so original failure isn’t masked; reference
_emit_planning_failed, TaskEventEmitter.from_spec_dir, and task_emitter.emit
when making the change.


def _run_review_checkpoint(self, auto_approve: bool) -> bool:
"""Run the human review checkpoint.

@@ -696,19 +764,25 @@ def _rename_spec_dir_from_requirements(self) -> bool:
The functionality has been moved to models.rename_spec_dir_from_requirements.

Returns:
-True if successful or not needed, False on error
+True if successful or not needed, False if prerequisites are missing
"""
-result = rename_spec_dir_from_requirements(self.spec_dir)
-# Update self.spec_dir if it was renamed
-if result and self.spec_dir.name.endswith("-pending"):
-# Find the renamed directory
-parent = self.spec_dir.parent
-prefix = self.spec_dir.name[:4] # e.g., "001-"
-for candidate in parent.iterdir():
-if (
-candidate.name.startswith(prefix)
-and "pending" not in candidate.name
-):
-self.spec_dir = candidate
-break
-return result
+# Check prerequisites first
+requirements_file = self.spec_dir / "requirements.json"
+if not requirements_file.exists():
+return False

+try:
+with open(requirements_file, encoding="utf-8") as f:
+req = json.load(f)
+task_desc = req.get("task_description", "")
+if not task_desc:
+return False
+except (json.JSONDecodeError, OSError):
+return False

+# Attempt rename
+new_spec_dir = rename_spec_dir_from_requirements(self.spec_dir)
+if new_spec_dir != self.spec_dir:
+self.spec_dir = new_spec_dir
+self.validator = SpecValidator(self.spec_dir)
+return True
78 changes: 56 additions & 22 deletions apps/frontend/src/main/ipc-handlers/task/execution-handlers.ts
@@ -161,6 +161,26 @@ export function registerTaskExecutionHandlers(

console.warn('[TASK_START] Found task:', task.specId, 'status:', task.status, 'reviewReason:', task.reviewReason, 'subtasks:', task.subtasks.length);

+// Check if implementation_plan.json has valid subtasks BEFORE XState handling.
+// This is more reliable than task.subtasks.length which may not be loaded yet.
+const specsBaseDir = getSpecsDir(project.autoBuildPath);
+const specDir = path.join(
+project.path,
+specsBaseDir,
+task.specId
+);
+const planFilePath = path.join(specDir, AUTO_BUILD_PATHS.IMPLEMENTATION_PLAN);
+let planHasSubtasks = false;
+const planContent = safeReadFileSync(planFilePath);
+if (planContent) {
+try {
+const plan = JSON.parse(planContent);
+planHasSubtasks = checkSubtasksCompletion(plan).totalCount > 0;
+} catch {
+// Invalid/corrupt plan file - treat as no subtasks
+}
+}

// Immediately mark as started so the UI moves the card to In Progress.
// Use XState actor state as source of truth (if actor exists), with task data as fallback.
// - plan_review: User approved the plan, send PLAN_APPROVED to transition to coding
@@ -173,6 +193,11 @@
// XState says plan_review - send PLAN_APPROVED
console.warn('[TASK_START] XState: plan_review -> coding via PLAN_APPROVED');
taskStateManager.handleUiEvent(taskId, { type: 'PLAN_APPROVED' }, task, project);
+} else if (currentXState === 'error' && !planHasSubtasks) {
+// FIX (#1562): Task crashed during planning (no subtasks yet).
+// Uses planHasSubtasks from implementation_plan.json (more reliable than task.subtasks.length).
+console.warn('[TASK_START] XState: error with no plan subtasks -> planning via PLANNING_STARTED');
+taskStateManager.handleUiEvent(taskId, { type: 'PLANNING_STARTED' }, task, project);
} else if (currentXState === 'human_review' || currentXState === 'error') {
// XState says human_review or error - send USER_RESUMED
console.warn('[TASK_START] XState:', currentXState, '-> coding via USER_RESUMED');
@@ -186,6 +211,11 @@
// No XState actor - fallback to task data (e.g., after app restart)
console.warn('[TASK_START] No XState actor, task data: plan_review -> coding via PLAN_APPROVED');
taskStateManager.handleUiEvent(taskId, { type: 'PLAN_APPROVED' }, task, project);
+} else if (task.status === 'error' && !planHasSubtasks) {
+// FIX (#1562): No XState actor, task crashed during planning (no subtasks).
+// Uses planHasSubtasks from implementation_plan.json (more reliable than task.subtasks.length).
+console.warn('[TASK_START] No XState actor, error with no plan subtasks -> planning via PLANNING_STARTED');
+taskStateManager.handleUiEvent(taskId, { type: 'PLANNING_STARTED' }, task, project);
} else if (task.status === 'human_review' || task.status === 'error') {
// No XState actor - fallback to task data for resuming
console.warn('[TASK_START] No XState actor, task data:', task.status, '-> coding via USER_RESUMED');
@@ -204,13 +234,7 @@
console.warn(`[TASK_START] Reset ${resetResult.resetCount} stuck subtask(s) before starting`);
}

-// Start file watcher for this task
-const specsBaseDir = getSpecsDir(project.autoBuildPath);
-const specDir = path.join(
-project.path,
-specsBaseDir,
-task.specId
-);
+// Start file watcher for this task (specsBaseDir and specDir already computed above)
fileWatcher.watch(taskId, specDir);

// Check if spec.md exists (indicates spec creation was already done or in progress)
@@ -220,9 +244,13 @@
// Check if this task needs spec creation first (no spec file = not yet created)
// OR if it has a spec but no implementation plan subtasks (spec created, needs planning/building)
const needsSpecCreation = !hasSpec;
-const needsImplementation = hasSpec && task.subtasks.length === 0;
+// FIX (#1562): Check actual plan file for subtasks, not just task.subtasks.length.
+// When a task crashes during planning, it may have spec.md but an empty/missing
+// implementation_plan.json. Previously, this path would call startTaskExecution
+// (run.py) which expects subtasks to exist. Now we check the actual plan file.
+const needsImplementation = hasSpec && !planHasSubtasks;

-console.warn('[TASK_START] hasSpec:', hasSpec, 'needsSpecCreation:', needsSpecCreation, 'needsImplementation:', needsImplementation);
+console.warn('[TASK_START] hasSpec:', hasSpec, 'planHasSubtasks:', planHasSubtasks, 'needsSpecCreation:', needsSpecCreation, 'needsImplementation:', needsImplementation);

// Get base branch: task-level override takes precedence over project settings
const baseBranch = task.metadata?.baseBranch || project.settings?.mainBranch;
@@ -237,18 +265,12 @@
// Also pass baseBranch so worktrees are created from the correct branch
agentManager.startSpecCreation(taskId, project.path, taskDescription, specDir, task.metadata, baseBranch, project.id);
} else if (needsImplementation) {
-// Spec exists but no subtasks - run run.py to create implementation plan and execute
-// Read the spec.md to get the task description
-const _taskDescription = task.description || task.title;
-try {
-readFileSync(specFilePath, 'utf-8');
-} catch {
-// Use default description
-}

-console.warn('[TASK_START] Starting task execution (no subtasks) for:', task.specId);
-// Start task execution which will create the implementation plan
-// Note: No parallel mode for planning phase - parallel only makes sense with multiple subtasks
+// Spec exists but no valid subtasks in implementation plan
+// FIX (#1562): Use startTaskExecution (run.py) which will create the planner
+// agent session to generate the implementation plan. run.py handles the case
+// where implementation_plan.json is missing or has no subtasks - the planner
+// agent will generate the plan before the coder starts.
+console.warn('[TASK_START] Starting task execution (no valid subtasks in plan) for:', task.specId);
agentManager.startTaskExecution(
taskId,
project.path,
@@ -716,7 +738,19 @@
const specFilePath = path.join(specDir, AUTO_BUILD_PATHS.SPEC_FILE);
const hasSpec = existsSync(specFilePath);
const needsSpecCreation = !hasSpec;
-const needsImplementation = hasSpec && task.subtasks.length === 0;
+// FIX (#1562): Check actual plan file for subtasks, not just task.subtasks.length
+const updatePlanFilePath = path.join(specDir, AUTO_BUILD_PATHS.IMPLEMENTATION_PLAN);
+let updatePlanHasSubtasks = false;
+const updatePlanContent = safeReadFileSync(updatePlanFilePath);
+if (updatePlanContent) {
+try {
+const plan = JSON.parse(updatePlanContent);
+updatePlanHasSubtasks = checkSubtasksCompletion(plan).totalCount > 0;
+} catch {
+// Invalid/corrupt plan file - treat as no subtasks
+}
+}
+const needsImplementation = hasSpec && !updatePlanHasSubtasks;
Comment on lines +741 to +753

🧹 Nitpick | 🔵 Trivial

Duplicated plan-reading logic — extract a helper.

The pattern of reading the plan file → parsing JSON → calling checkSubtasksCompletion is repeated here (lines 741–752) and in the TASK_START handler (lines 164–182). Extract a small helper like planFileHasSubtasks(planFilePath: string): boolean to DRY this up and reduce the chance of the two paths diverging.

♻️ Proposed helper and usage
+/**
+ * Check if an implementation_plan.json file contains valid subtasks.
+ */
+function planFileHasSubtasks(planFilePath: string): boolean {
+  const content = safeReadFileSync(planFilePath);
+  if (!content) return false;
+  try {
+    const plan = JSON.parse(content);
+    return checkSubtasksCompletion(plan).totalCount > 0;
+  } catch {
+    return false;
+  }
+}

Then replace the inline blocks in both TASK_START and TASK_UPDATE_STATUS:

-      let planHasSubtasks = false;
-      const planContent = safeReadFileSync(planFilePath);
-      if (planContent) {
-        try {
-          const plan = JSON.parse(planContent);
-          planHasSubtasks = checkSubtasksCompletion(plan).totalCount > 0;
-        } catch {
-          // Invalid/corrupt plan file - treat as no subtasks
-        }
-      }
+      const planHasSubtasks = planFileHasSubtasks(planFilePath);
🤖 Prompt for AI Agents
In `@apps/frontend/src/main/ipc-handlers/task/execution-handlers.ts` around lines
741 - 753, Extract a helper function planFileHasSubtasks(planFilePath: string):
boolean that encapsulates the current pattern: read file with safeReadFileSync,
JSON.parse the content inside a try/catch, call
checkSubtasksCompletion(parsedPlan).totalCount > 0 and return that result (false
on read/parse errors or missing file); then replace the duplicated inline logic
in both the TASK_UPDATE_STATUS handler (where updatePlanFilePath /
updatePlanHasSubtasks / needsImplementation are used) and the TASK_START handler
(the block using specDir, AUTO_BUILD_PATHS.IMPLEMENTATION_PLAN and
checkSubtasksCompletion) to call planFileHasSubtasks(...) instead so both paths
share the same implementation.


console.warn('[TASK_UPDATE_STATUS] hasSpec:', hasSpec, 'needsSpecCreation:', needsSpecCreation, 'needsImplementation:', needsImplementation);

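For readers skimming the PR, the TASK_START routing above boils down to a small decision table. The following standalone TypeScript sketch is illustrative only: the function eventForRestart does not exist in this PR, and planHasSubtasks simply mirrors the handler's local variable.

// Illustrative distillation of the TASK_START branching:
// which XState event to send when the user (re)starts a task.
type RestartEvent = 'PLAN_APPROVED' | 'PLANNING_STARTED' | 'USER_RESUMED' | null;

function eventForRestart(state: string, planHasSubtasks: boolean): RestartEvent {
  if (state === 'plan_review') return 'PLAN_APPROVED'; // plan approved -> coding
  // FIX (#1562): a crash during planning leaves the task in `error` with no
  // subtasks in implementation_plan.json, so restart planning rather than
  // resuming a coder that has nothing to execute.
  if (state === 'error' && !planHasSubtasks) return 'PLANNING_STARTED';
  if (state === 'human_review' || state === 'error') return 'USER_RESUMED';
  return null; // any other state: no transition event needed
}

The same logic applies both when an XState actor exists (using its current state) and when only persisted task data is available after an app restart.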
6 changes: 5 additions & 1 deletion apps/frontend/src/shared/state-machines/task-machine.ts
@@ -126,12 +126,16 @@ export const taskMachine = createMachine(
on: {
CREATE_PR: 'creating_pr',
MARK_DONE: 'done',
-USER_RESUMED: { target: 'coding', actions: 'clearReviewReason' }
+USER_RESUMED: { target: 'coding', actions: 'clearReviewReason' },
+// Allow restarting planning from human_review (e.g., incomplete task with no subtasks)
+PLANNING_STARTED: { target: 'planning', actions: 'clearReviewReason' }
}
},
error: {
on: {
USER_RESUMED: { target: 'coding', actions: 'clearReviewReason' },
+// Allow restarting from error back to planning (e.g., spec creation crashed)
+PLANNING_STARTED: { target: 'planning', actions: 'clearReviewReason' },
MARK_DONE: 'done'
}
},
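As a usage note, here is a minimal sketch of how the new transition behaves at runtime. It assumes the XState v5 createActor API (under v4 the equivalent would be interpret) and that the actor has already reached the error state:

import { createActor } from 'xstate';
import { taskMachine } from './task-machine';

const actor = createActor(taskMachine).start();
// ...suppose a planning crash has driven the machine into the `error` state...
actor.send({ type: 'PLANNING_STARTED' });
// With this change the machine re-enters `planning` (clearing reviewReason)
// instead of dropping the event and leaving the task stuck.
console.log(actor.getSnapshot().value); // expected: 'planning'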