diff --git a/apps/backend/cli/qa_commands.py b/apps/backend/cli/qa_commands.py
index 7c1298e86c..95dcd11d04 100644
--- a/apps/backend/cli/qa_commands.py
+++ b/apps/backend/cli/qa_commands.py
@@ -101,8 +101,12 @@ def handle_qa_command(
             print("\n✅ Build already approved by QA.")
         else:
             completed, total = count_subtasks(spec_dir)
-            print(f"\n❌ Build not complete ({completed}/{total} subtasks).")
-            print("Complete all subtasks before running QA validation.")
+            print(
+                f"\n❌ Build not ready for QA ({completed}/{total} subtasks completed)."
+            )
+            print(
+                "All subtasks must reach a terminal state (completed, failed, or stuck) before running QA."
+            )
         return

     if has_human_feedback:
diff --git a/apps/backend/core/progress.py b/apps/backend/core/progress.py
index ffe74da7c9..7f69be7208 100644
--- a/apps/backend/core/progress.py
+++ b/apps/backend/core/progress.py
@@ -115,6 +115,65 @@ def is_build_complete(spec_dir: Path) -> bool:
     return total > 0 and completed == total


+def _load_stuck_subtask_ids(spec_dir: Path) -> set[str]:
+    """Load IDs of subtasks marked as stuck from attempt_history.json."""
+    stuck_subtask_ids: set[str] = set()
+    attempt_history_file = spec_dir / "memory" / "attempt_history.json"
+    if attempt_history_file.exists():
+        try:
+            with open(attempt_history_file, encoding="utf-8") as f:
+                attempt_history = json.load(f)
+            for entry in attempt_history.get("stuck_subtasks", []):
+                if "subtask_id" in entry:
+                    stuck_subtask_ids.add(entry["subtask_id"])
+        except (OSError, json.JSONDecodeError, UnicodeDecodeError):
+            # Corrupted attempt history is non-fatal; skip stuck-subtask filtering
+            pass
+    return stuck_subtask_ids
+
+
+def is_build_ready_for_qa(spec_dir: Path) -> bool:
+    """
+    Check if the build is ready for QA validation.
+
+    Unlike is_build_complete() which requires all subtasks to be "completed",
+    this function considers the build ready when all subtasks have reached
+    a terminal state: completed, failed, or stuck (exhausted retries in attempt_history.json).
+
+    Args:
+        spec_dir: Directory containing implementation_plan.json
+
+    Returns:
+        True if all subtasks are in a terminal state, False otherwise
+    """
+    plan_file = spec_dir / "implementation_plan.json"
+    if not plan_file.exists():
+        return False
+
+    stuck_subtask_ids = _load_stuck_subtask_ids(spec_dir)
+
+    try:
+        with open(plan_file, encoding="utf-8") as f:
+            plan = json.load(f)
+
+        total = 0
+        terminal = 0
+
+        for phase in plan.get("phases", []):
+            for subtask in phase.get("subtasks", []):
+                total += 1
+                status = subtask.get("status", "pending")
+                subtask_id = subtask.get("id")
+
+                if status in ("completed", "failed") or subtask_id in stuck_subtask_ids:
+                    terminal += 1
+
+        return total > 0 and terminal == total
+
+    except (OSError, json.JSONDecodeError, UnicodeDecodeError):
+        return False
+
+
 def get_progress_percentage(spec_dir: Path) -> float:
     """
     Get the progress as a percentage.
@@ -420,22 +479,7 @@ def get_next_subtask(spec_dir: Path) -> dict | None:
     if not plan_file.exists():
         return None

-    # Load stuck subtasks from recovery manager's attempt history
-    stuck_subtask_ids = set()
-    attempt_history_file = spec_dir / "memory" / "attempt_history.json"
-    if attempt_history_file.exists():
-        try:
-            with open(attempt_history_file, encoding="utf-8") as f:
-                attempt_history = json.load(f)
-                # Collect IDs of subtasks marked as stuck
-                stuck_subtask_ids = {
-                    entry["subtask_id"]
-                    for entry in attempt_history.get("stuck_subtasks", [])
-                    if "subtask_id" in entry
-                }
-        except (OSError, json.JSONDecodeError, UnicodeDecodeError):
-            # If we can't read the file, continue without stuck checking
-            pass
+    stuck_subtask_ids = _load_stuck_subtask_ids(spec_dir)

     try:
         with open(plan_file, encoding="utf-8") as f:
diff --git a/apps/backend/progress.py b/apps/backend/progress.py
index 5cc2afeae5..96d56c8892 100644
--- a/apps/backend/progress.py
+++ b/apps/backend/progress.py
@@ -14,6 +14,7 @@
     get_plan_summary,
     get_progress_percentage,
     is_build_complete,
+    is_build_ready_for_qa,
     print_build_complete_banner,
     print_paused_banner,
     print_progress_summary,
@@ -29,6 +30,7 @@
     "get_plan_summary",
     "get_progress_percentage",
     "is_build_complete",
+    "is_build_ready_for_qa",
     "print_build_complete_banner",
     "print_paused_banner",
     "print_progress_summary",
diff --git a/apps/backend/qa/criteria.py b/apps/backend/qa/criteria.py
index d58906d243..18ada8169d 100644
--- a/apps/backend/qa/criteria.py
+++ b/apps/backend/qa/criteria.py
@@ -8,7 +8,7 @@
 import json
 from pathlib import Path

-from progress import is_build_complete
+from progress import is_build_ready_for_qa

 # =============================================================================
 # IMPLEMENTATION PLAN I/O
@@ -95,10 +95,10 @@ def should_run_qa(spec_dir: Path) -> bool:
     Determine if QA validation should run.

     QA should run when:
-    - All subtasks are completed
+    - All subtasks have reached a terminal state (completed, failed, or stuck)
     - QA has not yet approved
     """
-    if not is_build_complete(spec_dir):
+    if not is_build_ready_for_qa(spec_dir):
         return False

     if is_qa_approved(spec_dir):
diff --git a/apps/backend/qa/loop.py b/apps/backend/qa/loop.py
index 577f38d879..9bf7f5d776 100644
--- a/apps/backend/qa/loop.py
+++ b/apps/backend/qa/loop.py
@@ -28,7 +28,7 @@
     get_phase_model_betas,
 )
 from phase_event import ExecutionPhase, emit_phase
-from progress import count_subtasks, is_build_complete
+from progress import count_subtasks, is_build_ready_for_qa
 from security.constants import PROJECT_DIR_ENV_VAR
 from task_logger import (
     LogPhase,
@@ -114,14 +114,25 @@ async def run_qa_validation_loop(
     # Initialize task logger for the validation phase
     task_logger = get_task_logger(spec_dir)

-    # Verify build is complete
-    if not is_build_complete(spec_dir):
-        debug_warning("qa_loop", "Build is not complete, cannot run QA")
-        print("\n❌ Build is not complete. Cannot run QA validation.")
-        completed, total = count_subtasks(spec_dir)
-        debug("qa_loop", "Build progress", completed=completed, total=total)
-        print(f"   Progress: {completed}/{total} subtasks completed")
-        return False
+    # Check if there's pending human feedback that needs to be processed
+    fix_request_file = spec_dir / "QA_FIX_REQUEST.md"
+    has_human_feedback = fix_request_file.exists()
+
+    # Human feedback takes priority — if the user explicitly asked to proceed,
+    # skip the build completeness gate entirely
+    if not has_human_feedback:
+        # Verify build is ready for QA (all subtasks in terminal state)
+        if not is_build_ready_for_qa(spec_dir):
+            debug_warning(
+                "qa_loop", "Build is not ready for QA - subtasks still in progress"
+            )
+            print("\n❌ Build is not ready for QA validation.")
+            completed, total = count_subtasks(spec_dir)
+            debug("qa_loop", "Build progress", completed=completed, total=total)
+            print(
+                f"   Progress: {completed}/{total} subtasks in terminal state (completed/failed/stuck)"
+            )
+            return False

     # Emit phase event at start of QA validation (before any early returns)
     emit_phase(ExecutionPhase.QA_REVIEW, "Starting QA validation")
@@ -136,10 +147,6 @@ async def run_qa_validation_loop(
         f"[Fast Mode] {'ENABLED' if fast_mode else 'disabled'} for QA validation",
     )

-    # Check if there's pending human feedback that needs to be processed
-    fix_request_file = spec_dir / "QA_FIX_REQUEST.md"
-    has_human_feedback = fix_request_file.exists()
-
     # Check if already approved - but if there's human feedback, we need to process it first
     if is_qa_approved(spec_dir) and not has_human_feedback:
         debug_success("qa_loop", "Build already approved by QA")
diff --git a/apps/backend/services/recovery.py b/apps/backend/services/recovery.py
index af9eb6f7a1..d23af5cc5c 100644
--- a/apps/backend/services/recovery.py
+++ b/apps/backend/services/recovery.py
@@ -21,6 +21,8 @@
 from enum import Enum
 from pathlib import Path

+from core.file_utils import write_json_atomic
+
 # Recovery manager configuration
 ATTEMPT_WINDOW_SECONDS = 7200  # Only count attempts within last 2 hours
 MAX_ATTEMPT_HISTORY_PER_SUBTASK = 50  # Cap stored attempts per subtask
@@ -514,6 +516,36 @@ def mark_subtask_stuck(self, subtask_id: str, reason: str) -> None:

         self._save_attempt_history(history)

+        # Also update the subtask status in implementation_plan.json
+        # so that other callers (like is_build_ready_for_qa) see accurate status
+        try:
+            plan_file = self.spec_dir / "implementation_plan.json"
+            if plan_file.exists():
+                with open(plan_file, encoding="utf-8") as f:
+                    plan = json.load(f)
+
+                updated = False
+                for phase in plan.get("phases", []):
+                    for subtask in phase.get("subtasks", []):
+                        if subtask.get("id") == subtask_id:
+                            subtask["status"] = "failed"
+                            stuck_note = f"Marked as stuck: {reason}"
+                            existing = subtask.get("actual_output", "")
+                            subtask["actual_output"] = (
+                                f"{stuck_note}\n{existing}" if existing else stuck_note
+                            )
+                            updated = True
+                            break
+                    if updated:
+                        break
+
+                if updated:
+                    write_json_atomic(plan_file, plan, indent=2)
+        except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
+            logger.warning(
+                f"Failed to update implementation_plan.json for stuck subtask {subtask_id}: {e}"
+            )
+
     def get_stuck_subtasks(self) -> list[dict]:
         """
         Get all subtasks marked as stuck.
diff --git a/tests/agents/test_agent_flow.py b/tests/agents/test_agent_flow.py
index cb7973fb70..0437871bc8 100644
--- a/tests/agents/test_agent_flow.py
+++ b/tests/agents/test_agent_flow.py
@@ -922,8 +922,8 @@ class TestQALoopStateTransitions:
     def test_qa_not_required_when_build_incomplete(self, test_env):
         """QA should not run when build is incomplete."""
         from qa_loop import save_implementation_plan
-        # Import the real is_build_complete to patch at the right level
-        from core.progress import is_build_complete as real_is_build_complete
+        # Import the real is_build_ready_for_qa to patch at the right level
+        from core.progress import is_build_ready_for_qa as real_is_build_ready_for_qa

         temp_dir, spec_dir, project_dir = test_env

@@ -943,16 +943,16 @@ def test_qa_not_required_when_build_incomplete(self, test_env):
         }
         save_implementation_plan(spec_dir, plan)

-        # Patch is_build_complete where it's used (qa.criteria) to use real implementation
         # This is needed because test_qa_criteria.py module-level mocks may pollute
-        with patch('qa.criteria.is_build_complete', side_effect=real_is_build_complete):
+        # Patch is_build_ready_for_qa where it's used (qa.criteria) to use real implementation
+        with patch('qa.criteria.is_build_ready_for_qa', side_effect=real_is_build_ready_for_qa):
             from qa.criteria import should_run_qa
             assert should_run_qa(spec_dir) is False, "QA should not run with pending subtasks"

     def test_qa_required_when_build_complete(self, test_env):
         """QA should run when build is complete and not yet approved."""
         from qa_loop import save_implementation_plan
-        from core.progress import is_build_complete as real_is_build_complete
+        from core.progress import is_build_ready_for_qa as real_is_build_ready_for_qa

         temp_dir, spec_dir, project_dir = test_env

@@ -972,15 +972,15 @@ def test_qa_required_when_build_complete(self, test_env):
         }
         save_implementation_plan(spec_dir, plan)

-        # Patch is_build_complete where it's used (qa.criteria) to use real implementation
-        with patch('qa.criteria.is_build_complete', side_effect=real_is_build_complete):
+        # Patch is_build_ready_for_qa where it's used (qa.criteria) to use real implementation
+        with patch('qa.criteria.is_build_ready_for_qa', side_effect=real_is_build_ready_for_qa):
             from qa.criteria import should_run_qa
             assert should_run_qa(spec_dir) is True, "QA should run when build complete"

     def test_qa_not_required_when_already_approved(self, test_env):
         """QA should not run when build is already approved."""
         from qa_loop import save_implementation_plan
-        from core.progress import is_build_complete as real_is_build_complete
+        from core.progress import is_build_ready_for_qa as real_is_build_ready_for_qa

         temp_dir, spec_dir, project_dir = test_env

@@ -1003,8 +1003,8 @@ def test_qa_not_required_when_already_approved(self, test_env):
         }
         save_implementation_plan(spec_dir, plan)

-        # Patch is_build_complete where it's used (qa.criteria) to use real implementation
-        with patch('qa.criteria.is_build_complete', side_effect=real_is_build_complete):
+        # Patch is_build_ready_for_qa where it's used (qa.criteria) to use real implementation
+        with patch('qa.criteria.is_build_ready_for_qa', side_effect=real_is_build_ready_for_qa):
             from qa.criteria import should_run_qa
             assert should_run_qa(spec_dir) is False, "QA should not run when already approved"
diff --git a/tests/test_cli_qa_commands.py b/tests/test_cli_qa_commands.py
index a06357a1e9..07d192228f 100644
--- a/tests/test_cli_qa_commands.py
+++ b/tests/test_cli_qa_commands.py
@@ -284,7 +284,7 @@ def test_incomplete_build_message(
         )

         captured = capsys.readouterr()
-        assert "Build not complete" in captured.out
+        assert "Build not ready for QA" in captured.out
         assert "1/2" in captured.out

     def test_processes_human_feedback(
diff --git a/tests/test_progress_qa_readiness.py b/tests/test_progress_qa_readiness.py
new file mode 100644
index 0000000000..6887e3cf32
--- /dev/null
+++ b/tests/test_progress_qa_readiness.py
@@ -0,0 +1,418 @@
+#!/usr/bin/env python3
+"""
+Tests for Progress Module - QA Readiness Check
+===============================================
+
+Tests the core/progress.py is_build_ready_for_qa() function which determines
+if a build has reached a terminal state (all subtasks completed, failed, or stuck).
+
+This function differs from is_build_complete() in that it considers builds with
+failed/stuck subtasks as ready for QA validation.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+import pytest
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent / "apps" / "backend"))
+
+from core.progress import is_build_ready_for_qa
+
+
+@pytest.fixture
+def spec_dir(tmp_path):
+    """Create a spec directory for testing."""
+    spec = tmp_path / "spec"
+    spec.mkdir()
+    return spec
+
+
+@pytest.fixture
+def memory_dir(spec_dir):
+    """Create a memory directory for attempt_history.json."""
+    memory = spec_dir / "memory"
+    memory.mkdir()
+    return memory
+
+
+class TestIsBuildReadyForQA:
+    """Tests for is_build_ready_for_qa function."""
+
+    def test_all_subtasks_completed(self, spec_dir: Path):
+        """Returns True when all subtasks are completed."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed"},
+                        {"id": "subtask-1-2", "status": "completed"},
+                    ],
+                },
+                {
+                    "phase": 2,
+                    "name": "Phase 2",
+                    "subtasks": [
+                        {"id": "subtask-2-1", "status": "completed"},
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is True
+
+    def test_mix_completed_and_pending(self, spec_dir: Path):
+        """Returns False when some subtasks are still pending."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed"},
+                        {"id": "subtask-1-2", "status": "pending"},
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is False
+
+    def test_mix_completed_and_failed(self, spec_dir: Path):
+        """Returns True when all subtasks are terminal (completed + failed)."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed"},
+                        {"id": "subtask-1-2", "status": "failed"},
+                    ],
+                },
+                {
+                    "phase": 2,
+                    "name": "Phase 2",
+                    "subtasks": [
+                        {"id": "subtask-2-1", "status": "completed"},
+                        {"id": "subtask-2-2", "status": "failed"},
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is True
+
+    def test_subtask_stuck_in_attempt_history(self, spec_dir: Path, memory_dir: Path):
+        """Returns True when subtask is marked stuck in attempt_history even if plan shows pending."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed"},
+                        {"id": "subtask-1-2", "status": "pending"},  # Stuck but plan not updated
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        # Create attempt_history with stuck subtask
+        attempt_history = {
+            "stuck_subtasks": [
+                {
+                    "subtask_id": "subtask-1-2",
+                    "reason": "Circular fix after 3 attempts",
+                    "escalated_at": "2024-01-01T12:00:00Z",
+                    "attempt_count": 3,
+                }
+            ],
+            "subtasks": {},
+        }
+        history_file = memory_dir / "attempt_history.json"
+        history_file.write_text(json.dumps(attempt_history))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is True
+
+    def test_no_plan_file(self, spec_dir: Path):
+        """Returns False when implementation_plan.json doesn't exist."""
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is False
+
+    def test_empty_phases(self, spec_dir: Path):
+        """Returns False when plan has no subtasks (total=0)."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is False
+
+    def test_phases_with_no_subtasks(self, spec_dir: Path):
+        """Returns False when phases exist but contain no subtasks."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is False
+
+    def test_no_attempt_history_file(self, spec_dir: Path):
+        """Returns True based on plan file alone when attempt_history.json doesn't exist."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed"},
+                        {"id": "subtask-1-2", "status": "failed"},
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        # No attempt_history.json created
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is True
+
+    def test_invalid_json_in_attempt_history(self, spec_dir: Path, memory_dir: Path):
+        """Gracefully handles invalid JSON in attempt_history and falls back to plan-only check."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed"},
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        # Create invalid JSON in attempt_history
+        history_file = memory_dir / "attempt_history.json"
+        history_file.write_text("{ invalid json }")
+
+        # Should fallback to plan-only check and return True
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is True
+
+    def test_invalid_json_in_plan(self, spec_dir: Path):
+        """Returns False when implementation_plan.json contains invalid JSON."""
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text("{ invalid json }")
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is False
+
+    def test_empty_plan_file(self, spec_dir: Path):
+        """Returns False when implementation_plan.json is empty."""
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text("")
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is False
+
+    def test_multiple_stuck_subtasks(self, spec_dir: Path, memory_dir: Path):
+        """Returns True when multiple subtasks are stuck in attempt_history."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "pending"},
+                        {"id": "subtask-1-2", "status": "pending"},
+                        {"id": "subtask-1-3", "status": "completed"},
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        # Mark two subtasks as stuck
+        attempt_history = {
+            "stuck_subtasks": [
+                {"subtask_id": "subtask-1-1", "reason": "Error 1"},
+                {"subtask_id": "subtask-1-2", "reason": "Error 2"},
+            ],
+            "subtasks": {},
+        }
+        history_file = memory_dir / "attempt_history.json"
+        history_file.write_text(json.dumps(attempt_history))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is True
+
+    def test_mix_of_all_terminal_states(self, spec_dir: Path, memory_dir: Path):
+        """Returns True with completed, failed, and stuck subtasks."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed"},
+                        {"id": "subtask-1-2", "status": "failed"},
+                        {"id": "subtask-1-3", "status": "pending"},  # Will be stuck
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        attempt_history = {
+            "stuck_subtasks": [
+                {"subtask_id": "subtask-1-3", "reason": "Stuck"},
+            ],
+            "subtasks": {},
+        }
+        history_file = memory_dir / "attempt_history.json"
+        history_file.write_text(json.dumps(attempt_history))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is True
+
+    def test_in_progress_status(self, spec_dir: Path):
+        """Returns False when subtasks are in_progress."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed"},
+                        {"id": "subtask-1-2", "status": "in_progress"},
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is False
+
+    def test_missing_status_field(self, spec_dir: Path):
+        """Returns False when subtask has no status field (defaults to pending)."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed"},
+                        {"id": "subtask-1-2"},  # No status field
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is False
+
+    def test_stuck_subtask_without_id_field(self, spec_dir: Path, memory_dir: Path):
+        """Ignores stuck subtasks without subtask_id field in attempt_history."""
+        plan = {
+            "feature": "Test Feature",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "pending"},
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan))
+
+        # Malformed stuck subtask entry without subtask_id
+        attempt_history = {
+            "stuck_subtasks": [
+                {"reason": "Error", "escalated_at": "2024-01-01T12:00:00Z"}
+            ],
+            "subtasks": {},
+        }
+        history_file = memory_dir / "attempt_history.json"
+        history_file.write_text(json.dumps(attempt_history))
+
+        # Should return False since subtask-1-1 is still pending
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is False
+
+    def test_unicode_encoding_in_files(self, spec_dir: Path, memory_dir: Path):
+        """Handles UTF-8 encoded content correctly."""
+        plan = {
+            "feature": "Test Feature 测试功能",
+            "phases": [
+                {
+                    "phase": 1,
+                    "name": "Phase 1",
+                    "subtasks": [
+                        {"id": "subtask-1-1", "status": "completed", "notes": "完成"},
+                    ],
+                },
+            ],
+        }
+        plan_file = spec_dir / "implementation_plan.json"
+        plan_file.write_text(json.dumps(plan, ensure_ascii=False), encoding="utf-8")
+
+        attempt_history = {
+            "stuck_subtasks": [],
+            "subtasks": {},
+        }
+        history_file = memory_dir / "attempt_history.json"
+        history_file.write_text(json.dumps(attempt_history, ensure_ascii=False), encoding="utf-8")
+
+        result = is_build_ready_for_qa(spec_dir)
+        assert result is True
diff --git a/tests/test_qa_criteria.py b/tests/test_qa_criteria.py
index 41c874d416..c8fc0fc419 100644
--- a/tests/test_qa_criteria.py
+++ b/tests/test_qa_criteria.py
@@ -529,7 +529,7 @@ def test_should_run_qa_build_not_complete(self, spec_dir: Path):
         plan = {"feature": "Test", "phases": []}
         save_implementation_plan(spec_dir, plan)

-        with patch('qa.criteria.is_build_complete', return_value=False):
+        with patch('qa.criteria.is_build_ready_for_qa', return_value=False):
             result = should_run_qa(spec_dir)

         assert result is False
@@ -540,15 +540,15 @@ def test_should_run_qa_already_approved(self, spec_dir: Path, qa_signoff_approve
         plan = {"feature": "Test", "qa_signoff": qa_signoff_approved}
         save_implementation_plan(spec_dir, plan)

-        with patch('qa.criteria.is_build_complete', return_value=True):
+        with patch('qa.criteria.is_build_ready_for_qa', return_value=True):
             result = should_run_qa(spec_dir)

         assert result is False

     def test_should_run_qa_build_complete_not_approved(self, spec_dir: Path):
         """Returns True when build complete but not approved."""
-        # Explicitly patch is_build_complete to return True
+        # Explicitly patch is_build_ready_for_qa to return True
         from unittest.mock import patch
-        with patch('qa.criteria.is_build_complete', return_value=True):
+        with patch('qa.criteria.is_build_ready_for_qa', return_value=True):
             plan = {"feature": "Test", "phases": []}
             save_implementation_plan(spec_dir, plan)
@@ -563,15 +563,15 @@ def test_should_run_qa_rejected_status(self, spec_dir: Path, qa_signoff_rejected
         plan = {"feature": "Test", "qa_signoff": qa_signoff_rejected}
         save_implementation_plan(spec_dir, plan)

-        with patch('qa.criteria.is_build_complete', return_value=True):
+        with patch('qa.criteria.is_build_ready_for_qa', return_value=True):
             result = should_run_qa(spec_dir)

         assert result is True

     def test_should_run_qa_no_plan(self, spec_dir: Path):
-        """Returns False when no plan exists (build not complete)."""
+        """Returns False when no plan exists (build not ready)."""
         from unittest.mock import patch

-        with patch('qa.criteria.is_build_complete', return_value=False):
+        with patch('qa.criteria.is_build_ready_for_qa', return_value=False):
             result = should_run_qa(spec_dir)
             assert result is False
@@ -905,7 +905,7 @@ def test_full_qa_workflow_approved_first_try(self, spec_dir: Path):
         save_implementation_plan(spec_dir, plan)

         # Should run QA
-        with patch('qa.criteria.is_build_complete', return_value=True):
+        with patch('qa.criteria.is_build_ready_for_qa', return_value=True):
             assert should_run_qa(spec_dir) is True

         # QA approves
@@ -917,7 +917,7 @@
         save_implementation_plan(spec_dir, plan)

         # Should not run QA again or fixes
-        with patch('qa.criteria.is_build_complete', return_value=True):
+        with patch('qa.criteria.is_build_ready_for_qa', return_value=True):
             assert should_run_qa(spec_dir) is False
         assert should_run_fixes(spec_dir) is False
         assert is_qa_approved(spec_dir) is True
@@ -931,7 +931,7 @@ def test_full_qa_workflow_with_fixes(self, spec_dir: Path):
         save_implementation_plan(spec_dir, plan)

         # Should run QA
-        with patch('qa.criteria.is_build_complete', return_value=True):
+        with patch('qa.criteria.is_build_ready_for_qa', return_value=True):
             assert should_run_qa(spec_dir) is True

         # QA rejects
@@ -979,5 +979,5 @@ def test_qa_workflow_max_iterations(self, spec_dir: Path):
         # Should not run more fixes after max iterations
         assert should_run_fixes(spec_dir) is False
         # But QA can still be run (to re-check)
-        with patch('qa.criteria.is_build_complete', return_value=True):
+        with patch('qa.criteria.is_build_ready_for_qa', return_value=True):
             assert should_run_qa(spec_dir) is True
diff --git a/tests/test_recovery.py b/tests/test_recovery.py
index 9741fdb5ca..cd40e4320d 100755
--- a/tests/test_recovery.py
+++ b/tests/test_recovery.py
@@ -250,6 +250,119 @@ def test_mark_subtask_stuck(test_env):
     assert history["status"] == "stuck", "Chunk status not updated to stuck"


+def test_mark_subtask_stuck_updates_plan(test_env):
+    """Test that mark_subtask_stuck updates implementation_plan.json status."""
+    temp_dir, spec_dir, project_dir = test_env
+
+    # Create implementation_plan.json with subtask in_progress
+    plan = {
+        "feature": "Test Feature",
+        "phases": [
+            {
+                "phase": 1,
+                "name": "Phase 1",
+                "subtasks": [
+                    {
+                        "id": "subtask-1-1",
+                        "description": "Implement feature A",
+                        "status": "in_progress",
+                    },
+                    {
+                        "id": "subtask-1-2",
+                        "description": "Implement feature B",
+                        "status": "completed",
+                    },
+                ],
+            },
+        ],
+    }
+    plan_file = spec_dir / "implementation_plan.json"
+    plan_file.write_text(json.dumps(plan, indent=2))
+
+    manager = RecoveryManager(spec_dir, project_dir)
+
+    # Record some attempts for subtask-1-1
+    manager.record_attempt("subtask-1-1", 1, False, "Try 1", "Error 1")
+    manager.record_attempt("subtask-1-1", 2, False, "Try 2", "Error 2")
+    manager.record_attempt("subtask-1-1", 3, False, "Try 3", "Error 3")
+
+    # Mark subtask-1-1 as stuck
+    reason = "Circular fix after 3 attempts"
+    manager.mark_subtask_stuck("subtask-1-1", reason)
+
+    # Verify plan file was updated
+    with open(plan_file, encoding="utf-8") as f:
+        updated_plan = json.load(f)
+
+    # Find the stuck subtask
+    subtask_1_1 = updated_plan["phases"][0]["subtasks"][0]
+    assert subtask_1_1["id"] == "subtask-1-1"
+    assert subtask_1_1["status"] == "failed", "Stuck subtask status should be 'failed'"
+    assert "actual_output" in subtask_1_1, "actual_output field should be added"
+    assert "Marked as stuck" in subtask_1_1["actual_output"], "actual_output should mention stuck status"
+    assert reason in subtask_1_1["actual_output"], "actual_output should include the reason"
+
+    # Verify other subtask was not affected
+    subtask_1_2 = updated_plan["phases"][0]["subtasks"][1]
+    assert subtask_1_2["id"] == "subtask-1-2"
+    assert subtask_1_2["status"] == "completed", "Other subtask status should be unchanged"
+
+
+def test_mark_subtask_stuck_plan_missing_subtask(test_env):
+    """Test mark_subtask_stuck when subtask doesn't exist in plan."""
+    temp_dir, spec_dir, project_dir = test_env
+
+    # Create plan without the subtask we'll mark as stuck
+    plan = {
+        "feature": "Test Feature",
+        "phases": [
+            {
+                "phase": 1,
+                "name": "Phase 1",
+                "subtasks": [
+                    {
+                        "id": "subtask-1-1",
+                        "description": "Implement feature A",
+                        "status": "completed",
+                    },
+                ],
+            },
+        ],
+    }
+    plan_file = spec_dir / "implementation_plan.json"
+    plan_file.write_text(json.dumps(plan, indent=2))
+
+    manager = RecoveryManager(spec_dir, project_dir)
+
+    # Mark a non-existent subtask as stuck
+    manager.mark_subtask_stuck("subtask-2-1", "Some error")
+
+    # Verify plan file was not corrupted
+    with open(plan_file, encoding="utf-8") as f:
+        updated_plan = json.load(f)
+
+    # Plan should remain unchanged
+    assert len(updated_plan["phases"]) == 1
+    assert len(updated_plan["phases"][0]["subtasks"]) == 1
+    assert updated_plan["phases"][0]["subtasks"][0]["status"] == "completed"
+
+
+def test_mark_subtask_stuck_plan_missing_file(test_env):
+    """Test mark_subtask_stuck when implementation_plan.json doesn't exist."""
+    temp_dir, spec_dir, project_dir = test_env
+
+    manager = RecoveryManager(spec_dir, project_dir)
+
+    # Record attempts and mark as stuck (should not crash)
+    manager.record_attempt("subtask-1", 1, False, "Try 1", "Error 1")
+    manager.mark_subtask_stuck("subtask-1", "Some error")
+
+    # Verify stuck status in attempt_history
+    stuck_subtasks = manager.get_stuck_subtasks()
+    assert len(stuck_subtasks) == 1
+    assert stuck_subtasks[0]["subtask_id"] == "subtask-1"
+
+
 def test_recovery_hints(test_env):
     """Test recovery hints generation."""
     temp_dir, spec_dir, project_dir = test_env