fix: resolve QA validation deadlock when subtasks are stuck or failed #1853
base: develop
apps/backend/core/progress.py

```diff
@@ -115,6 +115,61 @@
     return total > 0 and completed == total
 
 
+def is_build_ready_for_qa(spec_dir: Path) -> bool:
+    """
+    Check if the build is ready for QA validation.
+
+    Unlike is_build_complete() which requires all subtasks to be "completed",
+    this function considers the build ready when all subtasks have reached
+    a terminal state: completed, failed, or stuck (exhausted retries in
+    attempt_history.json).
+
+    Args:
+        spec_dir: Directory containing implementation_plan.json
+
+    Returns:
+        True if all subtasks are in a terminal state, False otherwise
+    """
+    plan_file = spec_dir / "implementation_plan.json"
+    if not plan_file.exists():
+        return False
+
+    # Load stuck subtask IDs from attempt_history.json
+    stuck_subtask_ids = set()
+    attempt_history_file = spec_dir / "memory" / "attempt_history.json"
+    if attempt_history_file.exists():
+        try:
+            with open(attempt_history_file, encoding="utf-8") as f:
+                attempt_history = json.load(f)
+            stuck_subtask_ids = {
+                entry["subtask_id"]
+                for entry in attempt_history.get("stuck_subtasks", [])
+                if "subtask_id" in entry
+            }
+        except (OSError, json.JSONDecodeError, UnicodeDecodeError):
+            pass
```
Comment on lines +136 to +149
Contributor

🛠️ Refactor suggestion | 🟠 Major

Duplicated stuck-subtask loading logic — extract a shared helper.

The block that reads `attempt_history.json` and collects stuck subtask IDs is duplicated; pull it into a shared helper.

Proposed helper:

```python
def _load_stuck_subtask_ids(spec_dir: Path) -> set[str]:
    """Load IDs of subtasks marked as stuck from attempt_history.json."""
    stuck_subtask_ids: set[str] = set()
    attempt_history_file = spec_dir / "memory" / "attempt_history.json"
    if attempt_history_file.exists():
        try:
            with open(attempt_history_file, encoding="utf-8") as f:
                attempt_history = json.load(f)
            stuck_subtask_ids = {
                entry["subtask_id"]
                for entry in attempt_history.get("stuck_subtasks", [])
                if "subtask_id" in entry
            }
        except (OSError, json.JSONDecodeError, UnicodeDecodeError):
            pass
    return stuck_subtask_ids
```

Then replace both call sites with `stuck_subtask_ids = _load_stuck_subtask_ids(spec_dir)`.
```diff
+    try:
+        with open(plan_file, encoding="utf-8") as f:
+            plan = json.load(f)
+
+        total = 0
+        terminal = 0
+
+        for phase in plan.get("phases", []):
+            for subtask in phase.get("subtasks", []):
+                total += 1
+                status = subtask.get("status", "pending")
+                subtask_id = subtask.get("id")
+
+                if status in ("completed", "failed") or subtask_id in stuck_subtask_ids:
+                    terminal += 1
+
+        return total > 0 and terminal == total
+
+    except (OSError, json.JSONDecodeError, UnicodeDecodeError):
+        return False
+
+
 def get_progress_percentage(spec_dir: Path) -> float:
     """
     Get the progress as a percentage.
```
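For context on how the new gate differs from `is_build_complete()`, here is a minimal, self-contained sketch (not part of the PR). It assumes `progress` resolves to `apps/backend/core/progress.py` on the import path and uses only the JSON shapes visible in the diff above; the subtask IDs and the expected outputs are illustrative.

```python
# Illustrative only: shows why a stuck subtask no longer blocks QA.
import json
import tempfile
from pathlib import Path

from progress import is_build_complete, is_build_ready_for_qa  # assumed import path

with tempfile.TemporaryDirectory() as tmp:
    spec_dir = Path(tmp)
    (spec_dir / "memory").mkdir()

    # One completed subtask, one that never completed.
    plan = {
        "phases": [
            {
                "subtasks": [
                    {"id": "subtask-1", "status": "completed"},
                    {"id": "subtask-2", "status": "pending"},
                ]
            }
        ]
    }
    (spec_dir / "implementation_plan.json").write_text(json.dumps(plan), encoding="utf-8")

    # subtask-2 exhausted its retries and was recorded as stuck.
    history = {"stuck_subtasks": [{"subtask_id": "subtask-2"}]}
    (spec_dir / "memory" / "attempt_history.json").write_text(
        json.dumps(history), encoding="utf-8"
    )

    print(is_build_complete(spec_dir))      # expected False: subtask-2 never completed
    print(is_build_ready_for_qa(spec_dir))  # expected True: both subtasks are terminal
```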
apps/backend/qa/loop.py

```diff
@@ -28,7 +28,7 @@
     get_phase_model_betas,
 )
 from phase_event import ExecutionPhase, emit_phase
-from progress import count_subtasks, is_build_complete
+from progress import count_subtasks, is_build_ready_for_qa
 from security.constants import PROJECT_DIR_ENV_VAR
 from task_logger import (
     LogPhase,

@@ -114,14 +114,23 @@ async def run_qa_validation_loop(
     # Initialize task logger for the validation phase
     task_logger = get_task_logger(spec_dir)
 
-    # Verify build is complete
-    if not is_build_complete(spec_dir):
-        debug_warning("qa_loop", "Build is not complete, cannot run QA")
-        print("\n❌ Build is not complete. Cannot run QA validation.")
-        completed, total = count_subtasks(spec_dir)
-        debug("qa_loop", "Build progress", completed=completed, total=total)
-        print(f"   Progress: {completed}/{total} subtasks completed")
-        return False
+    # Check if there's pending human feedback that needs to be processed
+    fix_request_file = spec_dir / "QA_FIX_REQUEST.md"
+    has_human_feedback = fix_request_file.exists()
+
+    # Human feedback takes priority — if the user explicitly asked to proceed,
+    # skip the build completeness gate entirely
+    if not has_human_feedback:
+        # Verify build is ready for QA (all subtasks in terminal state)
+        if not is_build_ready_for_qa(spec_dir):
+            debug_warning(
+                "qa_loop", "Build is not ready for QA - subtasks still in progress"
+            )
+            print("\n❌ Build is not ready for QA validation.")
+            completed, total = count_subtasks(spec_dir)
+            debug("qa_loop", "Build progress", completed=completed, total=total)
+            print(f"   Progress: {completed}/{total} subtasks completed")
+            return False
```
Comment on lines +117 to +133
Contributor

🧩 Analysis chain

🏁 Scripts executed:

```bash
# Find the count_subtasks function definition
rg -n 'def count_subtasks' --type=py -A 15

# Also search for is_build_ready_for_qa to understand the terminal state logic
rg -n 'def is_build_ready_for_qa' --type=py -A 20

# Check what files are in the qa directory
find apps/backend/qa -type f -name "*.py" | head -20

# Get full count_subtasks implementation
sed -n '33,65p' apps/backend/core/progress.py

# Get full is_build_ready_for_qa implementation
sed -n '118,170p' apps/backend/core/progress.py

# Check count_subtasks_detailed full implementation
sed -n '66,117p' apps/backend/core/progress.py

# Get the context around line 130 in loop.py
sed -n '115,135p' apps/backend/qa/loop.py

# Check if count_subtasks_detailed is used anywhere in loop.py or related files
rg -n 'count_subtasks_detailed' --type=py apps/backend/qa/
```

Update the progress message to reflect terminal-state semantics.

When the QA readiness gate fails (line 130), the failure message still reports `{completed}/{total} subtasks completed`. That count no longer matches the gate's semantics: the gate now waits for every subtask to reach a terminal state (completed, failed, or stuck), not for every subtask to be completed. Either update the message to show a terminal/total count, or reword it so it is clear that failed and stuck subtasks also count toward QA readiness.
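One possible shape for that change, sketched as a hypothetical helper. `count_terminal_subtasks` is not in the PR; it simply mirrors the terminal-state counting from `is_build_ready_for_qa` so the printed numbers match the gate.

```python
import json
from pathlib import Path


def count_terminal_subtasks(spec_dir: Path) -> tuple[int, int]:
    """Hypothetical helper: return (terminal, total) subtask counts,
    where terminal covers completed, failed, and stuck subtasks."""
    plan_file = spec_dir / "implementation_plan.json"
    if not plan_file.exists():
        return 0, 0

    # Collect stuck subtask IDs, falling back to an empty set on read errors.
    stuck_ids: set[str] = set()
    history_file = spec_dir / "memory" / "attempt_history.json"
    if history_file.exists():
        try:
            history = json.loads(history_file.read_text(encoding="utf-8"))
            stuck_ids = {
                entry["subtask_id"]
                for entry in history.get("stuck_subtasks", [])
                if "subtask_id" in entry
            }
        except (OSError, json.JSONDecodeError, UnicodeDecodeError):
            pass

    try:
        plan = json.loads(plan_file.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError, UnicodeDecodeError):
        return 0, 0

    total = terminal = 0
    for phase in plan.get("phases", []):
        for subtask in phase.get("subtasks", []):
            total += 1
            status = subtask.get("status", "pending")
            if status in ("completed", "failed") or subtask.get("id") in stuck_ids:
                terminal += 1
    return terminal, total


# The gate's failure message could then read:
#     terminal, total = count_terminal_subtasks(spec_dir)
#     print(f"   Progress: {terminal}/{total} subtasks in a terminal state")
```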
```diff
 
     # Emit phase event at start of QA validation (before any early returns)
     emit_phase(ExecutionPhase.QA_REVIEW, "Starting QA validation")

@@ -136,10 +145,6 @@ async def run_qa_validation_loop(
         f"[Fast Mode] {'ENABLED' if fast_mode else 'disabled'} for QA validation",
     )
 
-    # Check if there's pending human feedback that needs to be processed
-    fix_request_file = spec_dir / "QA_FIX_REQUEST.md"
-    has_human_feedback = fix_request_file.exists()
-
     # Check if already approved - but if there's human feedback, we need to process it first
     if is_qa_approved(spec_dir) and not has_human_feedback:
         debug_success("qa_loop", "Build already approved by QA")
```
```diff
@@ -514,6 +514,33 @@ def mark_subtask_stuck(self, subtask_id: str, reason: str) -> None:
 
         self._save_attempt_history(history)
 
+        # Also update the subtask status in implementation_plan.json
+        # so that other callers (like is_build_ready_for_qa) see accurate status
+        try:
+            plan_file = self.spec_dir / "implementation_plan.json"
+            if plan_file.exists():
+                with open(plan_file, encoding="utf-8") as f:
+                    plan = json.load(f)
+
+                updated = False
+                for phase in plan.get("phases", []):
+                    for subtask in phase.get("subtasks", []):
+                        if subtask.get("id") == subtask_id:
+                            subtask["status"] = "failed"
+                            subtask["notes"] = f"Marked as stuck: {reason}"
```
Comment on lines +529 to +530
Contributor

The introduction of a new […] To maintain data model consistency, I recommend using the existing […]. Please also update the new test […].

Suggested change: […]
```diff
+                            updated = True
+                            break
+                    if updated:
+                        break
+
+                if updated:
+                    with open(plan_file, "w", encoding="utf-8") as f:
+                        json.dump(plan, f, indent=2)
+        except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
+            logger.warning(
+                f"Failed to update implementation_plan.json for stuck subtask {subtask_id}: {e}"
+            )
```
Comment on lines +517 to +542
Contributor

Overwrites existing notes.

Line 530 unconditionally sets `subtask["notes"]`, clobbering whatever notes the subtask already carried.

Proposed fix: prepend the stuck reason to the existing notes.

```diff
-                            subtask["notes"] = f"Marked as stuck: {reason}"
+                            stuck_note = f"Marked as stuck: {reason}"
+                            existing = subtask.get("notes", "")
+                            subtask["notes"] = (
+                                f"{stuck_note}\n{existing}" if existing else stuck_note
+                            )
```
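A quick illustration of the proposed prepend behavior (the subtask dict and the reason string are made up for the example):

```python
# Hypothetical subtask record with pre-existing notes.
subtask = {"id": "subtask-2", "status": "pending", "notes": "Blocked on missing API key"}
reason = "exhausted retries"

stuck_note = f"Marked as stuck: {reason}"
existing = subtask.get("notes", "")
subtask["notes"] = f"{stuck_note}\n{existing}" if existing else stuck_note

print(subtask["notes"])
# Marked as stuck: exhausted retries
# Blocked on missing API key
```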
```diff
+
     def get_stuck_subtasks(self) -> list[dict]:
         """
         Get all subtasks marked as stuck.
```
Check notice
Code scanning / CodeQL: Empty except (Note)

Copilot Autofix (AI, 1 day ago)

In general, empty `except` blocks should either be removed (letting the exception propagate) or should perform some explicit handling such as logging, metrics, or substituting a safe default with a clear explanation. If ignoring an exception is truly desired, there should at least be a comment and usually a log statement to document that decision.

For this specific case in `is_build_ready_for_qa`, we want to keep the existing functional behavior: if `attempt_history.json` cannot be read or parsed, the function should behave as though there are no stuck subtasks (`stuck_subtask_ids` remains an empty set) and continue. The best fix is therefore: in the `except (OSError, json.JSONDecodeError, UnicodeDecodeError):` block around loading `attempt_history_file`, add a call to the existing `logger` to record the failure, and optionally a brief doc comment that it is intentionally falling back to assuming no stuck subtasks.

Only the block at lines 140–149 in `apps/backend/core/progress.py` needs to change. No new imports are required because `logging` and `logger` are already present at the top of the file.
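A sketch of what that fix could look like if combined with the `_load_stuck_subtask_ids` helper proposed earlier. This is illustrative only; the logger setup is assumed here rather than taken from the actual module.

```python
import json
import logging
from pathlib import Path

logger = logging.getLogger(__name__)  # assumed; the module reportedly already has a logger


def _load_stuck_subtask_ids(spec_dir: Path) -> set[str]:
    """Load IDs of subtasks marked as stuck from attempt_history.json.

    Falls back to an empty set (i.e. "no stuck subtasks") if the file is
    missing or unreadable, and logs the failure instead of silently passing.
    """
    stuck_subtask_ids: set[str] = set()
    attempt_history_file = spec_dir / "memory" / "attempt_history.json"
    if attempt_history_file.exists():
        try:
            with open(attempt_history_file, encoding="utf-8") as f:
                attempt_history = json.load(f)
            stuck_subtask_ids = {
                entry["subtask_id"]
                for entry in attempt_history.get("stuck_subtasks", [])
                if "subtask_id" in entry
            }
        except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
            # Intentional fallback: readiness is then judged from plan status alone.
            logger.warning(
                "Failed to read %s; assuming no stuck subtasks: %s",
                attempt_history_file,
                exc,
            )
    return stuck_subtask_ids
```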