17 changes: 13 additions & 4 deletions apps/backend/runners/github/services/followup_reviewer.py
@@ -886,7 +886,8 @@ async def _attempt_extraction_call(
"""Attempt a short SDK call with minimal schema to recover review data.

This is the extraction recovery step when full structured output validation fails.
-Uses FollowupExtractionResponse (~6 flat fields) which has near-100% success rate.
+Uses FollowupExtractionResponse (small schema with ExtractedFindingSummary nesting)
+which has near-100% success rate.

Uses create_client() + process_sdk_stream() for proper OAuth handling,
matching the pattern in parallel_followup_reviewer.py.
@@ -900,7 +901,8 @@ async def _attempt_extraction_call(
extraction_prompt = (
"Extract the key review data from the following AI analysis output. "
"Return the verdict, reasoning, resolved finding IDs, unresolved finding IDs, "
"one-line summaries of any new findings, and counts of confirmed/dismissed findings.\n\n"
"structured summaries of any new findings (including severity, description, file path, and line number), "
"and counts of confirmed/dismissed findings.\n\n"
f"--- AI ANALYSIS OUTPUT ---\n{text[:8000]}\n--- END ---"
)

@@ -946,9 +948,16 @@ async def _attempt_extraction_call(

# Convert extraction to internal format with reconstructed findings
new_findings = []
-for i, summary in enumerate(extracted.new_finding_summaries):
+for i, summary_obj in enumerate(extracted.new_finding_summaries):
new_findings.append(
-create_finding_from_summary(summary, i, id_prefix="FR")
+create_finding_from_summary(
+summary=summary_obj.description,
+index=i,
+id_prefix="FR",
+severity_override=summary_obj.severity,
+file=summary_obj.file,
+line=summary_obj.line,
+)
)

# Build finding_resolutions from extraction data for _apply_ai_resolutions
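For orientation, the tiered recovery flow described in these docstrings can be sketched as follows. This is a simplified illustration, not the project's code: the real Tier 2 issues a fresh SDK call with the smaller schema, whereas this sketch just re-validates the same text.

from pydantic import BaseModel, ValidationError

def recover_review_data(
    raw_text: str,
    full_model: type[BaseModel],
    extraction_model: type[BaseModel],
) -> BaseModel | None:
    # Tier 1: validate against the full structured-output schema.
    try:
        return full_model.model_validate_json(raw_text)
    except ValidationError:
        pass
    # Tier 2: fall back to the minimal extraction schema.
    try:
        return extraction_model.model_validate_json(raw_text)
    except ValidationError:
        return None  # Tier 3 (raw-text parsing) would follow here.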
21 changes: 15 additions & 6 deletions apps/backend/runners/github/services/parallel_followup_reviewer.py
@@ -1129,7 +1129,8 @@ async def _attempt_extraction_call(
"""Attempt a short SDK call with a minimal schema to recover review data.

This is the Tier 2 recovery step when full structured output validation fails.
-Uses FollowupExtractionResponse (~6 flat fields) which has near-100% success rate.
+Uses FollowupExtractionResponse (small schema with ExtractedFindingSummary nesting)
+which has near-100% success rate.

Returns parsed result dict on success, None on failure.
"""
@@ -1146,7 +1147,8 @@ async def _attempt_extraction_call(
extraction_prompt = (
"Extract the key review data from the following AI analysis output. "
"Return the verdict, reasoning, resolved finding IDs, unresolved finding IDs, "
"one-line summaries of any new findings, and counts of confirmed/dismissed findings.\n\n"
"structured summaries of any new findings (including severity, description, file path, and line number), "
"and counts of confirmed/dismissed findings.\n\n"
f"--- AI ANALYSIS OUTPUT ---\n{text[:8000]}\n--- END ---"
)

@@ -1205,10 +1207,17 @@ async def _attempt_extraction_call(
findings = []
new_finding_ids = []

-# 1. Convert new_finding_summaries to minimal PRReviewFinding objects
-# Uses shared helper for "SEVERITY: description" parsing and ID generation
-for i, summary in enumerate(extracted.new_finding_summaries):
-finding = create_finding_from_summary(summary, i, id_prefix="FU")
+# 1. Convert new_finding_summaries to PRReviewFinding objects
+# ExtractedFindingSummary objects carry file/line from extraction
+for i, summary_obj in enumerate(extracted.new_finding_summaries):
+finding = create_finding_from_summary(
+summary=summary_obj.description,
+index=i,
+id_prefix="FU",
+severity_override=summary_obj.severity,
+file=summary_obj.file,
+line=summary_obj.line,
+)
new_finding_ids.append(finding.id)
findings.append(finding)

22 changes: 19 additions & 3 deletions apps/backend/runners/github/services/pydantic_models.py
@@ -533,10 +533,26 @@ class FindingValidationResponse(BaseModel):
# =============================================================================


class ExtractedFindingSummary(BaseModel):
"""Per-finding summary with file location for extraction recovery."""

severity: str = Field(description="Severity level: LOW, MEDIUM, HIGH, or CRITICAL")
description: str = Field(description="One-line description of the finding")
file: str = Field(
default="unknown", description="File path where the issue was found"
)
line: int = Field(default=0, description="Line number in the file (0 if unknown)")
Comment on lines +536 to +544 (Contributor):

medium

The severity field in ExtractedFindingSummary is defined as a str. For better type safety and to enforce the allowed values at the schema level, it would be better to use typing.Literal. This would be consistent with other models in this file that use Literal for fields with a fixed set of values. To handle potential case variations from the AI, a field_validator can be added to normalize the severity to uppercase before validation.
class ExtractedFindingSummary(BaseModel):
    """Per-finding summary with file location for extraction recovery."""

    @field_validator("severity", mode="before")
    @classmethod
    def _normalize_severity_case(cls, v: str) -> str:
        if isinstance(v, str):
            return v.upper()
        return v

    severity: Literal["LOW", "MEDIUM", "HIGH", "CRITICAL"] = Field(description="Severity level: LOW, MEDIUM, HIGH, or CRITICAL")
    description: str = Field(description="One-line description of the finding")
    file: str = Field(
        default="unknown", description="File path where the issue was found"
    )
    line: int = Field(default=0, description="Line number in the file (0 if unknown)")
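A minimal sketch of the behavior this suggestion would give, assuming the Literal + field_validator version of the class above (the example values are illustrative):

from pydantic import ValidationError

# Mixed-case output from the model is normalized before Literal validation.
s = ExtractedFindingSummary(severity="high", description="missing null check")
assert s.severity == "HIGH"

# Values outside the Literal set now fail loudly instead of slipping through:
try:
    ExtractedFindingSummary(severity="INFO", description="x")
except ValidationError:
    pass  # rejected at the schema level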


@field_validator("severity", mode="before")
@classmethod
def _normalize_severity(cls, v: str) -> str:
return _normalize_severity(v)


class FollowupExtractionResponse(BaseModel):
"""Minimal extraction schema for recovering data when full structured output fails.

-Deliberately kept small (~6 fields, no nesting) for near-100% validation success.
+Uses ExtractedFindingSummary for new findings to preserve file/line information.
Used as an intermediate recovery step before falling back to raw text parsing.
"""

Expand All @@ -552,9 +568,9 @@ class FollowupExtractionResponse(BaseModel):
default_factory=list,
description="IDs of previous findings that remain unresolved",
)
-new_finding_summaries: list[str] = Field(
+new_finding_summaries: list[ExtractedFindingSummary] = Field(
default_factory=list,
-description="One-line summary of each new finding (e.g. 'HIGH: cleanup deletes QA-rejected specs in batch_commands.py')",
+description="Structured summary of each new finding with file location",

This comment was marked as outdated.

This comment was marked as outdated.

)
confirmed_finding_count: int = Field(
0, description="Number of findings confirmed as valid"
16 changes: 14 additions & 2 deletions apps/backend/runners/github/services/recovery_utils.py
@@ -80,6 +80,9 @@ def create_finding_from_summary(
summary: str,
index: int,
id_prefix: str = "FR",
severity_override: str | None = None,
file: str = "unknown",
line: int = 0,
) -> PRReviewFinding:
"""Create a PRReviewFinding from an extraction summary string.

@@ -90,11 +93,20 @@
summary: Raw summary string, e.g. "HIGH: Missing null check in parser.py"
index: The index of the finding in the extraction list.
id_prefix: ID prefix for traceability. Default "FR" (Followup Recovery).
severity_override: If provided, use this severity instead of parsing from summary.
file: File path where the issue was found (default "unknown").
line: Line number in the file (default 0).

Returns:
A PRReviewFinding with parsed severity, generated ID, and description.
"""
severity, description = parse_severity_from_summary(summary)

# Use severity_override if provided
if severity_override is not None:
severity_map = {k.rstrip(":"): v for k, v in _EXTRACTION_SEVERITY_MAP}
severity = severity_map.get(severity_override.upper(), severity)

finding_id = generate_recovery_finding_id(index, description, prefix=id_prefix)

return PRReviewFinding(
Expand All @@ -103,6 +115,6 @@ def create_finding_from_summary(
category=ReviewCategory.QUALITY,
title=description[:80],
description=f"[Recovered via extraction] {description}",
file="unknown",
line=0,
file=file,
line=line,
)
114 changes: 108 additions & 6 deletions tests/test_structured_output_recovery.py
@@ -18,17 +18,22 @@
# services/ package at both apps/backend/services/ and runners/github/services/.
# To avoid collision, add the github services dir directly and import bare module names.
_backend_dir = Path(__file__).parent.parent / "apps" / "backend"
-_github_services_dir = _backend_dir / "runners" / "github" / "services"
+_github_runner_dir = _backend_dir / "runners" / "github"
+_github_services_dir = _github_runner_dir / "services"
if str(_backend_dir) not in sys.path:
sys.path.insert(0, str(_backend_dir))
+if str(_github_runner_dir) not in sys.path:
+sys.path.insert(0, str(_github_runner_dir))
if str(_github_services_dir) not in sys.path:
sys.path.insert(0, str(_github_services_dir))
Comment on lines +21 to 28 (Contributor):

🧹 Nitpick | 🔵 Trivial

Fragile sys.path manipulation — pre-existing but growing.

This file now inserts three directories into sys.path. While this pattern was already in place, adding _github_runner_dir increases coupling to the repo layout. This is a pre-existing concern and not introduced by this PR, so no action needed now.

🤖 Prompt for AI Agents
In tests/test_structured_output_recovery.py around lines 21 - 28, the current sys.path manipulation adds three repo directories (_backend_dir, _github_runner_dir, _github_services_dir), which tightly couples the tests to the repo layout. Keep only the minimal _backend_dir insertion (it already guards with a membership check), and load modules from the runner/services paths via importlib (importlib.util.spec_from_file_location or importlib.machinery.SourceFileLoader) at runtime, so the tests no longer mutate sys.path for those two directories.
from agents.tools_pkg.models import AGENT_CONFIGS
from pydantic_models import (
ExtractedFindingSummary,
FollowupExtractionResponse,
ParallelFollowupResponse,
)
from recovery_utils import create_finding_from_summary
from sdk_utils import RECOVERABLE_ERRORS


@@ -53,20 +58,38 @@ def test_minimal_valid_response(self):
assert resp.dismissed_finding_count == 0

def test_full_valid_response(self):
"""Accepts fully populated response."""
"""Accepts fully populated response with ExtractedFindingSummary objects."""
resp = FollowupExtractionResponse(
verdict="READY_TO_MERGE",
verdict_reasoning="All findings resolved",
resolved_finding_ids=["NCR-001", "NCR-002"],
unresolved_finding_ids=[],
new_finding_summaries=["HIGH: potential cleanup issue in batch_commands.py"],
new_finding_summaries=[
ExtractedFindingSummary(
severity="HIGH",
description="potential cleanup issue in batch_commands.py",
file="apps/backend/cli/batch_commands.py",
line=42,
)
],
confirmed_finding_count=1,
dismissed_finding_count=1,
)
assert len(resp.resolved_finding_ids) == 2
assert len(resp.new_finding_summaries) == 1
assert resp.new_finding_summaries[0].file == "apps/backend/cli/batch_commands.py"
assert resp.new_finding_summaries[0].line == 42
assert resp.confirmed_finding_count == 1

def test_finding_summary_defaults(self):
"""ExtractedFindingSummary defaults file='unknown' and line=0."""
summary = ExtractedFindingSummary(
severity="MEDIUM",
description="Some issue without location",
)
assert summary.file == "unknown"
assert summary.line == 0

def test_schema_is_small(self):
"""Schema should be significantly smaller than ParallelFollowupResponse."""
extraction_schema = json.dumps(
@@ -75,10 +98,11 @@ def test_schema_is_small(self):
followup_schema = json.dumps(
ParallelFollowupResponse.model_json_schema()
)
-# Extraction schema should be less than half the size of the full schema
-assert len(extraction_schema) < len(followup_schema) / 2, (
+# Actual ratio is ~50.7% after adding ExtractedFindingSummary nesting.
+# Threshold at 55% gives headroom while still guarding against schema bloat.
+assert len(extraction_schema) < len(followup_schema) * 0.55, (
f"Extraction schema ({len(extraction_schema)} chars) should be "
-f"less than half of full schema ({len(followup_schema)} chars)"
+f"less than 55% of full schema ({len(followup_schema)} chars)"
)

def test_all_verdict_values_accepted(self):
@@ -143,3 +167,81 @@ def test_extraction_agent_low_thinking(self):
"""Extraction agent should use low thinking (lightweight call)."""
config = AGENT_CONFIGS["pr_followup_extraction"]
assert config["thinking_default"] == "low"


# ============================================================================
# Test create_finding_from_summary with file/line params
# ============================================================================


class TestCreateFindingFromSummary:
"""Tests for create_finding_from_summary with file/line support."""

def test_backward_compatible_defaults(self):
"""Calling without file/line still produces file='unknown', line=0."""
finding = create_finding_from_summary("HIGH: some issue", 0)
assert finding.file == "unknown"
assert finding.line == 0
assert finding.severity.value == "high"

def test_file_and_line_passed_through(self):
"""File and line params are used in the resulting finding."""
finding = create_finding_from_summary(
summary="Missing null check",
index=0,
file="src/parser.py",
line=42,
)
assert finding.file == "src/parser.py"
assert finding.line == 42

def test_severity_override(self):
"""severity_override takes precedence over parsed severity."""
finding = create_finding_from_summary(
summary="HIGH: some issue",
index=0,
severity_override="CRITICAL",
)
assert finding.severity.value == "critical"

def test_severity_override_case_insensitive(self):
"""severity_override works regardless of case."""
finding = create_finding_from_summary(
summary="some issue",
index=0,
severity_override="high",
)
assert finding.severity.value == "high"

def test_severity_override_invalid_falls_back(self):
"""Invalid severity_override falls back to parsed severity."""
finding = create_finding_from_summary(
summary="LOW: minor issue",
index=0,
severity_override="UNKNOWN",
)
# Falls back to parsed "LOW" from summary
assert finding.severity.value == "low"

def test_id_prefix(self):
"""Custom id_prefix is used in the finding ID."""
finding = create_finding_from_summary(
summary="some issue", index=0, id_prefix="FU"
)
assert finding.id.startswith("FU-")

def test_all_params_together(self):
"""All new params work together correctly."""
finding = create_finding_from_summary(
summary="Regex issue in subtask title truncation",
index=3,
id_prefix="FU",
severity_override="MEDIUM",
file="apps/backend/agents/planner.py",
line=187,
)
assert finding.id.startswith("FU-")
assert finding.severity.value == "medium"
assert finding.file == "apps/backend/agents/planner.py"
assert finding.line == 187
assert "Regex issue" in finding.title