AndyMik90
diff --git a/apps/backend/runners/github/services/followup_reviewer.py b/apps/backend/runners/github/services/followup_reviewer.py
Lines changed: 13 additions & 4 deletions
diff --git a/apps/backend/runners/github/services/parallel_followup_reviewer.py b/apps/backend/runners/github/services/parallel_followup_reviewer.py
Lines changed: 15 additions & 6 deletions
diff --git a/apps/backend/runners/github/services/parallel_orchestrator_reviewer.py b/apps/backend/runners/github/services/parallel_orchestrator_reviewer.py
Lines changed: 77 additions & 18 deletions
diff --git a/apps/backend/runners/github/services/pydantic_models.py b/apps/backend/runners/github/services/pydantic_models.py
Lines changed: 19 additions & 3 deletions
diff --git a/apps/backend/runners/github/services/recovery_utils.py b/apps/backend/runners/github/services/recovery_utils.py
Lines changed: 14 additions & 2 deletions
diff --git a/apps/frontend/src/main/ipc-handlers/github/pr-handlers.ts b/apps/frontend/src/main/ipc-handlers/github/pr-handlers.ts
Lines changed: 8 additions & 0 deletions
diff --git a/apps/frontend/src/preload/api/modules/github-api.ts b/apps/frontend/src/preload/api/modules/github-api.ts
Lines changed: 4 additions & 0 deletions
@@ -886,7 +886,8 @@ async def _attempt_extraction_call(
         """Attempt a short SDK call with minimal schema to recover review data.
 
         This is the extraction recovery step when full structured output validation fails.
-        Uses FollowupExtractionResponse (~6 flat fields) which has near-100% success rate.
+        Uses FollowupExtractionResponse, a small schema that nests
+        ExtractedFindingSummary items and has a near-100% success rate.
 
         Uses create_client() + process_sdk_stream() for proper OAuth handling,
         matching the pattern in parallel_followup_reviewer.py.
@@ -900,7 +901,8 @@ async def _attempt_extraction_call(
             extraction_prompt = (
                 "Extract the key review data from the following AI analysis output. "
                 "Return the verdict, reasoning, resolved finding IDs, unresolved finding IDs, "
-                "one-line summaries of any new findings, and counts of confirmed/dismissed findings.\n\n"
+                "structured summaries of any new findings (including severity, description, file path, and line number), "
+                "and counts of confirmed/dismissed findings.\n\n"
                 f"--- AI ANALYSIS OUTPUT ---\n{text[:8000]}\n--- END ---"
             )
 
@@ -946,9 +948,16 @@ async def _attempt_extraction_call(
 
             # Convert extraction to internal format with reconstructed findings
             new_findings = []
-            for i, summary in enumerate(extracted.new_finding_summaries):
+            for i, summary_obj in enumerate(extracted.new_finding_summaries):
                 new_findings.append(
-                    create_finding_from_summary(summary, i, id_prefix="FR")
+                    create_finding_from_summary(
+                        summary=summary_obj.description,
+                        index=i,
+                        id_prefix="FR",
+                        severity_override=summary_obj.severity,
+                        file=summary_obj.file,
+                        line=summary_obj.line,
+                    )
                 )
 
             # Build finding_resolutions from extraction data for _apply_ai_resolutions
 
@@ -1129,7 +1129,8 @@ async def _attempt_extraction_call(
         """Attempt a short SDK call with a minimal schema to recover review data.
 
         This is the Tier 2 recovery step when full structured output validation fails.
-        Uses FollowupExtractionResponse (~6 flat fields) which has near-100% success rate.
+        Uses FollowupExtractionResponse, a small schema that nests
+        ExtractedFindingSummary items and has a near-100% success rate.
 
         Returns parsed result dict on success, None on failure.
         """
@@ -1146,7 +1147,8 @@ async def _attempt_extraction_call(
             extraction_prompt = (
                 "Extract the key review data from the following AI analysis output. "
                 "Return the verdict, reasoning, resolved finding IDs, unresolved finding IDs, "
-                "one-line summaries of any new findings, and counts of confirmed/dismissed findings.\n\n"
+                "structured summaries of any new findings (including severity, description, file path, and line number), "
+                "and counts of confirmed/dismissed findings.\n\n"
                 f"--- AI ANALYSIS OUTPUT ---\n{text[:8000]}\n--- END ---"
             )
 
@@ -1205,10 +1207,17 @@ async def _attempt_extraction_call(
             findings = []
             new_finding_ids = []
 
-            # 1. Convert new_finding_summaries to minimal PRReviewFinding objects
-            # Uses shared helper for "SEVERITY: description" parsing and ID generation
-            for i, summary in enumerate(extracted.new_finding_summaries):
-                finding = create_finding_from_summary(summary, i, id_prefix="FU")
+            # 1. Convert new_finding_summaries to PRReviewFinding objects
+            # ExtractedFindingSummary objects carry file/line from extraction
+            for i, summary_obj in enumerate(extracted.new_finding_summaries):
+                finding = create_finding_from_summary(
+                    summary=summary_obj.description,
+                    index=i,
+                    id_prefix="FU",
+                    severity_override=summary_obj.severity,
+                    file=summary_obj.file,
+                    line=summary_obj.line,
+                )
                 new_finding_ids.append(finding.id)
                 findings.append(finding)
 
 
@@ -1289,12 +1289,30 @@ async def review(self, context: PRContext) -> PRReviewResult:
                 f"{len(filtered_findings)} filtered"
             )
 
-            # No confidence routing - validation is binary via finding-validator
-            unique_findings = validated_findings
-            logger.info(f"[PRReview] Final findings: {len(unique_findings)} validated")
+            # Separate active findings (drive verdict) from dismissed (shown in UI only)
+            active_findings = []
+            dismissed_findings = []
+            for f in validated_findings:
+                if f.validation_status == "dismissed_false_positive":
+                    dismissed_findings.append(f)
+                else:
+                    active_findings.append(f)
 
+            safe_print(
+                f"[ParallelOrchestrator] Final: {len(active_findings)} active, "
+                f"{len(dismissed_findings)} disputed by validator",
+                flush=True,
+            )
+            logger.info(
+                f"[PRReview] Final findings: {len(active_findings)} active, "
+                f"{len(dismissed_findings)} disputed"
+            )
+
+            # All findings (active + dismissed) go in the result for UI display
+            all_review_findings = validated_findings
             logger.info(
-                f"[ParallelOrchestrator] Review complete: {len(unique_findings)} findings"
+                f"[ParallelOrchestrator] Review complete: {len(all_review_findings)} findings "
+                f"({len(active_findings)} active, {len(dismissed_findings)} disputed)"
             )
 
             # Fetch CI status for verdict consideration
@@ -1304,9 +1322,9 @@ async def review(self, context: PRContext) -> PRReviewResult:
                 f"{ci_status.get('failing', 0)} failing, {ci_status.get('pending', 0)} pending"
             )
 
-            # Generate verdict (includes merge conflict check, branch-behind check, and CI status)
+            # Generate verdict from ACTIVE findings only (dismissed don't affect verdict)
             verdict, verdict_reasoning, blockers = self._generate_verdict(
-                unique_findings,
+                active_findings,
                 has_merge_conflicts=context.has_merge_conflicts,
                 merge_state_status=context.merge_state_status,
                 ci_status=ci_status,
@@ -1317,7 +1335,7 @@ async def review(self, context: PRContext) -> PRReviewResult:
                 verdict=verdict,
                 verdict_reasoning=verdict_reasoning,
                 blockers=blockers,
-                findings=unique_findings,
+                findings=all_review_findings,
                 agents_invoked=agents_invoked,
             )
 
@@ -1362,7 +1380,7 @@ async def review(self, context: PRContext) -> PRReviewResult:
                 pr_number=context.pr_number,
                 repo=self.config.repo,
                 success=True,
-                findings=unique_findings,
+                findings=all_review_findings,
                 summary=summary,
                 overall_status=overall_status,
                 verdict=verdict,
@@ -1937,12 +1955,38 @@ async def _validate_findings(
                 validated_findings.append(finding)
 
             elif validation.validation_status == "dismissed_false_positive":
-                # Dismiss - do not include
-                dismissed_count += 1
-                logger.info(
-                    f"[PRReview] Dismissed {finding.id} as false positive: "
-                    f"{validation.explanation[:100]}"
-                )
+                # Protect cross-validated findings from dismissal —
+                # if multiple specialists independently found the same issue,
+                # a single validator should not override that consensus
+                if finding.cross_validated:
+                    finding.validation_status = "confirmed_valid"
+                    finding.validation_evidence = validation.code_evidence
+                    finding.validation_explanation = (
+                        f"[Auto-kept: cross-validated by {len(finding.source_agents)} agents] "
+                        f"{validation.explanation}"
+                    )
+                    validated_findings.append(finding)
+                    safe_print(
+                        f"[FindingValidator] Kept cross-validated finding '{finding.title}' "
+                        f"despite dismissal (agents={finding.source_agents})",
+                        flush=True,
+                    )
+                else:
+                    # Keep finding but mark as dismissed (user can see it in UI)
+                    finding.validation_status = "dismissed_false_positive"
+                    finding.validation_evidence = validation.code_evidence
+                    finding.validation_explanation = validation.explanation
+                    validated_findings.append(finding)
+                    dismissed_count += 1
+                    safe_print(
+                        f"[FindingValidator] Disputed '{finding.title}': "
+                        f"{validation.explanation} (file={finding.file}:{finding.line})",
+                        flush=True,
+                    )
+                    logger.info(
+                        f"[PRReview] Disputed {finding.id}: "
+                        f"{validation.explanation[:200]}"
+                    )
 
             elif validation.validation_status == "needs_human_review":
                 # Keep but flag
@@ -2127,11 +2171,16 @@ def _generate_summary(
                 sev = f.severity.value
                 emoji = severity_emoji.get(sev, "⚪")
 
+                is_disputed = f.validation_status == "dismissed_false_positive"
+
                 # Finding header with location
                 line_range = f"L{f.line}"
                 if f.end_line and f.end_line != f.line:
                     line_range = f"L{f.line}-L{f.end_line}"
-                lines.append(f"#### {emoji} [{sev.upper()}] {f.title}")
+                if is_disputed:
+                    lines.append(f"#### ⚪ [DISPUTED] ~~{f.title}~~")
+                else:
+                    lines.append(f"#### {emoji} [{sev.upper()}] {f.title}")
                 lines.append(f"**File:** `{f.file}` ({line_range})")
 
                 # Cross-validation badge
@@ -2161,6 +2210,7 @@ def _generate_summary(
                     status_label = {
                         "confirmed_valid": "Confirmed",
                         "needs_human_review": "Needs human review",
+                        "dismissed_false_positive": "Disputed by validator",
                     }.get(f.validation_status, f.validation_status)
                     lines.append("")
                     lines.append(f"**Validation:** {status_label}")
@@ -2182,18 +2232,27 @@ def _generate_summary(
 
                 lines.append("")
 
-            # Findings count summary
+            # Findings count summary (exclude dismissed from active count)
+            active_count = 0
+            dismissed_count = 0
             by_severity: dict[str, int] = {}
             for f in findings:
+                if f.validation_status == "dismissed_false_positive":
+                    dismissed_count += 1
+                    continue
+                active_count += 1
                 sev = f.severity.value
                 by_severity[sev] = by_severity.get(sev, 0) + 1
             summary_parts = []
             for sev in ["critical", "high", "medium", "low"]:
                 if sev in by_severity:
                     summary_parts.append(f"{by_severity[sev]} {sev}")
-            lines.append(
-                f"**Total:** {len(findings)} finding(s) ({', '.join(summary_parts)})"
+            count_text = (
+                f"**Total:** {active_count} finding(s) ({', '.join(summary_parts)})"
             )
+            if dismissed_count > 0:
+                count_text += f" + {dismissed_count} disputed"
+            lines.append(count_text)
             lines.append("")
 
         lines.append("---")
 
@@ -533,10 +533,26 @@ class FindingValidationResponse(BaseModel):
 # =============================================================================
 
 
+class ExtractedFindingSummary(BaseModel):
+    """Per-finding summary with file location for extraction recovery."""
+
+    severity: str = Field(description="Severity level: LOW, MEDIUM, HIGH, or CRITICAL")
+    description: str = Field(description="One-line description of the finding")
+    file: str = Field(
+        default="unknown", description="File path where the issue was found"
+    )
+    line: int = Field(default=0, description="Line number in the file (0 if unknown)")
+
+    @field_validator("severity", mode="before")
+    @classmethod
+    def _normalize_severity(cls, v: str) -> str:
+        return _normalize_severity(v)
+
+
 class FollowupExtractionResponse(BaseModel):
     """Minimal extraction schema for recovering data when full structured output fails.
 
-    Deliberately kept small (~6 fields, no nesting) for near-100% validation success.
+    Uses ExtractedFindingSummary for new findings to preserve file/line information.
     Used as an intermediate recovery step before falling back to raw text parsing.
     """
 
@@ -552,9 +568,9 @@ class FollowupExtractionResponse(BaseModel):
         default_factory=list,
         description="IDs of previous findings that remain unresolved",
     )
-    new_finding_summaries: list[str] = Field(
+    new_finding_summaries: list[ExtractedFindingSummary] = Field(
         default_factory=list,
-        description="One-line summary of each new finding (e.g. 'HIGH: cleanup deletes QA-rejected specs in batch_commands.py')",
+        description="Structured summary of each new finding with file location",
     )
     confirmed_finding_count: int = Field(
         0, description="Number of findings confirmed as valid"
 
@@ -80,6 +80,9 @@ def create_finding_from_summary(
     summary: str,
     index: int,
     id_prefix: str = "FR",
+    severity_override: str | None = None,
+    file: str = "unknown",
+    line: int = 0,
 ) -> PRReviewFinding:
     """Create a PRReviewFinding from an extraction summary string.
 
@@ -90,11 +93,20 @@ def create_finding_from_summary(
         summary: Raw summary string, e.g. "HIGH: Missing null check in parser.py"
         index: The index of the finding in the extraction list.
         id_prefix: ID prefix for traceability. Default "FR" (Followup Recovery).
+        severity_override: If given and recognized, replaces the severity parsed from summary.
+        file: File path where the issue was found (default "unknown").
+        line: Line number in the file (default 0).
 
     Returns:
         A PRReviewFinding with parsed severity, generated ID, and description.
     """
     severity, description = parse_severity_from_summary(summary)
+
+    # A recognized severity_override wins; otherwise keep the severity parsed above
+    if severity_override is not None:
+        severity_map = {k.rstrip(":"): v for k, v in _EXTRACTION_SEVERITY_MAP}
+        severity = severity_map.get(severity_override.upper(), severity)
+
     finding_id = generate_recovery_finding_id(index, description, prefix=id_prefix)
 
     return PRReviewFinding(
@@ -103,6 +115,6 @@ def create_finding_from_summary(
         category=ReviewCategory.QUALITY,
         title=description[:80],
         description=f"[Recovered via extraction] {description}",
-        file="unknown",
-        line=0,
+        file=file,
+        line=line,
     )
@@ -268,6 +268,10 @@ export interface PRReviewFinding {
   endLine?: number;
   suggestedFix?: string;
   fixable: boolean;
+  validationStatus?: "confirmed_valid" | "dismissed_false_positive" | "needs_human_review" | null;
+  validationExplanation?: string;
+  sourceAgents?: string[];
+  crossValidated?: boolean;
 }
 
 /**
@@ -1341,6 +1345,10 @@ function getReviewResult(project: Project, prNumber: number): PRReviewResult | n
           endLine: f.end_line,
           suggestedFix: f.suggested_fix,
           fixable: f.fixable ?? false,
+          validationStatus: f.validation_status ?? null,
+          validationExplanation: f.validation_explanation ?? undefined,
+          sourceAgents: f.source_agents ?? [],
+          crossValidated: f.cross_validated ?? false,
         })) ?? [],
       summary: data.summary ?? "",
       overallStatus: data.overall_status ?? "comment",
 
@@ -376,6 +376,10 @@ export interface PRReviewFinding {
   endLine?: number;
   suggestedFix?: string;
   fixable: boolean;
+  validationStatus?: 'confirmed_valid' | 'dismissed_false_positive' | 'needs_human_review' | null;
+  validationExplanation?: string;
+  sourceAgents?: string[];
+  crossValidated?: boolean;
 }
 
 /**