Skip to content

Commit 23cf032

Browse files
authored
Merge branch 'develop' into auto-claude/231-investigate-and-fix-subtask-title-display
2 parents eece1e4 + d98ff7d commit 23cf032

14 files changed

Lines changed: 389 additions & 61 deletions

File tree

apps/backend/runners/github/services/followup_reviewer.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,8 @@ async def _attempt_extraction_call(
886886
"""Attempt a short SDK call with minimal schema to recover review data.
887887
888888
This is the extraction recovery step when full structured output validation fails.
889-
Uses FollowupExtractionResponse (~6 flat fields) which has near-100% success rate.
889+
Uses FollowupExtractionResponse (small schema with ExtractedFindingSummary nesting)
890+
which has near-100% success rate.
890891
891892
Uses create_client() + process_sdk_stream() for proper OAuth handling,
892893
matching the pattern in parallel_followup_reviewer.py.
@@ -900,7 +901,8 @@ async def _attempt_extraction_call(
900901
extraction_prompt = (
901902
"Extract the key review data from the following AI analysis output. "
902903
"Return the verdict, reasoning, resolved finding IDs, unresolved finding IDs, "
903-
"one-line summaries of any new findings, and counts of confirmed/dismissed findings.\n\n"
904+
"structured summaries of any new findings (including severity, description, file path, and line number), "
905+
"and counts of confirmed/dismissed findings.\n\n"
904906
f"--- AI ANALYSIS OUTPUT ---\n{text[:8000]}\n--- END ---"
905907
)
906908

@@ -946,9 +948,16 @@ async def _attempt_extraction_call(
946948

947949
# Convert extraction to internal format with reconstructed findings
948950
new_findings = []
949-
for i, summary in enumerate(extracted.new_finding_summaries):
951+
for i, summary_obj in enumerate(extracted.new_finding_summaries):
950952
new_findings.append(
951-
create_finding_from_summary(summary, i, id_prefix="FR")
953+
create_finding_from_summary(
954+
summary=summary_obj.description,
955+
index=i,
956+
id_prefix="FR",
957+
severity_override=summary_obj.severity,
958+
file=summary_obj.file,
959+
line=summary_obj.line,
960+
)
952961
)
953962

954963
# Build finding_resolutions from extraction data for _apply_ai_resolutions

apps/backend/runners/github/services/parallel_followup_reviewer.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,7 +1129,8 @@ async def _attempt_extraction_call(
11291129
"""Attempt a short SDK call with a minimal schema to recover review data.
11301130
11311131
This is the Tier 2 recovery step when full structured output validation fails.
1132-
Uses FollowupExtractionResponse (~6 flat fields) which has near-100% success rate.
1132+
Uses FollowupExtractionResponse (small schema with ExtractedFindingSummary nesting)
1133+
which has near-100% success rate.
11331134
11341135
Returns parsed result dict on success, None on failure.
11351136
"""
@@ -1146,7 +1147,8 @@ async def _attempt_extraction_call(
11461147
extraction_prompt = (
11471148
"Extract the key review data from the following AI analysis output. "
11481149
"Return the verdict, reasoning, resolved finding IDs, unresolved finding IDs, "
1149-
"one-line summaries of any new findings, and counts of confirmed/dismissed findings.\n\n"
1150+
"structured summaries of any new findings (including severity, description, file path, and line number), "
1151+
"and counts of confirmed/dismissed findings.\n\n"
11501152
f"--- AI ANALYSIS OUTPUT ---\n{text[:8000]}\n--- END ---"
11511153
)
11521154

@@ -1205,10 +1207,17 @@ async def _attempt_extraction_call(
12051207
findings = []
12061208
new_finding_ids = []
12071209

1208-
# 1. Convert new_finding_summaries to minimal PRReviewFinding objects
1209-
# Uses shared helper for "SEVERITY: description" parsing and ID generation
1210-
for i, summary in enumerate(extracted.new_finding_summaries):
1211-
finding = create_finding_from_summary(summary, i, id_prefix="FU")
1210+
# 1. Convert new_finding_summaries to PRReviewFinding objects
1211+
# ExtractedFindingSummary objects carry file/line from extraction
1212+
for i, summary_obj in enumerate(extracted.new_finding_summaries):
1213+
finding = create_finding_from_summary(
1214+
summary=summary_obj.description,
1215+
index=i,
1216+
id_prefix="FU",
1217+
severity_override=summary_obj.severity,
1218+
file=summary_obj.file,
1219+
line=summary_obj.line,
1220+
)
12121221
new_finding_ids.append(finding.id)
12131222
findings.append(finding)
12141223

apps/backend/runners/github/services/parallel_orchestrator_reviewer.py

Lines changed: 77 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,12 +1289,30 @@ async def review(self, context: PRContext) -> PRReviewResult:
12891289
f"{len(filtered_findings)} filtered"
12901290
)
12911291

1292-
# No confidence routing - validation is binary via finding-validator
1293-
unique_findings = validated_findings
1294-
logger.info(f"[PRReview] Final findings: {len(unique_findings)} validated")
1292+
# Separate active findings (drive verdict) from dismissed (shown in UI only)
1293+
active_findings = []
1294+
dismissed_findings = []
1295+
for f in validated_findings:
1296+
if f.validation_status == "dismissed_false_positive":
1297+
dismissed_findings.append(f)
1298+
else:
1299+
active_findings.append(f)
12951300

1301+
safe_print(
1302+
f"[ParallelOrchestrator] Final: {len(active_findings)} active, "
1303+
f"{len(dismissed_findings)} disputed by validator",
1304+
flush=True,
1305+
)
1306+
logger.info(
1307+
f"[PRReview] Final findings: {len(active_findings)} active, "
1308+
f"{len(dismissed_findings)} disputed"
1309+
)
1310+
1311+
# All findings (active + dismissed) go in the result for UI display
1312+
all_review_findings = validated_findings
12961313
logger.info(
1297-
f"[ParallelOrchestrator] Review complete: {len(unique_findings)} findings"
1314+
f"[ParallelOrchestrator] Review complete: {len(all_review_findings)} findings "
1315+
f"({len(active_findings)} active, {len(dismissed_findings)} disputed)"
12981316
)
12991317

13001318
# Fetch CI status for verdict consideration
@@ -1304,9 +1322,9 @@ async def review(self, context: PRContext) -> PRReviewResult:
13041322
f"{ci_status.get('failing', 0)} failing, {ci_status.get('pending', 0)} pending"
13051323
)
13061324

1307-
# Generate verdict (includes merge conflict check, branch-behind check, and CI status)
1325+
# Generate verdict from ACTIVE findings only (dismissed don't affect verdict)
13081326
verdict, verdict_reasoning, blockers = self._generate_verdict(
1309-
unique_findings,
1327+
active_findings,
13101328
has_merge_conflicts=context.has_merge_conflicts,
13111329
merge_state_status=context.merge_state_status,
13121330
ci_status=ci_status,
@@ -1317,7 +1335,7 @@ async def review(self, context: PRContext) -> PRReviewResult:
13171335
verdict=verdict,
13181336
verdict_reasoning=verdict_reasoning,
13191337
blockers=blockers,
1320-
findings=unique_findings,
1338+
findings=all_review_findings,
13211339
agents_invoked=agents_invoked,
13221340
)
13231341

@@ -1362,7 +1380,7 @@ async def review(self, context: PRContext) -> PRReviewResult:
13621380
pr_number=context.pr_number,
13631381
repo=self.config.repo,
13641382
success=True,
1365-
findings=unique_findings,
1383+
findings=all_review_findings,
13661384
summary=summary,
13671385
overall_status=overall_status,
13681386
verdict=verdict,
@@ -1937,12 +1955,38 @@ async def _validate_findings(
19371955
validated_findings.append(finding)
19381956

19391957
elif validation.validation_status == "dismissed_false_positive":
1940-
# Dismiss - do not include
1941-
dismissed_count += 1
1942-
logger.info(
1943-
f"[PRReview] Dismissed {finding.id} as false positive: "
1944-
f"{validation.explanation[:100]}"
1945-
)
1958+
# Protect cross-validated findings from dismissal —
1959+
# if multiple specialists independently found the same issue,
1960+
# a single validator should not override that consensus
1961+
if finding.cross_validated:
1962+
finding.validation_status = "confirmed_valid"
1963+
finding.validation_evidence = validation.code_evidence
1964+
finding.validation_explanation = (
1965+
f"[Auto-kept: cross-validated by {len(finding.source_agents)} agents] "
1966+
f"{validation.explanation}"
1967+
)
1968+
validated_findings.append(finding)
1969+
safe_print(
1970+
f"[FindingValidator] Kept cross-validated finding '{finding.title}' "
1971+
f"despite dismissal (agents={finding.source_agents})",
1972+
flush=True,
1973+
)
1974+
else:
1975+
# Keep finding but mark as dismissed (user can see it in UI)
1976+
finding.validation_status = "dismissed_false_positive"
1977+
finding.validation_evidence = validation.code_evidence
1978+
finding.validation_explanation = validation.explanation
1979+
validated_findings.append(finding)
1980+
dismissed_count += 1
1981+
safe_print(
1982+
f"[FindingValidator] Disputed '{finding.title}': "
1983+
f"{validation.explanation} (file={finding.file}:{finding.line})",
1984+
flush=True,
1985+
)
1986+
logger.info(
1987+
f"[PRReview] Disputed {finding.id}: "
1988+
f"{validation.explanation[:200]}"
1989+
)
19461990

19471991
elif validation.validation_status == "needs_human_review":
19481992
# Keep but flag
@@ -2127,11 +2171,16 @@ def _generate_summary(
21272171
sev = f.severity.value
21282172
emoji = severity_emoji.get(sev, "⚪")
21292173

2174+
is_disputed = f.validation_status == "dismissed_false_positive"
2175+
21302176
# Finding header with location
21312177
line_range = f"L{f.line}"
21322178
if f.end_line and f.end_line != f.line:
21332179
line_range = f"L{f.line}-L{f.end_line}"
2134-
lines.append(f"#### {emoji} [{sev.upper()}] {f.title}")
2180+
if is_disputed:
2181+
lines.append(f"#### ⚪ [DISPUTED] ~~{f.title}~~")
2182+
else:
2183+
lines.append(f"#### {emoji} [{sev.upper()}] {f.title}")
21352184
lines.append(f"**File:** `{f.file}` ({line_range})")
21362185

21372186
# Cross-validation badge
@@ -2161,6 +2210,7 @@ def _generate_summary(
21612210
status_label = {
21622211
"confirmed_valid": "Confirmed",
21632212
"needs_human_review": "Needs human review",
2213+
"dismissed_false_positive": "Disputed by validator",
21642214
}.get(f.validation_status, f.validation_status)
21652215
lines.append("")
21662216
lines.append(f"**Validation:** {status_label}")
@@ -2182,18 +2232,27 @@ def _generate_summary(
21822232

21832233
lines.append("")
21842234

2185-
# Findings count summary
2235+
# Findings count summary (exclude dismissed from active count)
2236+
active_count = 0
2237+
dismissed_count = 0
21862238
by_severity: dict[str, int] = {}
21872239
for f in findings:
2240+
if f.validation_status == "dismissed_false_positive":
2241+
dismissed_count += 1
2242+
continue
2243+
active_count += 1
21882244
sev = f.severity.value
21892245
by_severity[sev] = by_severity.get(sev, 0) + 1
21902246
summary_parts = []
21912247
for sev in ["critical", "high", "medium", "low"]:
21922248
if sev in by_severity:
21932249
summary_parts.append(f"{by_severity[sev]} {sev}")
2194-
lines.append(
2195-
f"**Total:** {len(findings)} finding(s) ({', '.join(summary_parts)})"
2250+
count_text = (
2251+
f"**Total:** {active_count} finding(s) ({', '.join(summary_parts)})"
21962252
)
2253+
if dismissed_count > 0:
2254+
count_text += f" + {dismissed_count} disputed"
2255+
lines.append(count_text)
21972256
lines.append("")
21982257

21992258
lines.append("---")

apps/backend/runners/github/services/pydantic_models.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -533,10 +533,26 @@ class FindingValidationResponse(BaseModel):
533533
# =============================================================================
534534

535535

536+
class ExtractedFindingSummary(BaseModel):
537+
"""Per-finding summary with file location for extraction recovery."""
538+
539+
severity: str = Field(description="Severity level: LOW, MEDIUM, HIGH, or CRITICAL")
540+
description: str = Field(description="One-line description of the finding")
541+
file: str = Field(
542+
default="unknown", description="File path where the issue was found"
543+
)
544+
line: int = Field(default=0, description="Line number in the file (0 if unknown)")
545+
546+
@field_validator("severity", mode="before")
547+
@classmethod
548+
def _normalize_severity(cls, v: str) -> str:
549+
return _normalize_severity(v)
550+
551+
536552
class FollowupExtractionResponse(BaseModel):
537553
"""Minimal extraction schema for recovering data when full structured output fails.
538554
539-
Deliberately kept small (~6 fields, no nesting) for near-100% validation success.
555+
Uses ExtractedFindingSummary for new findings to preserve file/line information.
540556
Used as an intermediate recovery step before falling back to raw text parsing.
541557
"""
542558

@@ -552,9 +568,9 @@ class FollowupExtractionResponse(BaseModel):
552568
default_factory=list,
553569
description="IDs of previous findings that remain unresolved",
554570
)
555-
new_finding_summaries: list[str] = Field(
571+
new_finding_summaries: list[ExtractedFindingSummary] = Field(
556572
default_factory=list,
557-
description="One-line summary of each new finding (e.g. 'HIGH: cleanup deletes QA-rejected specs in batch_commands.py')",
573+
description="Structured summary of each new finding with file location",
558574
)
559575
confirmed_finding_count: int = Field(
560576
0, description="Number of findings confirmed as valid"

apps/backend/runners/github/services/recovery_utils.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ def create_finding_from_summary(
8080
summary: str,
8181
index: int,
8282
id_prefix: str = "FR",
83+
severity_override: str | None = None,
84+
file: str = "unknown",
85+
line: int = 0,
8386
) -> PRReviewFinding:
8487
"""Create a PRReviewFinding from an extraction summary string.
8588
@@ -90,11 +93,20 @@ def create_finding_from_summary(
9093
summary: Raw summary string, e.g. "HIGH: Missing null check in parser.py"
9194
index: The index of the finding in the extraction list.
9295
id_prefix: ID prefix for traceability. Default "FR" (Followup Recovery).
96+
severity_override: If provided, use this severity instead of parsing from summary.
97+
file: File path where the issue was found (default "unknown").
98+
line: Line number in the file (default 0).
9399
94100
Returns:
95101
A PRReviewFinding with parsed severity, generated ID, and description.
96102
"""
97103
severity, description = parse_severity_from_summary(summary)
104+
105+
# Use severity_override if provided
106+
if severity_override is not None:
107+
severity_map = {k.rstrip(":"): v for k, v in _EXTRACTION_SEVERITY_MAP}
108+
severity = severity_map.get(severity_override.upper(), severity)
109+
98110
finding_id = generate_recovery_finding_id(index, description, prefix=id_prefix)
99111

100112
return PRReviewFinding(
@@ -103,6 +115,6 @@ def create_finding_from_summary(
103115
category=ReviewCategory.QUALITY,
104116
title=description[:80],
105117
description=f"[Recovered via extraction] {description}",
106-
file="unknown",
107-
line=0,
118+
file=file,
119+
line=line,
108120
)

apps/frontend/src/main/ipc-handlers/github/pr-handlers.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,10 @@ export interface PRReviewFinding {
268268
endLine?: number;
269269
suggestedFix?: string;
270270
fixable: boolean;
271+
validationStatus?: "confirmed_valid" | "dismissed_false_positive" | "needs_human_review" | null;
272+
validationExplanation?: string;
273+
sourceAgents?: string[];
274+
crossValidated?: boolean;
271275
}
272276

273277
/**
@@ -1341,6 +1345,10 @@ function getReviewResult(project: Project, prNumber: number): PRReviewResult | n
13411345
endLine: f.end_line,
13421346
suggestedFix: f.suggested_fix,
13431347
fixable: f.fixable ?? false,
1348+
validationStatus: f.validation_status ?? null,
1349+
validationExplanation: f.validation_explanation ?? undefined,
1350+
sourceAgents: f.source_agents ?? [],
1351+
crossValidated: f.cross_validated ?? false,
13441352
})) ?? [],
13451353
summary: data.summary ?? "",
13461354
overallStatus: data.overall_status ?? "comment",

apps/frontend/src/preload/api/modules/github-api.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,10 @@ export interface PRReviewFinding {
376376
endLine?: number;
377377
suggestedFix?: string;
378378
fixable: boolean;
379+
validationStatus?: 'confirmed_valid' | 'dismissed_false_positive' | 'needs_human_review' | null;
380+
validationExplanation?: string;
381+
sourceAgents?: string[];
382+
crossValidated?: boolean;
379383
}
380384

381385
/**

0 commit comments

Comments
 (0)