Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 9 additions & 24 deletions src/sentry/seer/autofix/autofix_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,14 @@ def get_latest_iteration_index(state: SeerRunState) -> int:
for block in reversed(state.blocks):
metadata = block.message.metadata or {}
if metadata.get("step") == AutofixStep.PR_ITERATION.value:
return int(metadata["iteration_index"])
iteration_index = metadata.get("iteration_index")
if iteration_index is None:
logger.error(
"autofix.get_latest_iteration_index.missing_iteration_index",
extra={"run_id": state.run_id},
)
return 0
return int(iteration_index)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Invalid zero iteration index emitted

Medium Severity

When a PR_ITERATION block lacks iteration_index metadata, get_latest_iteration_index logs and returns 0. Iteration numbering starts at 1, so ITERATION_COMPLETED webhooks and related analytics can publish an invalid iteration_index instead of failing or deriving the correct value.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 11bbfa9. Configure here.

return 0


Expand All @@ -241,27 +248,6 @@ def get_iteration_for_insert_index(state: SeerRunState, insert_index: int) -> in
return int(metadata["iteration_index"])


def recover_iteration_feedback(state: SeerRunState, insert_index: int) -> str | None:
"""
Recover the user feedback that originally triggered a PR iteration.

When a PR iteration is retried, the frontend truncates the run at the
iteration's first block (``insert_index``). That block carries the original
feedback in its metadata, so we reuse it to retry with the same feedback
rather than dropping it.
"""
if insert_index < 0 or insert_index >= len(state.blocks):
return None
metadata = state.blocks[insert_index].message.metadata
if metadata is None:
return None
raw = metadata.get("feedback")
if raw is None:
return None
# Feedback is stored as a JSON object (``{"text", "username", "timestamp"}``).
return json.loads(raw).get("text")


def get_autofix_agent_client(
group: Group,
intelligence_level: Literal["low", "medium", "high"] = "medium",
Expand Down Expand Up @@ -414,8 +400,7 @@ def trigger_autofix_agent(
}
if step == AutofixStep.PR_ITERATION and feedback is not None:
# Stored as a JSON object so the UI can attribute the feedback to its
# source and show when it was submitted. See ``recover_iteration_feedback``
# for the read side.
# source and show when it was submitted.
prompt_metadata["feedback"] = json.dumps(
{
"text": feedback.message,
Expand Down
85 changes: 84 additions & 1 deletion src/sentry/seer/autofix/introspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sentry.models.organization import Organization
from sentry.seer.agent.client_models import SeerRunState
from sentry.seer.autofix.artifact_schemas import RootCauseArtifact, SolutionArtifact
from sentry.seer.autofix.autofix_agent import AutofixStep
from sentry.seer.autofix.autofix_agent import AutofixStep, get_latest_iteration_index
from sentry.seer.models.seer_api_models import SeerApiError
from sentry.seer.signed_seer_api import LlmGenerateRequest, make_llm_generate_request
from sentry.utils import json, metrics
Expand Down Expand Up @@ -442,3 +442,86 @@ def introspect_code_changes(
)

return None


def _iteration_introspection_prompt(
*,
short_id: str,
title: str,
culprit: str,
diffs_by_repo: dict[str, str],
) -> str:
return dedent(f"""\
You are evaluating whether pull request iteration changes are ready for issue {short_id}: "{title}" (culprit: {culprit}).

## Revised Code Changes to Evaluate
{_format_code_changes_section(diffs_by_repo)}
## Your Task

Evaluate whether the revised changes are suitable to update the existing pull request.

Choose one action:

- **continue**: The revised changes are focused, coherent, and suitable to add to the existing pull request.

- **needs_more_context**: The revised changes are incomplete or there is not enough evidence to tell whether they address the requested iteration.

- **redo**: The revised changes introduce obvious bugs, contradict the prior fix, or make unrelated modifications.

- **not_actionable**: The pull request cannot be iterated through code changes. For example: no revised code changes were produced.

Include a brief reason (1-2 sentences) explaining your decision.\
""")


def introspect_iteration(
organization: Organization,
run_id: int,
state: SeerRunState,
group: Group,
) -> IntrospectionDecision | None:
iteration_index = get_latest_iteration_index(state)
try:
diffs_by_repo = state.get_diffs_by_repo()
if not diffs_by_repo:
logger.warning(
"autofix.introspection.no_artifact",
extra={
"run_id": run_id,
"organization_id": organization.id,
"step": "pr_iteration",
"type": "code_changes",
"iteration_index": iteration_index,
},
)
return None

diffs_by_repo_str = {
repo: "\n".join(fp.diff for fp in patches if fp.diff)
for repo, patches in diffs_by_repo.items()
}

prompt = _iteration_introspection_prompt(
short_id=group.qualified_short_id or str(group.id),
title=group.title or "",
culprit=group.culprit or "",
diffs_by_repo=diffs_by_repo_str,
)

return _run_introspection(
run_id,
AutofixStep.PR_ITERATION,
prompt,
)
except Exception:
logger.exception(
"autofix.introspection.failed",
extra={
"run_id": run_id,
"organization_id": organization.id,
"step": "pr_iteration",
"iteration_index": iteration_index,
},
)

return None
79 changes: 52 additions & 27 deletions src/sentry/seer/autofix/on_completion_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from sentry.seer.autofix.autofix_agent import (
STEP_CONFIGS,
AutofixStep,
get_latest_iteration_index,
trigger_autofix_agent,
trigger_coding_agent_handoff,
trigger_push_changes,
Expand All @@ -29,6 +30,7 @@
from sentry.seer.autofix.introspection import (
IntrospectionDecision,
introspect_code_changes,
introspect_iteration,
introspect_root_cause,
introspect_solution,
)
Expand Down Expand Up @@ -167,18 +169,7 @@ def _send_step_webhook(
# handled but the expectation is that we only create PRs once
# per seer run.
webhook_action_type = SeerActionType.PR_CREATED
webhook_payload["pull_requests"] = [
{
"provider": "unknown", # TODO: we don't have the repo object readily accessible here
"repo_name": pull_request.repo_name,
"pull_request": {
"pr_id": pull_request.pr_id,
"pr_number": pull_request.pr_number,
"pr_url": pull_request.pr_url,
},
}
for pull_request in state.repo_pr_states.values()
]
webhook_payload["pull_requests"] = cls._format_pull_requests_payload(state)
is_pr_created = True
analytics.record(
AiAutofixPrCreatedCompletedEvent(
Expand All @@ -190,20 +181,19 @@ def _send_step_webhook(
)
else:
webhook_action_type = SeerActionType.CODING_COMPLETED
diffs_by_repo = state.get_diffs_by_repo()
webhook_payload["code_changes"] = {
repo: [
{
"diff": p.diff,
"path": p.patch.path,
"type": p.patch.type,
"added": p.patch.added,
"removed": p.patch.removed,
}
for p in patches
]
for repo, patches in diffs_by_repo.items()
}
webhook_payload["code_changes"] = cls._format_code_changes_payload(state)
elif current_step == AutofixStep.PR_ITERATION:
# PR iteration only runs against an existing PR, so there should be pr states.
if not state.repo_pr_states:
logger.error(
"autofix.on_completion_hook.pr_iteration_missing_repo_pr_states",
extra={"run_id": run_id, "organization_id": organization.id},
)
webhook_action_type = SeerActionType.ITERATION_COMPLETED
iteration_index = get_latest_iteration_index(state)
webhook_payload["pull_requests"] = cls._format_pull_requests_payload(state)
webhook_payload["code_changes"] = cls._format_code_changes_payload(state)
webhook_payload["iteration_index"] = iteration_index
Comment thread
cursor[bot] marked this conversation as resolved.

if not webhook_action_type:
return
Expand Down Expand Up @@ -264,6 +254,38 @@ def _send_step_webhook(
)
)

@classmethod
Comment thread
cursor[bot] marked this conversation as resolved.
def _format_code_changes_payload(cls, state: SeerRunState) -> dict:
diffs_by_repo = state.get_diffs_by_repo()
return {
repo: [
{
"diff": p.diff,
"path": p.patch.path,
"type": p.patch.type,
"added": p.patch.added,
"removed": p.patch.removed,
}
for p in patches
]
for repo, patches in diffs_by_repo.items()
}

@classmethod
def _format_pull_requests_payload(cls, state: SeerRunState) -> list[dict]:
return [
{
"provider": "unknown",
"repo_name": pull_request.repo_name,
"pull_request": {
"pr_id": pull_request.pr_id,
"pr_number": pull_request.pr_number,
"pr_url": pull_request.pr_url,
},
}
for pull_request in state.repo_pr_states.values()
]

@classmethod
def _get_current_step(
cls, state: SeerRunState
Expand Down Expand Up @@ -350,6 +372,7 @@ def _maybe_continue_pipeline(
group,
)
if decision is not None:
iteration_index = get_latest_iteration_index(state)
analytics.record(
AiAutofixIntrospectionEvent(
organization_id=organization.id,
Expand All @@ -359,6 +382,7 @@ def _maybe_continue_pipeline(
step=current_step.value,
action=decision.action.value,
reached_stopping_point=reached_stopping_point,
iteration_index=iteration_index,

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Introspection index on wrong steps

Low Severity

Every introspection analytics event now sets iteration_index via get_latest_iteration_index, including root cause, solution, and code changes. Those steps never use PR iteration indices, so events get 0 or a stale iteration from earlier blocks instead of leaving the field unset.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 11bbfa9. Configure here.

)
)
logger.info(
Expand Down Expand Up @@ -452,7 +476,8 @@ def run_introspection(
return introspect_solution(organization, run_id, state, group)
elif step == AutofixStep.CODE_CHANGES:
return introspect_code_changes(organization, run_id, state, group)
Comment thread
sentry-warden[bot] marked this conversation as resolved.
return None
elif step == AutofixStep.PR_ITERATION:
return introspect_iteration(organization, run_id, state, group)

@classmethod
def _push_changes(cls, group: Group, run_id: int, state: SeerRunState) -> None:
Expand Down
32 changes: 1 addition & 31 deletions tests/sentry/seer/autofix/test_autofix_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
generate_autofix_handoff_prompt,
get_iteration_for_insert_index,
get_latest_iteration_index,
recover_iteration_feedback,
trigger_autofix_agent,
trigger_coding_agent_handoff,
trigger_push_changes,
Expand All @@ -28,7 +27,6 @@
from sentry.seer.models import SeerPermissionError
from sentry.sentry_apps.utils.webhooks import SeerActionType
from sentry.testutils.cases import TestCase
from sentry.utils import json


class TestGenerateAutofixHandoffPrompt(TestCase):
Expand Down Expand Up @@ -247,20 +245,10 @@ def test_all_prompts_are_dedented(self) -> None:
assert not prompt.startswith("\t"), f"{step} prompt starts with tab"


def _iteration_block(
iteration_index: int | None = None, feedback: str | None = None
) -> MemoryBlock:
def _iteration_block(iteration_index: int | None = None) -> MemoryBlock:
metadata: dict[str, str] = {"step": AutofixStep.PR_ITERATION.value}
if iteration_index is not None:
metadata["iteration_index"] = str(iteration_index)
if feedback is not None:
metadata["feedback"] = json.dumps(
{
"text": feedback,
"source": "user",
"timestamp": "2024-01-01T00:00:00Z",
}
)
return MemoryBlock(
id=f"block-{iteration_index}",
message=Message(role="assistant", content="iteration", metadata=metadata),
Expand Down Expand Up @@ -291,24 +279,6 @@ def test_get_iteration_for_insert_index(self) -> None:
state = _state_with_blocks([_iteration_block(1), _iteration_block(2)])
assert get_iteration_for_insert_index(state, 1) == 2

def test_recover_iteration_feedback_returns_feedback(self) -> None:
state = _state_with_blocks([_iteration_block(1, feedback="please fix the tests")])
assert recover_iteration_feedback(state, 0) == "please fix the tests"

def test_recover_iteration_feedback_out_of_range(self) -> None:
state = _state_with_blocks([])
assert recover_iteration_feedback(state, 0) is None
assert recover_iteration_feedback(state, -1) is None

def test_recover_iteration_feedback_no_metadata(self) -> None:
block = MemoryBlock(
id="block-none",
message=Message(role="assistant", content="x", metadata=None),
timestamp="2024-01-01T00:00:00Z",
)
state = _state_with_blocks([block])
assert recover_iteration_feedback(state, 0) is None


class TestPrIterationPrompt(TestCase):
def setUp(self) -> None:
Expand Down
Loading
Loading