Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions src/sentry/seer/autofix/autofix_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import re
from enum import StrEnum
from typing import TYPE_CHECKING, Any, Literal, TypedDict, cast
from typing import TYPE_CHECKING, Any, Literal, NotRequired, TypedDict, cast

from django.utils import timezone
from pydantic import BaseModel
Expand Down Expand Up @@ -71,6 +71,13 @@ class UserUIFeedbackSource(TypedDict):
# use the same stable key (`user_id`) that `GroupSeen` uses to track which
# users have viewed an issue.
user_id: int
# The publicly serialized user, resolved at write time so the read path
# doesn't have to hydrate it. ``None`` if the user could not be serialized.
# This is serialized as an anonymous viewer (never as the requester) so the
# payload never includes the user's full email list, options, or flags: it
# is embedded in Seer prompt metadata and round-tripped back to any org
# member with group-read access.
user: NotRequired[Any]

@sentry-warden sentry-warden Bot Jun 16, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Self-serialized user (all emails, options, flags) stored in Seer feedback metadata and returned to other org members

When submitting pr_iteration feedback, the endpoint serializes the requesting user with as_user=serialize_generic_user(request.user). Because as_user equals the target user, UserSerializer._user_is_requester is True and the result is a UserSerializerResponseSelf containing the user's full registered email list, user options, and internal flags. This payload is stored in feedback.source["user"], embedded into the Seer message's prompt_metadata["feedback"], and round-tripped back through block.dict() in the GET endpoint, where any authenticated org member with group-read access can read another member's personal emails, options, and flags. Only a minimal attribution subset (id, name, avatarUrl) is needed.

Evidence
  • group_ai_autofix.py:329-331 calls user_service.serialize_many(filter={"user_ids":[request.user.id]}, as_user=serialize_generic_user(request.user)); since as_user is the same user, UserSerializer._user_is_requester returns True.
  • users/api/serializers/user.py:193-203 then populates emails (all UserEmail rows), options, and flags, producing UserSerializerResponseSelf rather than the public response.
  • The result is stored as feedback.source["user"] (Feedback in autofix_agent.py) and embedded via prompt_metadata["feedback"] = json.dumps({... "source": feedback.source ...}) at autofix_agent.py:418.
  • Message.metadata is dict[str, str] | None (agent/client_models.py:33) and is part of MemoryBlock; the GET handler returns blocks = [block.dict() for block in state.blocks] at group_ai_autofix.py:442, surfacing message.metadata.feedback to any org member with group-read access.
Also found at 2 additional locations
  • src/sentry/seer/endpoints/group_ai_autofix.py:68-69
  • src/sentry/seer/endpoints/group_ai_autofix.py:326-342

Identified by Warden wrdn-data-exfil, wrdn-pii · GE3-KTX



# Discriminated on ``type``. Add new TypedDict variants to this union as more
Expand All @@ -87,6 +94,14 @@ class NoSeerQuotaException(Exception):
pass


class PrIterationNoPullRequestException(Exception):
    """Raised when a PR iteration is requested but the run has no pull request state.

    ``trigger_autofix_agent`` raises this for the ``PR_ITERATION`` step when
    there is no run state or its ``repo_pr_states`` is empty; the autofix
    endpoint translates it into a 400 response.
    """


class PrIterationNotEnabledException(Exception):
    """Raised when a PR iteration is requested but PR iteration is not enabled.

    ``trigger_autofix_agent`` raises this for the ``PR_ITERATION`` step when
    ``pr_iteration_enabled`` is falsy; the autofix endpoint translates it into
    a 403 response.
    """


class AutofixStep(StrEnum):
"""Available autofix steps."""

Expand Down Expand Up @@ -274,6 +289,11 @@ def get_autofix_agent_client(
)


def get_autofix_run_state(group: Group, run_id: int) -> SeerRunState:
    """Return the Seer run state for ``run_id`` in the context of ``group``.

    Builds an autofix agent client for the group and delegates the lookup to
    ``_get_group_run_state``.
    """
    return _get_group_run_state(get_autofix_agent_client(group), group, run_id)


def _validate_run_belongs_to_group(state: SeerRunState, group: Group) -> None:
group_id = state.metadata.get("group_id") if state.metadata else None
if group_id != group.id:
Expand Down Expand Up @@ -374,7 +394,11 @@ def trigger_autofix_agent(
pr_iteration_enabled = run_state.metadata.get("pr_iteration_enabled", pr_iteration_enabled)

iteration_index: int | None = None
if step == AutofixStep.PR_ITERATION and run_state is not None:
if step == AutofixStep.PR_ITERATION:
Comment on lines 396 to +397

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The get_iteration_for_insert_index function lacks bounds checking for insert_index and safe access for metadata["iteration_index"], risking unhandled exceptions.
Severity: HIGH

Suggested Fix

Add validation to ensure insert_index is within the bounds of the state.blocks list before access. When retrieving iteration_index from a block's metadata, use the .get() method with a default value or wrap the access in a try...except KeyError block to prevent unhandled exceptions.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: src/sentry/seer/autofix/autofix_agent.py#L388-L389

Potential issue: In the `pr_iteration` flow, the function
`get_iteration_for_insert_index` uses a user-provided `insert_index` to access
`state.blocks[insert_index]` without performing a bounds check. It also accesses
`metadata["iteration_index"]` without a safe fallback like `.get()`. An out-of-bounds
`insert_index` will raise an `IndexError`, and a missing `iteration_index` key will
raise a `KeyError`. Neither of these exceptions is caught by the endpoint's existing
exception handlers, so either one will cause an unhandled 500 server error on the public API.

Did we get this right? 👍 / 👎 to inform future reviews.

if not pr_iteration_enabled:
raise PrIterationNotEnabledException()
if run_state is None or not run_state.repo_pr_states:
raise PrIterationNoPullRequestException()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR iteration allows non-created PRs

Medium Severity

pr_iteration is only blocked when repo_pr_states is empty, not when every entry lacks a created PR. Runs with only creating or error states still pass validation and return 202, which conflicts with the documented requirement for at least one created pull request.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 8b754c1. Configure here.

if insert_index is not None:
iteration_index = get_iteration_for_insert_index(run_state, insert_index)
else:
Expand Down
6 changes: 5 additions & 1 deletion src/sentry/seer/autofix/on_completion_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,11 @@ def _maybe_continue_pipeline(
group,
)
if decision is not None:
iteration_index = get_latest_iteration_index(state)
iteration_index = (
get_latest_iteration_index(state)
if current_step == AutofixStep.PR_ITERATION
else None
)
analytics.record(
AiAutofixIntrospectionEvent(
organization_id=organization.id,
Expand Down
57 changes: 55 additions & 2 deletions src/sentry/seer/endpoints/group_ai_autofix.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@
from sentry.seer.autofix.autofix_agent import (
UNKNOWN_RUN_ID_FOR_GROUP,
AutofixStep,
Feedback,
NoSeerQuotaException,
PrIterationNoPullRequestException,

Check warning on line 47 in src/sentry/seer/endpoints/group_ai_autofix.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

Unhandled KeyError/IndexError in get_iteration_for_insert_index returns 500 for pr_iteration retries

`get_iteration_for_insert_index` in `autofix_agent.py` does `block = state.blocks[insert_index]` followed by `return int(metadata["iteration_index"])` with no bounds check and a bare dict-key access. It is called from `trigger_autofix_agent` at line 403 when `step == AutofixStep.PR_ITERATION` and `insert_index is not None`. The endpoint passes `insert_index=data.get("insert_index")` directly from the request body, where it is only validated as an optional `serializers.IntegerField` (no min/max, no block-type check). A client supplying an `insert_index` that is out of range raises `IndexError`, and one pointing at a non-iteration block (root_cause/solution/code_changes block whose metadata has no `iteration_index`) raises `KeyError`. The endpoint's `except` clauses (lines 374-388) only handle `NoSeerQuotaException`, `PrIterationNotEnabledException`, `PrIterationNoPullRequestException`, and `SeerPermissionError`, so either exception propagates as an unhandled 500.
PrIterationNotEnabledException,
get_autofix_agent_state,
trigger_autofix_agent,
trigger_coding_agent_handoff,
Expand All @@ -65,6 +68,7 @@
from sentry.seer.endpoints.utils import get_seer_run, resolve_seer_run
from sentry.seer.models import SeerPermissionError
from sentry.types.ratelimit import RateLimit, RateLimitCategory
from sentry.users.services.user.service import user_service

Check warning on line 71 in src/sentry/seer/endpoints/group_ai_autofix.py

View check run for this annotation

@sentry/warden / warden: wrdn-data-exfil

Anonymous-view UserSerializer leaks email/isSuperuser/isStaff into Seer feedback metadata

At `src/sentry/seer/endpoints/group_ai_autofix.py:328`, the `pr_iteration` handler serializes the feedback author with `user_service.serialize_many(filter={"user_ids": [request.user.id]})` and no `as_user`/auth context, expecting the "public" representation to omit sensitive fields. However `UserSerializer.serialize()` (src/sentry/users/api/serializers/user.py:163-191) unconditionally includes `email`, `isSuperuser`, `isStaff`, and `hasPasswordAuth`; only the extra `emails` list, `options`, and `flags` are gated on `_user_is_requester`/`user.is_superuser`. The full dict is stored in `feedback.source["user"]`, JSON-dumped into `prompt_metadata["feedback"]` (autofix_agent.py:428), sent to the external Seer service, and round-tripped back to any org member with group-read access. A Sentry staff/superuser submitting PR-iteration feedback therefore exposes their staff role and primary email to the customer org and to Seer. Replace the full serializer output with a narrow DTO (e.g. `{"id", "name", "avatarUrl"}`).

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -94,6 +98,7 @@
"root_cause",
"solution",
"code_changes",
"pr_iteration",
Comment thread
sentry-warden[bot] marked this conversation as resolved.
"open_pr",
"coding_agent_handoff",
],
Expand Down Expand Up @@ -295,18 +300,52 @@
}
return Response(open_pr_body, status=status.HTTP_202_ACCEPTED)

if step == "pr_iteration":
if resolved_run_id is None:
return Response(
{"detail": "run_id is required for pr_iteration"},
status=status.HTTP_400_BAD_REQUEST,
)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing endpoint-level feature flag check for pr_iteration

Medium Severity

The endpoint checks that resolved_run_id is not None for pr_iteration, but never checks features.has("organizations:autofix-pr-iteration", ...) before calling trigger_autofix_agent. The test test_pr_iteration_requires_feature_flag asserts mock_trigger_explorer.assert_not_called() and expects a 403, but since trigger_autofix_agent is mocked (and won't raise PrIterationNotEnabledException), the endpoint proceeds normally and returns 202. An endpoint-level feature flag gate is needed here to match the documented intended behavior.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit d36c8a5. Configure here.


# Handle all built-in Seer steps. A missing run_id means this call starts a new
# autofix run (the kickoff); a provided run_id is advancing an existing run.
is_autofix_kickoff = resolved_run_id is None
user_context = data.get("user_context")
feedback = None
if (
step == "pr_iteration"
and user_context is not None
and request.user
and request.user.is_authenticated
):
# Serialize the user here on write so the read path (GET) doesn't have
Comment on lines +317 to +321

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The check user_context is not None allows empty strings, creating unnecessary Feedback objects and triggering RPC calls even when no meaningful context is provided.
Severity: LOW

Suggested Fix

Change the condition at line 317 from if user_context is not None to if user_context. This truthy check will correctly filter out empty strings and prevent the creation of meaningless feedback objects.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: src/sentry/seer/endpoints/group_ai_autofix.py#L317-L321

Potential issue: When a client sends an empty string for `user_context`, the condition
`user_context is not None` evaluates to true. This results in the creation and storage
of a `Feedback` object with an empty message. This behavior leads to several
inefficiencies: an unnecessary RPC call to `user_service.serialize_many()`, storage of
meaningless feedback data in the run's metadata, and a misleading `has_user_context`
metric being set to `"yes"`. The intended behavior, as seen in `build_step_prompt`, is
likely to only process feedback when `user_context` is a non-empty string.

# to hydrate it from the stored user_id on every fetch. Serialize as
# an anonymous viewer (no ``as_user``) so the result is the public
# user representation rather than the self representation, which would
# leak the user's full email list, options, and flags. This payload is
# embedded in Seer prompt metadata and readable by any org member with
# group-read access.
serialized_users = user_service.serialize_many(
filter={"user_ids": [request.user.id]},
)
feedback = Feedback(
message=user_context,
source={
"type": "user-ui",
"user_id": request.user.id,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a thought but why not just store the user here and avoid having to hydrate the user later?

This is a manual process, so my expectation is that this won't take up that much storage anyway.

"user": serialized_users[0] if serialized_users else None,

Check warning on line 336 in src/sentry/seer/endpoints/group_ai_autofix.py

View check run for this annotation

@sentry/warden / warden: wrdn-pii

Authenticated user's real email embedded in Seer prompt metadata sent to external vendor

The base `UserSerializer` always emits `"email": obj.email` regardless of `as_user`; this real email is JSON-dumped into `prompt_metadata["feedback"]` and forwarded to Seer (external AI vendor) via `client.continue_run()`, and the code comment confirms the payload is readable by any org member with group-read access. Replace the serialized user with only the fields needed for attribution (e.g., display name and avatar URL), or strip the `email` field before embedding in prompt metadata.
},
)
try:
run_id = trigger_autofix_agent(
group=group,
step=AutofixStep(step),
referrer=_parse_autofix_referrer(data.get("referrer")),
stopping_point=AutofixStoppingPoint(stopping_point) if stopping_point else None,
run_id=resolved_run_id,
user_context=data.get("user_context"),
user_context=user_context,
insert_index=data.get("insert_index"),
feedback=feedback,
)
if is_autofix_kickoff:
# Record the trigger action only on kickoff, not on each subsequent
Expand Down Expand Up @@ -334,6 +373,16 @@
return Response(kickoff_body, status=status.HTTP_202_ACCEPTED)
except NoSeerQuotaException:
return Response("No budget for Seer Autofix.", status=status.HTTP_402_PAYMENT_REQUIRED)
except PrIterationNotEnabledException:
return Response(
{"detail": "PR iteration is not enabled for this organization"},
status=status.HTTP_403_FORBIDDEN,
)
except PrIterationNoPullRequestException:
return Response(
{"detail": "Cannot iterate on a PR before one has been created"},
status=status.HTTP_400_BAD_REQUEST,
)
except SeerPermissionError as e:
if _is_unknown_run_id_error(e):
return Response(status=status.HTTP_404_NOT_FOUND)
Expand Down Expand Up @@ -393,13 +442,14 @@
)

run = get_seer_run(state.run_id, group.organization)
blocks = [block.dict() for block in state.blocks]
return Response(
{
"autofix": {
"run_id": state.run_id,
"sentry_run_id": str(run.uuid) if run else None,
"status": state.status,
"blocks": [block.dict() for block in state.blocks],
"blocks": blocks,
"updated_at": state.updated_at,
"pending_user_input": (
state.pending_user_input.dict() if state.pending_user_input else None
Expand All @@ -410,6 +460,9 @@
"coding_agents": {
agent_id: agent.dict() for agent_id, agent in state.coding_agents.items()
},
"pr_iteration_enabled": bool(
state.metadata.get("pr_iteration_enabled") if state.metadata else False
),
Comment thread
cursor[bot] marked this conversation as resolved.
}
}
)
10 changes: 10 additions & 0 deletions src/sentry/seer/entrypoints/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,16 @@ def _create_seer_activity(
pull_requests = event_payload.get("pull_requests", [])
if pull_requests:
activity_data["pull_requests"] = pull_requests
elif event_type == SentryAppEventType.SEER_ITERATION_COMPLETED:

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain what this activity does?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly here for parity with the other steps having activities, but I don't think this ends up doing anything right now.

There is code that looks like it will handle activities in the future, but it's behind a feature flag and there are currently no handlers implemented for these activities.

There's also an activity log in the UI where we show these, which is behind a feature flag as well.

pull_requests = event_payload.get("pull_requests", [])
if pull_requests:
activity_data["pull_requests"] = pull_requests
code_changes = event_payload.get("code_changes")
if code_changes:
activity_data["code_changes"] = code_changes
iteration_index = event_payload.get("iteration_index")
if iteration_index is not None:
activity_data["iteration_index"] = iteration_index

Activity.objects.create_group_activity(
group,
Expand Down
50 changes: 42 additions & 8 deletions tests/sentry/seer/autofix/test_autofix_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
STEP_CONFIGS,
AutofixStep,
NoSeerQuotaException,
PrIterationNoPullRequestException,
build_step_prompt,
generate_autofix_handoff_prompt,
get_iteration_for_insert_index,
Expand Down Expand Up @@ -256,12 +257,17 @@ def _iteration_block(iteration_index: int | None = None) -> MemoryBlock:
)


def _state_with_blocks(blocks: list[MemoryBlock], group_id: int | None = None) -> SeerRunState:
def _state_with_blocks(
blocks: list[MemoryBlock],
group_id: int | None = None,
repo_pr_states: dict[str, RepoPRState] | None = None,
) -> SeerRunState:
return SeerRunState(
run_id=67890,
blocks=blocks,
status="completed",
updated_at="2024-01-01T00:00:00Z",
repo_pr_states=repo_pr_states or {},
metadata={"group_id": group_id} if group_id is not None else None,
)

Expand Down Expand Up @@ -459,21 +465,49 @@ def test_pr_iteration_continued_run_increments_iteration_index(
mock_client = MagicMock()
mock_client_class.return_value = mock_client
mock_client.get_run.return_value = _state_with_blocks(
[_iteration_block(1)], group_id=self.group.id
[_iteration_block(1)],
group_id=self.group.id,
repo_pr_states={
"owner/repo": RepoPRState(
repo_name="owner/repo", pr_url="https://example.com/pull/7"
)
},
Comment thread
cursor[bot] marked this conversation as resolved.
)
mock_client.continue_run.return_value = 67890

trigger_autofix_agent(
group=self.group,
step=AutofixStep.PR_ITERATION,
referrer=AutofixReferrer.UNKNOWN,
run_id=67890,
)
with self.feature("organizations:autofix-pr-iteration"):
trigger_autofix_agent(
group=self.group,
step=AutofixStep.PR_ITERATION,
referrer=AutofixReferrer.UNKNOWN,
run_id=67890,
)

call_kwargs = mock_broadcast.call_args.kwargs
assert call_kwargs["event_name"] == SeerActionType.ITERATION_STARTED.value
assert call_kwargs["payload"]["iteration_index"] == 2

@patch("sentry.seer.autofix.autofix_agent.broadcast_webhooks_for_organization.delay")
@patch("sentry.seer.autofix.autofix_agent.SeerAgentClient")
def test_pr_iteration_requires_existing_pr(self, mock_client_class, mock_broadcast):
    """A PR iteration on a run without any PR state raises and triggers nothing."""
    seer_client = MagicMock()
    # Run state for this group with no blocks and no ``repo_pr_states`` supplied.
    seer_client.get_run.return_value = _state_with_blocks([], group_id=self.group.id)
    mock_client_class.return_value = seer_client

    # The feature flag is on, so the only expected failure is the missing PR.
    with self.feature("organizations:autofix-pr-iteration"):
        with pytest.raises(PrIterationNoPullRequestException):
            trigger_autofix_agent(
                group=self.group,
                step=AutofixStep.PR_ITERATION,
                referrer=AutofixReferrer.UNKNOWN,
                run_id=67890,
            )

    # The run must not be continued and no webhook may be broadcast.
    seer_client.continue_run.assert_not_called()
    mock_broadcast.assert_not_called()

@patch("sentry.seer.autofix.autofix_agent.SeerAgentClient")
@patch("sentry.quotas.backend.check_seer_quota", return_value=False)
def test_when_no_quota(self, mock_check_quota, mock_client_class):
Expand Down
Loading
Loading