Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions src/sentry/seer/autofix/autofix_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import re
from enum import StrEnum
from typing import TYPE_CHECKING, Any, Literal, TypedDict, cast
from typing import TYPE_CHECKING, Any, Literal, NotRequired, TypedDict, cast

from django.utils import timezone
from pydantic import BaseModel
Expand Down Expand Up @@ -71,6 +71,13 @@
# use the same stable key (`user_id`) that `GroupSeen` uses to track which
# users have viewed an issue.
user_id: int
# The publicly serialized user, resolved at write time so the read path
# doesn't have to hydrate it. ``None`` if the user could not be serialized.
# This is serialized as an anonymous viewer (never as the requester) so the
# payload never includes the user's full email list, options, or flags: it

Check warning on line 77 in src/sentry/seer/autofix/autofix_agent.py

View check run for this annotation

@sentry/warden / warden: wrdn-data-exfil

[DWE-M4G] Feedback submitter's email and account-security flags (isSuperuser, isStaff, has2fa, lastLogin) leak to all org members via Seer block metadata (additional location)

The `user_service.serialize_many()` call at src/sentry/seer/endpoints/group_ai_autofix.py:328 serializes the feedback submitter as an anonymous viewer (no `as_user`) and stores the result in `Feedback.source["user"]`. The in-code comment claims anonymous serialization avoids leaking the user's data, but it only gates the `emails` array, `options`, and `flags` in `UserSerializer.serialize()`; the base response dict still unconditionally includes `email`, `isSuperuser`, `isStaff`, `hasPasswordAuth`, `has2fa`, `isActive`, `isManaged`, `dateJoined`, `lastLogin`, and `lastActive` (src/sentry/users/api/serializers/user.py). This object is JSON-embedded into `prompt_metadata["feedback"]`, round-tripped back by Seer into block metadata, and returned by the GET endpoint via `blocks = [block.dict() for block in state.blocks]` (line 445) to any authenticated org member with group-read access. Account-security-relevant fields (staff/superuser status, 2FA presence, last login/active timestamps) are not normally exposed to other org members. Replace the full serializer output with a minimal DTO containing only `id`, `name`, `username`, and `avatarUrl`.
# is embedded in Seer prompt metadata and round-tripped back to any org
# member with group-read access.
user: NotRequired[Any]

@sentry-warden sentry-warden Bot Jun 16, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Self-serialized user (all emails, options, flags) stored in Seer feedback metadata and returned to other org members

When submitting pr_iteration feedback, the endpoint serializes the requesting user with as_user=serialize_generic_user(request.user). Because as_user equals the target user, UserSerializer._user_is_requester is True and the result is a UserSerializerResponseSelf containing the user's full registered email list, user options, and internal flags. This payload is stored in feedback.source["user"], embedded into the Seer message's prompt_metadata["feedback"], and round-tripped back through block.dict() in the GET endpoint, where any authenticated org member with group-read access can read another member's personal emails, options, and flags. Only a minimal attribution subset (id, name, avatarUrl) is needed.

Evidence
  • group_ai_autofix.py:329-331 calls user_service.serialize_many(filter={"user_ids":[request.user.id]}, as_user=serialize_generic_user(request.user)); since as_user is the same user, UserSerializer._user_is_requester returns True.
  • users/api/serializers/user.py:193-203 then populates emails (all UserEmail rows), options, and flags, producing UserSerializerResponseSelf rather than the public response.
  • The result is stored as feedback.source["user"] (Feedback in autofix_agent.py) and embedded via prompt_metadata["feedback"] = json.dumps({... "source": feedback.source ...}) at autofix_agent.py:418.
  • Message.metadata is dict[str, str] | None (agent/client_models.py:33) and is part of MemoryBlock; the GET handler returns blocks = [block.dict() for block in state.blocks] at group_ai_autofix.py:442, surfacing message.metadata.feedback to any org member with group-read access.
Also found at 2 additional locations
  • src/sentry/seer/endpoints/group_ai_autofix.py:68-69
  • src/sentry/seer/endpoints/group_ai_autofix.py:326-342

Identified by Warden wrdn-data-exfil, wrdn-pii · GE3-KTX



# Discriminated on ``type``. Add new TypedDict variants to this union as more
Expand All @@ -87,6 +94,14 @@
pass


class PrIterationNoPullRequestException(Exception):
    """Raised when PR iteration is requested for a run that has no pull request.

    ``trigger_autofix_agent`` raises this for the ``PR_ITERATION`` step when
    there is no run state or the run state's ``repo_pr_states`` is empty.
    """


class PrIterationNotEnabledException(Exception):
    """Raised when PR iteration is requested but is not enabled.

    ``trigger_autofix_agent`` raises this for the ``PR_ITERATION`` step when
    ``pr_iteration_enabled`` is falsy.
    """


class AutofixStep(StrEnum):
"""Available autofix steps."""

Expand Down Expand Up @@ -274,6 +289,11 @@
)


def get_autofix_run_state(group: Group, run_id: int) -> SeerRunState:
    """Return the Seer run state for ``run_id`` in the context of ``group``.

    Builds the autofix agent client for ``group`` and delegates the lookup to
    ``_get_group_run_state``; any exception raised by either helper propagates
    to the caller unchanged.
    """
    client = get_autofix_agent_client(group)
    return _get_group_run_state(client, group, run_id)


def _validate_run_belongs_to_group(state: SeerRunState, group: Group) -> None:
group_id = state.metadata.get("group_id") if state.metadata else None
if group_id != group.id:
Expand Down Expand Up @@ -374,7 +394,11 @@
pr_iteration_enabled = run_state.metadata.get("pr_iteration_enabled", pr_iteration_enabled)

iteration_index: int | None = None
if step == AutofixStep.PR_ITERATION and run_state is not None:
if step == AutofixStep.PR_ITERATION:
Comment on lines 396 to +397

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The get_iteration_for_insert_index function lacks bounds checking for insert_index and safe access for metadata["iteration_index"], risking unhandled exceptions.
Severity: HIGH

Suggested Fix

Add validation to ensure insert_index is within the bounds of the state.blocks list before access. When retrieving iteration_index from a block's metadata, use the .get() method with a default value or wrap the access in a try...except KeyError block to prevent unhandled exceptions.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: src/sentry/seer/autofix/autofix_agent.py#L388-L389

Potential issue: In the `pr_iteration` flow, the function
`get_iteration_for_insert_index` uses a user-provided `insert_index` to access
`state.blocks[insert_index]` without performing a bounds check. It also accesses
`metadata["iteration_index"]` without a safe fallback like `.get()`. An out-of-bounds
`insert_index` will raise an `IndexError`, and a missing `iteration_index` key will
raise a `KeyError`. Neither of these exceptions is caught by the endpoint's existing
exception handlers, which will cause an unhandled 500 server error on the public API.

Did we get this right? 👍 / 👎 to inform future reviews.

if not pr_iteration_enabled:
raise PrIterationNotEnabledException()
if run_state is None or not run_state.repo_pr_states:
raise PrIterationNoPullRequestException()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR iteration allows non-created PRs

Medium Severity

pr_iteration is only blocked when repo_pr_states is empty, not when every entry lacks a created PR. Runs with only creating or error states still pass validation and return 202, which conflicts with the documented requirement for at least one created pull request.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 8b754c1. Configure here.

if insert_index is not None:
iteration_index = get_iteration_for_insert_index(run_state, insert_index)
else:
Expand Down
6 changes: 5 additions & 1 deletion src/sentry/seer/autofix/on_completion_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,11 @@ def _maybe_continue_pipeline(
group,
)
if decision is not None:
iteration_index = get_latest_iteration_index(state)
iteration_index = (
get_latest_iteration_index(state)
if current_step == AutofixStep.PR_ITERATION
else None
)
analytics.record(
AiAutofixIntrospectionEvent(
organization_id=organization.id,
Expand Down
57 changes: 55 additions & 2 deletions src/sentry/seer/endpoints/group_ai_autofix.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@
from sentry.seer.autofix.autofix_agent import (
UNKNOWN_RUN_ID_FOR_GROUP,
AutofixStep,
Feedback,
NoSeerQuotaException,
PrIterationNoPullRequestException,
PrIterationNotEnabledException,
get_autofix_agent_state,
trigger_autofix_agent,
trigger_coding_agent_handoff,
Expand All @@ -65,6 +68,7 @@
from sentry.seer.endpoints.utils import get_seer_run, resolve_seer_run
from sentry.seer.models import SeerPermissionError
from sentry.types.ratelimit import RateLimit, RateLimitCategory
from sentry.users.services.user.service import user_service

Check warning on line 71 in src/sentry/seer/endpoints/group_ai_autofix.py

View check run for this annotation

@sentry/warden / warden: wrdn-data-exfil

Feedback submitter's email and account-security flags (isSuperuser, isStaff, has2fa, lastLogin) leak to all org members via Seer block metadata

The `user_service.serialize_many()` call at src/sentry/seer/endpoints/group_ai_autofix.py:328 serializes the feedback submitter as an anonymous viewer (no `as_user`) and stores the result in `Feedback.source["user"]`. The in-code comment claims anonymous serialization avoids leaking the user's data, but it only gates the `emails` array, `options`, and `flags` in `UserSerializer.serialize()`; the base response dict still unconditionally includes `email`, `isSuperuser`, `isStaff`, `hasPasswordAuth`, `has2fa`, `isActive`, `isManaged`, `dateJoined`, `lastLogin`, and `lastActive` (src/sentry/users/api/serializers/user.py). This object is JSON-embedded into `prompt_metadata["feedback"]`, round-tripped back by Seer into block metadata, and returned by the GET endpoint via `blocks = [block.dict() for block in state.blocks]` (line 445) to any authenticated org member with group-read access. Account-security-relevant fields (staff/superuser status, 2FA presence, last login/active timestamps) are not normally exposed to other org members. Replace the full serializer output with a minimal DTO containing only `id`, `name`, `username`, and `avatarUrl`.

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -94,6 +98,7 @@
"root_cause",
"solution",
"code_changes",
"pr_iteration",

Check warning on line 101 in src/sentry/seer/endpoints/group_ai_autofix.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

`get_iteration_for_insert_index` raises KeyError/IndexError on user-supplied `insert_index` for non-iteration blocks

In `src/sentry/seer/autofix/autofix_agent.py`, `get_iteration_for_insert_index` does `block = state.blocks[insert_index]` followed by `int(metadata["iteration_index"])` with no bounds check and no `.get()` guard. `insert_index` is an unconstrained `IntegerField` from the request body, forwarded verbatim through `trigger_autofix_agent` to this function whenever `step == AutofixStep.PR_ITERATION` and `insert_index is not None`. If the index is out of range an `IndexError` is raised; if it points to any non-PR-iteration block (root_cause, solution, code_changes) the `iteration_index` key is absent and a `KeyError` is raised. Neither exception is handled by the endpoint, which only catches `NoSeerQuotaException`, `PrIterationNotEnabledException`, `PrIterationNoPullRequestException`, and `SeerPermissionError`, so the error surfaces as an unhandled HTTP 500.
Comment thread
sentry-warden[bot] marked this conversation as resolved.
"open_pr",
"coding_agent_handoff",
],
Expand Down Expand Up @@ -295,18 +300,52 @@
}
return Response(open_pr_body, status=status.HTTP_202_ACCEPTED)

if step == "pr_iteration":
if resolved_run_id is None:
return Response(
{"detail": "run_id is required for pr_iteration"},
status=status.HTTP_400_BAD_REQUEST,
)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing endpoint-level feature flag check for pr_iteration

Medium Severity

The endpoint checks that resolved_run_id is not None for pr_iteration, but never checks features.has("organizations:autofix-pr-iteration", ...) before calling trigger_autofix_agent. The test test_pr_iteration_requires_feature_flag asserts mock_trigger_explorer.assert_not_called() and expects a 403, but since trigger_autofix_agent is mocked (and won't raise PrIterationNotEnabledException), the endpoint proceeds normally and returns 202. An endpoint-level feature flag gate is needed here to match the documented intended behavior.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit d36c8a5. Configure here.


# Handle all built-in Seer steps. A missing run_id means this call starts a new
# autofix run (the kickoff); a provided run_id is advancing an existing run.
is_autofix_kickoff = resolved_run_id is None
user_context = data.get("user_context")
feedback = None
if (
step == "pr_iteration"
and user_context is not None
and request.user
and request.user.is_authenticated
):
# Serialize the user here on write so the read path (GET) doesn't have
Comment on lines +317 to +321

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The check user_context is not None allows empty strings, creating unnecessary Feedback objects and triggering RPC calls even when no meaningful context is provided.
Severity: LOW

Suggested Fix

Change the condition at line 317 from if user_context is not None to if user_context. This truthy check will correctly filter out empty strings and prevent the creation of meaningless feedback objects.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: src/sentry/seer/endpoints/group_ai_autofix.py#L317-L321

Potential issue: When a client sends an empty string for `user_context`, the condition
`user_context is not None` evaluates to true. This results in the creation and storage
of a `Feedback` object with an empty message. This behavior leads to several
inefficiencies: an unnecessary RPC call to `user_service.serialize_many()`, storage of
meaningless feedback data in the run's metadata, and a misleading `has_user_context`
metric being set to `"yes"`. The intended behavior, as seen in `build_step_prompt`, is
likely to only process feedback when `user_context` is a non-empty string.

# to hydrate it from the stored user_id on every fetch. Serialize as
# an anonymous viewer (no ``as_user``) so the result is the public
# user representation rather than the self representation, which would
# leak the user's full email list, options, and flags. This payload is
# embedded in Seer prompt metadata and readable by any org member with
# group-read access.
serialized_users = user_service.serialize_many(
filter={"user_ids": [request.user.id]},
)
feedback = Feedback(
message=user_context,
source={
"type": "user-ui",
"user_id": request.user.id,

Check warning on line 335 in src/sentry/seer/endpoints/group_ai_autofix.py

View check run for this annotation

@sentry/warden / warden: wrdn-data-exfil

[DWE-M4G] Feedback submitter's email and account-security flags (isSuperuser, isStaff, has2fa, lastLogin) leak to all org members via Seer block metadata (additional location)

The `user_service.serialize_many()` call at src/sentry/seer/endpoints/group_ai_autofix.py:328 serializes the feedback submitter as an anonymous viewer (no `as_user`) and stores the result in `Feedback.source["user"]`. The in-code comment claims anonymous serialization avoids leaking the user's data, but it only gates the `emails` array, `options`, and `flags` in `UserSerializer.serialize()`; the base response dict still unconditionally includes `email`, `isSuperuser`, `isStaff`, `hasPasswordAuth`, `has2fa`, `isActive`, `isManaged`, `dateJoined`, `lastLogin`, and `lastActive` (src/sentry/users/api/serializers/user.py). This object is JSON-embedded into `prompt_metadata["feedback"]`, round-tripped back by Seer into block metadata, and returned by the GET endpoint via `blocks = [block.dict() for block in state.blocks]` (line 445) to any authenticated org member with group-read access. Account-security-relevant fields (staff/superuser status, 2FA presence, last login/active timestamps) are not normally exposed to other org members. Replace the full serializer output with a minimal DTO containing only `id`, `name`, `username`, and `avatarUrl`.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a thought but why not just store the user here and avoid having to hydrate the user later?

This is a manual process, so my expectation is that this won't take up that much storage anyway.

"user": serialized_users[0] if serialized_users else None,

Check warning on line 336 in src/sentry/seer/endpoints/group_ai_autofix.py

View check run for this annotation

@sentry/warden / warden: wrdn-pii

User email included in Seer prompt metadata despite intent to exclude it

The serialized user object at `source.user` unconditionally includes `email: obj.email` (always set in `UserSerializer.serialize` regardless of `as_user`), so the feedback submitter's email is sent to Seer and embedded in prompt metadata readable by any org member with group-read access. Use only `id`, `name`, `username`, and `avatar` fields, or construct a safe subset manually rather than including the full serialized user object.
},
)
try:
run_id = trigger_autofix_agent(
group=group,
step=AutofixStep(step),
referrer=_parse_autofix_referrer(data.get("referrer")),
stopping_point=AutofixStoppingPoint(stopping_point) if stopping_point else None,
run_id=resolved_run_id,
user_context=data.get("user_context"),
user_context=user_context,
insert_index=data.get("insert_index"),
feedback=feedback,
)
if is_autofix_kickoff:
# Record the trigger action only on kickoff, not on each subsequent
Expand Down Expand Up @@ -334,6 +373,16 @@
return Response(kickoff_body, status=status.HTTP_202_ACCEPTED)
except NoSeerQuotaException:
return Response("No budget for Seer Autofix.", status=status.HTTP_402_PAYMENT_REQUIRED)
except PrIterationNotEnabledException:
return Response(
{"detail": "PR iteration is not enabled for this organization"},
status=status.HTTP_403_FORBIDDEN,
)
except PrIterationNoPullRequestException:
return Response(
{"detail": "Cannot iterate on a PR before one has been created"},
status=status.HTTP_400_BAD_REQUEST,
)
except SeerPermissionError as e:
if _is_unknown_run_id_error(e):
return Response(status=status.HTTP_404_NOT_FOUND)
Expand Down Expand Up @@ -393,13 +442,14 @@
)

run = get_seer_run(state.run_id, group.organization)
blocks = [block.dict() for block in state.blocks]
return Response(
{
"autofix": {
"run_id": state.run_id,
"sentry_run_id": str(run.uuid) if run else None,
"status": state.status,
"blocks": [block.dict() for block in state.blocks],
"blocks": blocks,
"updated_at": state.updated_at,
"pending_user_input": (
state.pending_user_input.dict() if state.pending_user_input else None
Expand All @@ -410,6 +460,9 @@
"coding_agents": {
agent_id: agent.dict() for agent_id, agent in state.coding_agents.items()
},
"pr_iteration_enabled": bool(
state.metadata.get("pr_iteration_enabled") if state.metadata else False
),
Comment thread
cursor[bot] marked this conversation as resolved.
}
}
)
10 changes: 10 additions & 0 deletions src/sentry/seer/entrypoints/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,16 @@ def _create_seer_activity(
pull_requests = event_payload.get("pull_requests", [])
if pull_requests:
activity_data["pull_requests"] = pull_requests
elif event_type == SentryAppEventType.SEER_ITERATION_COMPLETED:

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain what this activity does?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly here for parity with the other steps having activities, but I don't think this ends up doing anything right now.

There is code that looks like it will handle activities in the future, but it's behind a feature flag and there are currently no handlers implemented for these activities.

There's also an activity log in the UI where we show these, which is behind a feature flag as well.

pull_requests = event_payload.get("pull_requests", [])
if pull_requests:
activity_data["pull_requests"] = pull_requests
code_changes = event_payload.get("code_changes")
if code_changes:
activity_data["code_changes"] = code_changes
iteration_index = event_payload.get("iteration_index")
if iteration_index is not None:
activity_data["iteration_index"] = iteration_index

Activity.objects.create_group_activity(
group,
Expand Down
34 changes: 32 additions & 2 deletions tests/sentry/seer/autofix/test_autofix_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
STEP_CONFIGS,
AutofixStep,
NoSeerQuotaException,
PrIterationNoPullRequestException,
build_step_prompt,
generate_autofix_handoff_prompt,
get_iteration_for_insert_index,
Expand Down Expand Up @@ -256,12 +257,17 @@ def _iteration_block(iteration_index: int | None = None) -> MemoryBlock:
)


def _state_with_blocks(blocks: list[MemoryBlock], group_id: int | None = None) -> SeerRunState:
def _state_with_blocks(
blocks: list[MemoryBlock],
group_id: int | None = None,
repo_pr_states: dict[str, RepoPRState] | None = None,
) -> SeerRunState:
return SeerRunState(
run_id=67890,
blocks=blocks,
status="completed",
updated_at="2024-01-01T00:00:00Z",
repo_pr_states=repo_pr_states or {},
metadata={"group_id": group_id} if group_id is not None else None,
)

Expand Down Expand Up @@ -459,7 +465,13 @@ def test_pr_iteration_continued_run_increments_iteration_index(
mock_client = MagicMock()
mock_client_class.return_value = mock_client
mock_client.get_run.return_value = _state_with_blocks(
[_iteration_block(1)], group_id=self.group.id
[_iteration_block(1)],
group_id=self.group.id,
repo_pr_states={
"owner/repo": RepoPRState(
repo_name="owner/repo", pr_url="https://example.com/pull/7"
)
},
Comment thread
cursor[bot] marked this conversation as resolved.
)
mock_client.continue_run.return_value = 67890

Expand All @@ -474,6 +486,24 @@ def test_pr_iteration_continued_run_increments_iteration_index(
assert call_kwargs["event_name"] == SeerActionType.ITERATION_STARTED.value
assert call_kwargs["payload"]["iteration_index"] == 2

@patch("sentry.seer.autofix.autofix_agent.broadcast_webhooks_for_organization.delay")
@patch("sentry.seer.autofix.autofix_agent.SeerAgentClient")
def test_pr_iteration_requires_existing_pr(self, mock_client_class, mock_broadcast):
    """PR iteration on a run without any repo PR state must be rejected.

    The mocked run state is built with no blocks and no ``repo_pr_states``,
    so ``trigger_autofix_agent`` is expected to raise
    ``PrIterationNoPullRequestException`` before continuing the run or
    broadcasting any webhook.
    """
    mock_client = MagicMock()
    mock_client_class.return_value = mock_client
    mock_client.get_run.return_value = _state_with_blocks([], group_id=self.group.id)

    with pytest.raises(PrIterationNoPullRequestException):
        trigger_autofix_agent(
            group=self.group,
            step=AutofixStep.PR_ITERATION,
            referrer=AutofixReferrer.UNKNOWN,
            run_id=67890,
        )

    # These checks sit outside the ``with`` block: statements after the
    # raising call inside it would never execute.
    mock_client.continue_run.assert_not_called()
    mock_broadcast.assert_not_called()

@patch("sentry.seer.autofix.autofix_agent.SeerAgentClient")
@patch("sentry.quotas.backend.check_seer_quota", return_value=False)
def test_when_no_quota(self, mock_check_quota, mock_client_class):
Expand Down
Loading
Loading