Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
6cddc46
feat(seer): Add deliver_feature_result RPC for Seer agent features
trevor-e Jun 2, 2026
3130d25
feat(seer): Add night_shift delivery handler
trevor-e Jun 2, 2026
2e44223
fix(seer): Look up night shift run by SeerRun UUID
trevor-e Jun 2, 2026
1c51a11
ref(seer): Make delivery handler registry explicit
trevor-e Jun 2, 2026
c07574c
fix(seer): Add seer prefix to feature_delivery log key
trevor-e Jun 2, 2026
129553a
ref(seer): Use Protocol for FeatureDeliveryFn type
trevor-e Jun 2, 2026
0338b84
ref(seer): Simplify feature delivery params
trevor-e Jun 2, 2026
ccc50ca
fix(seer): Reorder params and add org_id to query
trevor-e Jun 2, 2026
0c0baaa
fix(night_shift): Address review feedback for delivery handler
trevor-e Jun 2, 2026
7dfbadf
fix(night_shift): Use fixable_verdicts count for candidates_selected …
trevor-e Jun 2, 2026
e1d5fa8
ref(night_shift): Remove duplicate FeatureRunStatus definition
trevor-e Jun 2, 2026
1ef7231
ref(night_shift): Move FeatureRunStatus to shared types module
trevor-e Jun 2, 2026
0061638
test(night_shift): Add tests for deliver_night_shift_result
trevor-e Jun 2, 2026
e853a2c
fix(night_shift): Add type annotations and re-export FeatureRunStatus
trevor-e Jun 3, 2026
7054125
ref(night_shift): Reuse _run_autofix_for_candidates from cron
trevor-e Jun 3, 2026
bb03aea
fix(night_shift): Avoid N+1 queries by caching org on each group's pr…
trevor-e Jun 3, 2026
33763d9
fix(night_shift): Use bulk query for project preferences
trevor-e Jun 3, 2026
75de6cd
fix(night_shift): Filter out inactive projects in groups query
trevor-e Jun 3, 2026
9bab14b
fix(night_shift): Handle missing project preferences defensively
trevor-e Jun 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/sentry/seer/agent/feature_delivery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Registry for Seer feature result delivery handlers."""

from __future__ import annotations

from typing import Any, Protocol

from sentry.seer.agent.types import FeatureRunStatus
from sentry.seer.night_shift.delivery import deliver_night_shift_result
Comment thread
sentry-warden[bot] marked this conversation as resolved.


class FeatureDeliveryFn(Protocol):
    """Structural type for a feature result delivery handler.

    Any callable accepting these five arguments, in this order, and
    returning ``None`` satisfies the protocol.
    """

    def __call__(
        self,
        organization_id: int,
        run_uuid: str,
        status: FeatureRunStatus,
        result: dict[str, Any] | None,
        error: str | None,
    ) -> None: ...


# Explicit registry: feature id -> handler that receives that feature's result.
# A feature id missing from this mapping has no registered handler.
DELIVERY_HANDLERS: dict[str, FeatureDeliveryFn] = {"night_shift": deliver_night_shift_result}
5 changes: 5 additions & 0 deletions src/sentry/seer/agent/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Shared types for Seer agent features."""

from typing import Literal

# The status values a delivered feature run result may carry.
FeatureRunStatus = Literal[
    "completed",
    "error",
]
23 changes: 23 additions & 0 deletions src/sentry/seer/endpoints/seer_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType
from sentry.search.events.types import SnubaParams
from sentry.seer.agent.custom_tool_utils import call_custom_tool
from sentry.seer.agent.feature_delivery import DELIVERY_HANDLERS, FeatureRunStatus
from sentry.seer.agent.index_data import (
rpc_get_issues_for_transaction,
rpc_get_profiles_for_trace,
Expand Down Expand Up @@ -956,6 +957,27 @@
return {str(project_id): pref.dict() for project_id, pref in preferences.items()}


def deliver_feature_result(
    *,
    organization_id: int,
    feature_id: str,
    run_uuid: str,
    status: FeatureRunStatus,
    result: dict[str, Any] | None = None,
    error: str | None = None,
) -> None:
    """Route a feature result sent by Seer to the handler registered for it.

    The handler is looked up in ``DELIVERY_HANDLERS`` by ``feature_id``. When
    no handler is registered, a warning is logged and the call is a no-op.
    Otherwise the handler is invoked synchronously with the remaining
    arguments, passed positionally.
    """
    try:
        deliver = DELIVERY_HANDLERS[feature_id]
    except KeyError:
        logger.warning(
            "seer.feature_delivery.unknown_feature_id",
            extra={"feature_id": feature_id, "run_uuid": run_uuid},
        )
        return

    deliver(organization_id, run_uuid, status, result, error)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Autofix runs inside Seer RPC

Medium Severity

deliver_feature_result invokes the night-shift handler synchronously in the Seer RPC request, and that handler can call trigger_autofix_agent for every fixable verdict. The prior cron path runs the same autofix loop inside a background task with a long processing deadline.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 0c0baaa. Configure here.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our result handling is very lightweight and should work fine within the RPC.



seer_method_registry: dict[str, Callable] = { # return type must be serialized
# Common to Seer features
"get_github_enterprise_integration_config": get_github_enterprise_integration_config,
Expand Down Expand Up @@ -1013,6 +1035,7 @@
"get_repository_definition": get_repository_definition,
"call_custom_tool": call_custom_tool,
"call_on_completion_hook": call_on_completion_hook,
"deliver_feature_result": deliver_feature_result,

Check warning on line 1038 in src/sentry/seer/endpoints/seer_rpc.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

AutofixStoppingPoint enum conversion without ValueError guard can crash night-shift delivery

In `_trigger_autofix_for_fixable` (src/sentry/seer/night_shift/delivery.py ~line 171), the dict comprehension calls `AutofixStoppingPoint(value)` directly on `read_preference_from_sentry_db(project).automated_run_stopping_point`, which is read from the project option `sentry:seer_automated_run_stopping_point`. The `or SEER_AUTOMATED_RUN_STOPPING_POINT_DEFAULT` fallback only handles None/empty string, not a stored string that is no longer a valid enum member. The same concern is already handled explicitly at the org level in `src/sentry/seer/autofix/utils.py:374-376` ("Guard against stored stopping points that are no longer valid"), but no analogous guard exists here. If any single project in the batch has a stale value, the comprehension raises `ValueError`, aborting `deliver_night_shift_result` entirely — no autofix is triggered for any verdict and no `SeerNightShiftRunResult` rows are written. This matches Check 4 (enum conversion without ValueError handling on values sourced from storage).
Comment thread
sentry-warden[bot] marked this conversation as resolved.
"get_log_attributes_for_trace": get_log_attributes_for_trace,
"get_metric_attributes_for_trace": get_metric_attributes_for_trace,
"get_baseline_tag_distribution": get_baseline_tag_distribution,
Expand Down
221 changes: 221 additions & 0 deletions src/sentry/seer/night_shift/delivery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
"""Delivery handler for night_shift feature results from Seer."""

from __future__ import annotations

import logging
from collections.abc import Mapping
from typing import Any

import sentry_sdk

from sentry.constants import SEER_AUTOMATED_RUN_STOPPING_POINT_DEFAULT
from sentry.models.group import Group
from sentry.models.organization import Organization
from sentry.seer.agent.types import FeatureRunStatus
from sentry.seer.autofix.autofix_agent import AutofixStep, trigger_autofix_agent
from sentry.seer.autofix.constants import SeerAutomationSource
from sentry.seer.autofix.issue_summary import referrer_map
from sentry.seer.autofix.utils import AutofixStoppingPoint, read_preference_from_sentry_db
from sentry.seer.models.night_shift import SeerNightShiftRun, SeerNightShiftRunResult
from sentry.seer.models.run import SeerRun
from sentry.seer.models.workflow import SeerWorkflowStrategy
from sentry.seer.night_shift.models import TriageResponse, TriageVerdict
from sentry.tasks.seer.night_shift.models import TriageAction
from sentry.tasks.seer.night_shift.skip_cache import mark_skipped

logger = logging.getLogger(__name__)


def deliver_night_shift_result(
    organization_id: int,
    run_uuid: str,
    status: FeatureRunStatus,
    result: dict[str, Any] | None,
    error: str | None,
) -> None:
    """Handle a night_shift result delivered by Seer for a single run.

    Steps, in order:

    1. Find the ``SeerNightShiftRun`` for this organization whose linked
       ``seer_run`` has the given UUID. If none exists, log a warning and stop.
    2. If ``error`` is non-empty, store it under ``extras["error_message"]``.
    3. If ``status`` is ``"error"`` or there is no ``result``, record a
       ``night_shift.triage_error`` metric, log a warning and stop.
    4. Parse ``result`` as a ``TriageResponse``. On failure record the metric,
       log the exception and stop.
    5. Hand the parsed verdicts to ``_process_verdicts``, honouring the
       ``dry_run`` flag stored in the run's ``extras["options"]``.
    """
    # NOTE(review): a reviewer flagged that ``seer_run`` may be a nullable FK
    # that is populated only after the run row is created. If so, a result
    # arriving before the link exists would miss this lookup and be dropped
    # with only the warning below. Confirm against the model and the
    # run-creation path.
    candidate_runs = SeerNightShiftRun.objects.select_related("organization", "seer_run")
    try:
        run = candidate_runs.get(organization_id=organization_id, seer_run__uuid=run_uuid)
    except SeerNightShiftRun.DoesNotExist:
        logger.warning(
            "night_shift.delivery.missing_run",
            extra={"organization_id": organization_id, "run_uuid": run_uuid},
        )
        return

    if error:
        previous_extras = run.extras or {}
        run.update(extras={**previous_extras, "error_message": error})

    log_extra: dict[str, object] = {"organization_id": run.organization_id, "run_id": run.id}

    if status == "error" or result is None:
        # Distinguish "Seer reported a failure" from "Seer sent no payload".
        error_type = "delivery_error" if status == "error" else "no_artifact"
        sentry_sdk.metrics.count(
            "night_shift.triage_error",
            1,
            attributes={"error_type": error_type},
        )
        logger.warning("night_shift.delivery.no_result", extra={**log_extra, "status": status})
        return

    try:
        triage_response = TriageResponse.parse_obj(result)
    except Exception:
        sentry_sdk.metrics.count(
            "night_shift.triage_error", 1, attributes={"error_type": "invalid_artifact"}
        )
        logger.exception("night_shift.delivery.invalid_result", extra=log_extra)
        return

    run_options = (run.extras or {}).get("options") or {}
    is_dry_run = bool(run_options.get("dry_run", False))

    _process_verdicts(
        run=run,
        organization=run.organization,
        triage_response=triage_response,
        dry_run=is_dry_run,
        log_extra=log_extra,
    )
Comment thread
trevor-e marked this conversation as resolved.


def _process_verdicts(
    *,
    run: SeerNightShiftRun,
    organization: Organization,
    triage_response: TriageResponse,
    dry_run: bool,
    log_extra: Mapping[str, object],
) -> None:
    """Apply the triage verdicts: record skips, trigger autofix, log the outcome.

    Only verdicts whose group exists in ``organization`` are acted on; ids that
    do not resolve are reported in a single warning. SKIP verdicts are passed
    to ``mark_skipped``. AUTOFIX and ROOT_CAUSE_ONLY verdicts are counted in
    the ``night_shift.candidates_selected`` metric and, unless ``dry_run`` is
    set, forwarded to ``_trigger_autofix_for_fixable``.
    """
    verdicts = triage_response.verdicts
    group_ids = [verdict.group_id for verdict in verdicts]

    # Scope the lookup to the organization so ids from other orgs are rejected.
    org_groups = Group.objects.filter(
        id__in=group_ids, project__organization_id=organization.id
    ).select_related("project")
    groups_by_id: dict[int, Group] = {group.id: group for group in org_groups}

    unknown_group_ids = [gid for gid in group_ids if gid not in groups_by_id]
    if unknown_group_ids:
        logger.warning(
            "night_shift.delivery.unknown_group_ids",
            extra={**log_extra, "unknown_group_ids": unknown_group_ids},
        )

    fixable_actions = (TriageAction.AUTOFIX, TriageAction.ROOT_CAUSE_ONLY)
    fixable_verdicts = []
    for verdict in verdicts:
        if verdict.group_id not in groups_by_id:
            continue
        if verdict.action == TriageAction.SKIP:
            mark_skipped(verdict.group_id)
        elif verdict.action in fixable_actions:
            fixable_verdicts.append(verdict)

    sentry_sdk.metrics.distribution("night_shift.candidates_selected", len(fixable_verdicts))

    if dry_run:
        results: list[SeerNightShiftRunResult] = []
    else:
        results = _trigger_autofix_for_fixable(
            run=run,
            organization=organization,
            verdicts=fixable_verdicts,
            groups_by_id=groups_by_id,
            log_extra=log_extra,
        )

    seer_run_id_by_group = {row.group_id: row.seer_run_id for row in results}
    candidates = [
        {
            "group_id": verdict.group_id,
            "action": verdict.action,
            "seer_run_id": seer_run_id_by_group.get(verdict.group_id),
        }
        for verdict in verdicts
    ]
    logger.info(
        "night_shift.candidates_selected",
        extra={
            **log_extra,
            "num_verdicts": len(verdicts),
            "dry_run": dry_run,
            "candidates": candidates,
        },
    )


def _trigger_autofix_for_fixable(
    *,
    run: SeerNightShiftRun,
    organization: Organization,
    verdicts: list[TriageVerdict],
    groups_by_id: dict[int, Group],
    log_extra: Mapping[str, object],
) -> list[SeerNightShiftRunResult]:
    """Trigger autofix for each fixable verdict and persist one row per success.

    Args:
        run: The night shift run the results belong to.
        organization: Organization owning every group in ``groups_by_id``.
        verdicts: Fixable verdicts; each ``group_id`` must be a key of
            ``groups_by_id``.
        groups_by_id: Groups keyed by id, with ``project`` available.
        log_extra: Base fields merged into every log record emitted here.

    Returns:
        The ``SeerNightShiftRunResult`` rows that were bulk-created, one per
        verdict whose ``trigger_autofix_agent`` call did not raise. Empty when
        ``verdicts`` is empty.

    A failing trigger is logged and skipped so one bad group does not block
    the rest. Likewise, a project whose stored stopping point is not a valid
    ``AutofixStoppingPoint`` falls back to the default instead of aborting the
    whole delivery.
    """
    if not verdicts:
        return []

    referrer = referrer_map[SeerAutomationSource.NIGHT_SHIFT]

    project_by_id = {}
    for group in groups_by_id.values():
        # Each Group row loaded with select_related carries its own Project
        # instance, so cache the organization on every one of them rather than
        # only on one instance per project id.
        group.project.organization = organization
        project_by_id[group.project_id] = group.project

    project_ids = {groups_by_id[v.group_id].project_id for v in verdicts}

    stopping_point_by_project_id: dict[int, AutofixStoppingPoint] = {}
    for pid in project_ids:
        stored_value = read_preference_from_sentry_db(
            project_by_id[pid]
        ).automated_run_stopping_point
        try:
            project_stopping_point = AutofixStoppingPoint(
                stored_value or SEER_AUTOMATED_RUN_STOPPING_POINT_DEFAULT
            )
        except ValueError:
            # The stored value is no longer a valid enum member. Use the
            # default for this project so the other verdicts still proceed.
            logger.warning(
                "night_shift.delivery.invalid_stopping_point",
                extra={**log_extra, "project_id": pid, "stopping_point": stored_value},
            )
            project_stopping_point = AutofixStoppingPoint(
                SEER_AUTOMATED_RUN_STOPPING_POINT_DEFAULT
            )
        stopping_point_by_project_id[pid] = project_stopping_point

    results: list[SeerNightShiftRunResult] = []
    for v in verdicts:
        group = groups_by_id[v.group_id]
        # ROOT_CAUSE_ONLY overrides the project preference.
        stopping_point = (
            AutofixStoppingPoint.ROOT_CAUSE
            if v.action == TriageAction.ROOT_CAUSE_ONLY
            else stopping_point_by_project_id[group.project_id]
        )
        user_context = (
            f"Night-shift triage already investigated this issue and concluded:\n{v.reason}"
            if v.reason
            else None
        )
        try:
            seer_run_id = trigger_autofix_agent(
                group=group,
                step=AutofixStep.ROOT_CAUSE,
                referrer=referrer,
                stopping_point=stopping_point,
                user_context=user_context,
            )
        except Exception:
            logger.exception(
                "night_shift.autofix_trigger_failed",
                extra={**log_extra, "group_id": group.id},
            )
            continue

        result_seer_run = SeerRun.objects.filter(seer_run_state_id=seer_run_id).first()
        results.append(
            SeerNightShiftRunResult(
                run=run,
                kind=SeerWorkflowStrategy.AGENTIC_TRIAGE,
                group=group,
                seer_run_id=str(seer_run_id),
                result_seer_run=result_seer_run,
                extras={"action": str(v.action)},
            )
        )

    SeerNightShiftRunResult.objects.bulk_create(results)
    sentry_sdk.metrics.count("night_shift.autofix_triggered", len(results))
    return results
22 changes: 22 additions & 0 deletions src/sentry/seer/night_shift/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Wire types for the night_shift feature result payload from Seer."""

from __future__ import annotations

from pydantic import BaseModel

from sentry.tasks.seer.night_shift.models import TriageAction


class _Base(BaseModel):
    """Common base for the night_shift wire models in this module."""

    class Config:
        # Keys in the payload that the model does not declare are ignored.
        extra = "ignore"


class TriageVerdict(_Base):
    """One triage decision for a single group."""

    # Id of the group the verdict applies to.
    group_id: int
    # What triage decided to do with the group.
    action: TriageAction
    # Free-text explanation; defaults to an empty string when omitted.
    reason: str = ""


class TriageResponse(_Base):
    """Top-level result payload: the list of per-group verdicts."""

    verdicts: list[TriageVerdict]
Loading