diff --git a/src/sentry/seer/agent/feature_delivery.py b/src/sentry/seer/agent/feature_delivery.py
new file mode 100644
index 00000000000000..27f270a802ee59
--- /dev/null
+++ b/src/sentry/seer/agent/feature_delivery.py
@@ -0,0 +1,29 @@
+"""Registry for Seer feature result delivery handlers."""
+
+from __future__ import annotations
+
+from typing import Any, Protocol
+
+from sentry.seer.agent.types import FeatureRunStatus
+from sentry.seer.night_shift.delivery import deliver_night_shift_result
+
+__all__ = ["DELIVERY_HANDLERS", "FeatureDeliveryFn", "FeatureRunStatus"]
+
+
+class FeatureDeliveryFn(Protocol):
+    """Call signature shared by every feature delivery handler."""
+
+    def __call__(
+        self,
+        organization_id: int,
+        run_uuid: str,
+        status: FeatureRunStatus,
+        result: dict[str, Any] | None,
+        error: str | None,
+    ) -> None: ...
+
+
+# Maps a feature_id to the handler that receives that feature's results.
+DELIVERY_HANDLERS: dict[str, FeatureDeliveryFn] = {
+    "night_shift": deliver_night_shift_result,
+}
diff --git a/src/sentry/seer/agent/types.py b/src/sentry/seer/agent/types.py
new file mode 100644
index 00000000000000..ccd3c26cf94665
--- /dev/null
+++ b/src/sentry/seer/agent/types.py
@@ -0,0 +1,5 @@
+"""Shared types for Seer agent features."""
+
+from typing import Literal
+
+FeatureRunStatus = Literal["completed", "error"]
diff --git a/src/sentry/seer/endpoints/seer_rpc.py b/src/sentry/seer/endpoints/seer_rpc.py
index 5fea1f8c74abe2..9b60818035800a 100644
--- a/src/sentry/seer/endpoints/seer_rpc.py
+++ b/src/sentry/seer/endpoints/seer_rpc.py
@@ -60,6 +60,7 @@
 from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType
 from sentry.search.events.types import SnubaParams
 from sentry.seer.agent.custom_tool_utils import call_custom_tool
+from sentry.seer.agent.feature_delivery import DELIVERY_HANDLERS, FeatureRunStatus
 from sentry.seer.agent.index_data import (
     rpc_get_issues_for_transaction,
     rpc_get_profiles_for_trace,
@@ -956,6 +957,42 @@ def bulk_get_project_preferences(
     return {str(project_id): pref.dict() for project_id, pref in preferences.items()}
 
 
+def deliver_feature_result(
+    *,
+    organization_id: int,
+    feature_id: str,
+    run_uuid: str,
+    status: FeatureRunStatus,
+    result: dict[str, Any] | None = None,
+    error: str | None = None,
+) -> None:
+    """Dispatch a feature result from Seer to the registered handler.
+
+    An unknown ``feature_id`` is logged as a warning and the result is dropped.
+    Exceptions raised by the handler are not caught here.
+    """
+    handler = DELIVERY_HANDLERS.get(feature_id)
+    if handler is None:
+        logger.warning(
+            "seer.feature_delivery.unknown_feature_id",
+            extra={
+                "organization_id": organization_id,
+                "feature_id": feature_id,
+                "run_uuid": run_uuid,
+            },
+        )
+        return
+
+    # Pass by keyword so the five arguments cannot be silently transposed.
+    handler(
+        organization_id=organization_id,
+        run_uuid=run_uuid,
+        status=status,
+        result=result,
+        error=error,
+    )
+
+
 seer_method_registry: dict[str, Callable] = {  # return type must be serialized
     # Common to Seer features
     "get_github_enterprise_integration_config": get_github_enterprise_integration_config,
@@ -1013,6 +1050,7 @@ def bulk_get_project_preferences(
     "get_repository_definition": get_repository_definition,
     "call_custom_tool": call_custom_tool,
     "call_on_completion_hook": call_on_completion_hook,
+    "deliver_feature_result": deliver_feature_result,
     "get_log_attributes_for_trace": get_log_attributes_for_trace,
     "get_metric_attributes_for_trace": get_metric_attributes_for_trace,
     "get_baseline_tag_distribution": get_baseline_tag_distribution,
diff --git a/src/sentry/seer/night_shift/delivery.py b/src/sentry/seer/night_shift/delivery.py
new file mode 100644
index 00000000000000..9f298b012ca4d0
--- /dev/null
+++ b/src/sentry/seer/night_shift/delivery.py
@@ -0,0 +1,198 @@
+"""Delivery handler for night_shift feature results from Seer."""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import Mapping
+from typing import Any
+
+import sentry_sdk
+
+from sentry.constants import SEER_AUTOMATED_RUN_STOPPING_POINT_DEFAULT, ObjectStatus
+from sentry.models.group import Group
+from sentry.models.organization import Organization
+from sentry.seer.agent.types import FeatureRunStatus
+from sentry.seer.autofix.utils import AutofixStoppingPoint, bulk_read_preferences_from_sentry_db
+from sentry.seer.models.night_shift import SeerNightShiftRun, SeerNightShiftRunResult
+from sentry.seer.night_shift.models import TriageResponse, TriageVerdict
+from sentry.tasks.seer.night_shift.models import TriageAction, TriageResult
+from sentry.tasks.seer.night_shift.skip_cache import mark_skipped
+
+logger = logging.getLogger(__name__)
+
+
+def deliver_night_shift_result(
+    organization_id: int,
+    run_uuid: str,
+    status: FeatureRunStatus,
+    result: dict[str, Any] | None,
+    error: str | None,
+) -> None:
+    """Process a night_shift result from Seer.
+
+    Looks up the run by organization and Seer run UUID, stores ``error`` (if any) in
+    the run's extras, parses ``result`` as a ``TriageResponse`` and hands it to
+    ``_process_verdicts``. A missing run, an error status, a missing result or an
+    unparseable result is logged and the function returns early.
+    """
+    try:
+        run = SeerNightShiftRun.objects.select_related("organization", "seer_run").get(
+            organization_id=organization_id,
+            seer_run__uuid=run_uuid,
+        )
+    except SeerNightShiftRun.DoesNotExist:
+        logger.warning(
+            "night_shift.delivery.missing_run",
+            extra={"organization_id": organization_id, "run_uuid": run_uuid},
+        )
+        return
+
+    # Stored before the status check, so the message is kept whatever the status is.
+    if error:
+        run.update(extras={**(run.extras or {}), "error_message": error})
+
+    log_extra: dict[str, object] = {
+        "organization_id": run.organization_id,
+        "run_id": run.id,
+    }
+
+    if status == "error" or result is None:
+        sentry_sdk.metrics.count(
+            "night_shift.triage_error",
+            1,
+            attributes={"error_type": "delivery_error" if status == "error" else "no_artifact"},
+        )
+        logger.warning("night_shift.delivery.no_result", extra={**log_extra, "status": status})
+        return
+
+    try:
+        triage_response = TriageResponse.parse_obj(result)
+    except Exception:
+        # The payload is external input: count, log and drop anything unparseable.
+        sentry_sdk.metrics.count(
+            "night_shift.triage_error", 1, attributes={"error_type": "invalid_artifact"}
+        )
+        logger.exception("night_shift.delivery.invalid_result", extra=log_extra)
+        return
+
+    options = (run.extras or {}).get("options") or {}
+    dry_run = bool(options.get("dry_run", False))
+
+    _process_verdicts(
+        run=run,
+        organization=run.organization,
+        triage_response=triage_response,
+        dry_run=dry_run,
+        log_extra=log_extra,
+    )
+
+
+def _process_verdicts(
+    *,
+    run: SeerNightShiftRun,
+    organization: Organization,
+    triage_response: TriageResponse,
+    dry_run: bool,
+    log_extra: Mapping[str, object],
+) -> None:
+    """Mark SKIPs and fire autofix for fixable verdicts.
+
+    Only the first verdict per group is acted on; verdicts whose group is not found
+    in an active project of ``organization`` are logged and not acted on. Autofix is
+    delegated to ``_run_autofix_for_candidates`` and is skipped when ``dry_run`` is true.
+    """
+    # Import here to avoid circular import
+    from sentry.tasks.seer.night_shift.cron import _run_autofix_for_candidates
+
+    # Seer may list a group more than once. Keep the first verdict per group so
+    # autofix is never triggered twice for the same group.
+    verdicts_by_group_id: dict[int, TriageVerdict] = {}
+    duplicate_group_ids: list[int] = []
+    for v in triage_response.verdicts:
+        if v.group_id in verdicts_by_group_id:
+            duplicate_group_ids.append(v.group_id)
+        else:
+            verdicts_by_group_id[v.group_id] = v
+    verdicts = list(verdicts_by_group_id.values())
+
+    if duplicate_group_ids:
+        logger.warning(
+            "night_shift.delivery.duplicate_verdicts",
+            extra={**log_extra, "duplicate_group_ids": duplicate_group_ids},
+        )
+
+    group_ids = list(verdicts_by_group_id)
+    groups_by_id: dict[int, Group] = {
+        g.id: g
+        for g in Group.objects.filter(
+            id__in=group_ids,
+            project__organization_id=organization.id,
+            project__status=ObjectStatus.ACTIVE,
+        ).select_related("project")
+    }
+
+    unknown_group_ids = [gid for gid in group_ids if gid not in groups_by_id]
+    if unknown_group_ids:
+        logger.warning(
+            "night_shift.delivery.unknown_group_ids",
+            extra={**log_extra, "unknown_group_ids": unknown_group_ids},
+        )
+
+    # NOTE(review): SKIPs are cached even when dry_run is set -- confirm this is intended.
+    for v in verdicts:
+        if v.action == TriageAction.SKIP and v.group_id in groups_by_id:
+            mark_skipped(v.group_id)
+
+    # Convert verdicts to TriageResult objects for the shared function
+    fixable_candidates = [
+        TriageResult(group=groups_by_id[v.group_id], action=v.action, reason=v.reason)
+        for v in verdicts
+        if v.action in (TriageAction.AUTOFIX, TriageAction.ROOT_CAUSE_ONLY)
+        and v.group_id in groups_by_id
+    ]
+
+    sentry_sdk.metrics.distribution("night_shift.candidates_selected", len(fixable_candidates))
+
+    results: list[SeerNightShiftRunResult] = []
+    if not dry_run and fixable_candidates:
+        # Cache organization on each group's project to avoid N+1 queries
+        for group in groups_by_id.values():
+            group.project.organization = organization
+
+        # Build stopping_point_by_project_id from project preferences (bulk query)
+        project_ids = {c.group.project_id for c in fixable_candidates}
+        preferences = bulk_read_preferences_from_sentry_db(organization.id, list(project_ids))
+        default_stopping_point = AutofixStoppingPoint(SEER_AUTOMATED_RUN_STOPPING_POINT_DEFAULT)
+        stopping_point_by_project_id = {
+            pid: AutofixStoppingPoint(
+                pref.automated_run_stopping_point or SEER_AUTOMATED_RUN_STOPPING_POINT_DEFAULT
+            )
+            if (pref := preferences.get(pid)) is not None
+            else default_stopping_point
+            for pid in project_ids
+        }
+
+        results = _run_autofix_for_candidates(
+            run=run,
+            candidates=fixable_candidates,
+            stopping_point_by_project_id=stopping_point_by_project_id,
+            log_extra=dict(log_extra),
+        )
+
+    seer_run_id_by_group = {r.group_id: r.seer_run_id for r in results}
+    logger.info(
+        "night_shift.candidates_selected",
+        extra={
+            **log_extra,
+            "num_verdicts": len(triage_response.verdicts),
+            "dry_run": dry_run,
+            "candidates": [
+                {
+                    "group_id": v.group_id,
+                    "action": v.action,
+                    "seer_run_id": seer_run_id_by_group.get(v.group_id),
+                }
+                for v in verdicts
+            ],
+        },
+    )
diff --git a/src/sentry/seer/night_shift/models.py b/src/sentry/seer/night_shift/models.py
new file mode 100644
index 00000000000000..34fb58e9695311
--- /dev/null
+++ b/src/sentry/seer/night_shift/models.py
@@ -0,0 +1,24 @@
+"""Wire types for the night_shift feature result payload from Seer."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+from sentry.tasks.seer.night_shift.models import TriageAction
+
+
+class _Base(BaseModel):
+    """Base model that ignores unknown fields in the payload."""
+
+    class Config:
+        extra = "ignore"
+
+
+class TriageVerdict(_Base):
+    group_id: int
+    action: TriageAction
+    reason: str = ""
+
+
+class TriageResponse(_Base):
+    verdicts: list[TriageVerdict]
diff --git a/tests/sentry/seer/night_shift/test_delivery.py b/tests/sentry/seer/night_shift/test_delivery.py
new file mode 100644
index 00000000000000..df494ab1c6966c
--- /dev/null
+++ b/tests/sentry/seer/night_shift/test_delivery.py
@@ -0,0 +1,390 @@
+from typing import Any
+from unittest.mock import patch
+
+from sentry.models.organization import Organization
+from sentry.seer.autofix.utils import AutofixStoppingPoint
+from sentry.seer.models.night_shift import SeerNightShiftRun, SeerNightShiftRunResult
+from sentry.seer.night_shift.delivery import deliver_night_shift_result
+from sentry.tasks.seer.night_shift.models import TriageAction
+from sentry.tasks.seer.night_shift.skip_cache import key as skip_cache_key
+from sentry.testutils.cases import TestCase
+from sentry.testutils.pytest.fixtures import django_db_all
+from sentry.utils.redis import redis_clusters
+
+
+@django_db_all
+class TestDeliverNightShiftResult(TestCase):
+    def _create_night_shift_run(
+        self, organization: Organization | None = None, **extras_overrides: Any
+    ) -> SeerNightShiftRun:
+        """Create a SeerNightShiftRun with associated SeerRun."""
+        org = organization or self.create_organization()
+        seer_run = self.create_seer_run(organization=org)
+        extras = {"options": {}, **extras_overrides}
+        return SeerNightShiftRun.objects.create(
+            organization=org,
+            seer_run=seer_run,
+            extras=extras,
+        )
+
+    def test_missing_run_logs_warning(self) -> None:
+        """When run_uuid doesn't match any SeerNightShiftRun, log and return."""
+        org = self.create_organization()
+
+        with patch("sentry.seer.night_shift.delivery.logger") as mock_logger:
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid="00000000-0000-0000-0000-000000000000",
+                status="completed",
+                result={"verdicts": []},
+                error=None,
+            )
+
+        mock_logger.warning.assert_called_once()
+        assert "night_shift.delivery.missing_run" in mock_logger.warning.call_args.args[0]
+
+    def test_error_status_records_error_and_returns(self) -> None:
+        """When status is 'error', record error message and return early."""
+        run = self._create_night_shift_run()
+        assert run.seer_run is not None
+
+        with patch("sentry.seer.night_shift.delivery.logger") as mock_logger:
+            deliver_night_shift_result(
+                organization_id=run.organization_id,
+                run_uuid=str(run.seer_run.uuid),
+                status="error",
+                result=None,
+                error="Seer exploded",
+            )
+
+        mock_logger.warning.assert_called()
+        assert "night_shift.delivery.no_result" in mock_logger.warning.call_args.args[0]
+
+        run.refresh_from_db()
+        assert run.extras["error_message"] == "Seer exploded"
+        assert not SeerNightShiftRunResult.objects.filter(run=run).exists()
+
+    def test_invalid_result_logs_exception(self) -> None:
+        """When result can't be parsed as TriageResponse, log and return."""
+        run = self._create_night_shift_run()
+        assert run.seer_run is not None
+
+        with patch("sentry.seer.night_shift.delivery.logger") as mock_logger:
+            deliver_night_shift_result(
+                organization_id=run.organization_id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result={"invalid": "schema"},
+                error=None,
+            )
+
+        mock_logger.exception.assert_called_once()
+        assert "night_shift.delivery.invalid_result" in mock_logger.exception.call_args.args[0]
+
+        assert not SeerNightShiftRunResult.objects.filter(run=run).exists()
+
+    def test_skip_verdict_marks_group_skipped(self) -> None:
+        """SKIP verdicts should mark the group in skip cache."""
+        org = self.create_organization()
+        project = self.create_project(organization=org)
+        group = self.create_group(project=project)
+        run = self._create_night_shift_run(organization=org)
+
+        result = {
+            "verdicts": [
+                {"group_id": group.id, "action": TriageAction.SKIP.value, "reason": "not fixable"}
+            ]
+        }
+
+        assert run.seer_run is not None
+        with patch("sentry.tasks.seer.night_shift.cron.trigger_autofix_agent") as mock_trigger:
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result=result,
+                error=None,
+            )
+
+        mock_trigger.assert_not_called()
+
+        # Verify skip cache was set
+        redis = redis_clusters.get("default")
+        try:
+            assert redis.exists(skip_cache_key(group.id))
+        finally:
+            redis.delete(skip_cache_key(group.id))
+
+        # No results persisted for SKIP verdicts
+        assert not SeerNightShiftRunResult.objects.filter(run=run).exists()
+
+    def test_autofix_verdict_triggers_autofix(self) -> None:
+        """AUTOFIX verdicts should trigger autofix with project stopping point."""
+        org = self.create_organization()
+        project = self.create_project(organization=org)
+        project.update_option(
+            "sentry:seer_automated_run_stopping_point", AutofixStoppingPoint.OPEN_PR.value
+        )
+        group = self.create_group(project=project)
+        run = self._create_night_shift_run(organization=org)
+
+        result = {
+            "verdicts": [
+                {"group_id": group.id, "action": TriageAction.AUTOFIX.value, "reason": "looks good"}
+            ]
+        }
+
+        assert run.seer_run is not None
+        with patch(
+            "sentry.tasks.seer.night_shift.cron.trigger_autofix_agent", return_value=42
+        ) as mock_trigger:
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result=result,
+                error=None,
+            )
+
+        mock_trigger.assert_called_once()
+        assert mock_trigger.call_args.kwargs["group"].id == group.id
+        assert mock_trigger.call_args.kwargs["stopping_point"] == AutofixStoppingPoint.OPEN_PR
+
+        results = list(SeerNightShiftRunResult.objects.filter(run=run))
+        assert len(results) == 1
+        assert results[0].group_id == group.id
+        assert results[0].seer_run_id == "42"
+        assert results[0].extras["action"] == TriageAction.AUTOFIX.value
+
+    def test_root_cause_only_verdict_uses_root_cause_stopping_point(self) -> None:
+        """ROOT_CAUSE_ONLY verdicts should use ROOT_CAUSE stopping point."""
+        org = self.create_organization()
+        project = self.create_project(organization=org)
+        project.update_option(
+            "sentry:seer_automated_run_stopping_point", AutofixStoppingPoint.OPEN_PR.value
+        )
+        group = self.create_group(project=project)
+        run = self._create_night_shift_run(organization=org)
+
+        result = {
+            "verdicts": [
+                {
+                    "group_id": group.id,
+                    "action": TriageAction.ROOT_CAUSE_ONLY.value,
+                    "reason": "needs investigation",
+                }
+            ]
+        }
+
+        assert run.seer_run is not None
+        with patch(
+            "sentry.tasks.seer.night_shift.cron.trigger_autofix_agent", return_value=99
+        ) as mock_trigger:
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result=result,
+                error=None,
+            )
+
+        mock_trigger.assert_called_once()
+        assert (
+            mock_trigger.call_args.kwargs["stopping_point"] == AutofixStoppingPoint.ROOT_CAUSE
+        )
+
+    def test_dry_run_skips_autofix(self) -> None:
+        """Dry run mode should not trigger autofix or persist results."""
+        org = self.create_organization()
+        project = self.create_project(organization=org)
+        group = self.create_group(project=project)
+        run = self._create_night_shift_run(organization=org, options={"dry_run": True})
+
+        result = {
+            "verdicts": [
+                {"group_id": group.id, "action": TriageAction.AUTOFIX.value, "reason": "fixable"}
+            ]
+        }
+
+        assert run.seer_run is not None
+        with patch("sentry.tasks.seer.night_shift.cron.trigger_autofix_agent") as mock_trigger:
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result=result,
+                error=None,
+            )
+
+        mock_trigger.assert_not_called()
+
+        assert not SeerNightShiftRunResult.objects.filter(run=run).exists()
+
+    def test_trigger_failure_continues_with_other_groups(self) -> None:
+        """If trigger fails for one group, continue processing others."""
+        org = self.create_organization()
+        project = self.create_project(organization=org)
+        failing_group = self.create_group(project=project)
+        ok_group = self.create_group(project=project)
+        run = self._create_night_shift_run(organization=org)
+
+        result = {
+            "verdicts": [
+                {
+                    "group_id": failing_group.id,
+                    "action": TriageAction.AUTOFIX.value,
+                    "reason": "will fail",
+                },
+                {
+                    "group_id": ok_group.id,
+                    "action": TriageAction.AUTOFIX.value,
+                    "reason": "will work",
+                },
+            ]
+        }
+
+        def trigger_side_effect(**kwargs: Any) -> int:
+            if kwargs["group"].id == failing_group.id:
+                raise RuntimeError("trigger failed")
+            return 7
+
+        assert run.seer_run is not None
+        with (
+            patch(
+                "sentry.tasks.seer.night_shift.cron.trigger_autofix_agent",
+                side_effect=trigger_side_effect,
+            ),
+            patch("sentry.tasks.seer.night_shift.cron.logger") as mock_logger,
+        ):
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result=result,
+                error=None,
+            )
+
+        exception_calls = [call.args[0] for call in mock_logger.exception.call_args_list]
+        assert "night_shift.autofix_trigger_failed" in exception_calls
+
+        results = list(SeerNightShiftRunResult.objects.filter(run=run))
+        assert len(results) == 1
+        assert results[0].group_id == ok_group.id
+        assert results[0].seer_run_id == "7"
+
+    def test_unknown_group_ids_logged(self) -> None:
+        """Groups not belonging to the org should be logged and skipped."""
+        org = self.create_organization()
+        other_org = self.create_organization()
+        other_project = self.create_project(organization=other_org)
+        other_group = self.create_group(project=other_project)
+        run = self._create_night_shift_run(organization=org)
+
+        result = {
+            "verdicts": [
+                {
+                    "group_id": other_group.id,
+                    "action": TriageAction.AUTOFIX.value,
+                    "reason": "wrong org",
+                }
+            ]
+        }
+
+        assert run.seer_run is not None
+        with (
+            patch("sentry.tasks.seer.night_shift.cron.trigger_autofix_agent") as mock_trigger,
+            patch("sentry.seer.night_shift.delivery.logger") as mock_logger,
+        ):
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result=result,
+                error=None,
+            )
+
+        mock_trigger.assert_not_called()
+        warning_calls = [call.args[0] for call in mock_logger.warning.call_args_list]
+        assert "night_shift.delivery.unknown_group_ids" in warning_calls
+
+    def test_user_context_passed_to_autofix(self) -> None:
+        """Verdict reason should be passed as user_context to autofix."""
+        org = self.create_organization()
+        project = self.create_project(organization=org)
+        group = self.create_group(project=project)
+        run = self._create_night_shift_run(organization=org)
+
+        result = {
+            "verdicts": [
+                {
+                    "group_id": group.id,
+                    "action": TriageAction.AUTOFIX.value,
+                    "reason": "This issue is caused by a null pointer",
+                }
+            ]
+        }
+
+        assert run.seer_run is not None
+        with patch(
+            "sentry.tasks.seer.night_shift.cron.trigger_autofix_agent", return_value=1
+        ) as mock_trigger:
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result=result,
+                error=None,
+            )
+
+        user_context = mock_trigger.call_args.kwargs["user_context"]
+        assert "This issue is caused by a null pointer" in user_context
+
+    def test_empty_reason_no_user_context(self) -> None:
+        """Empty reason should result in no user_context."""
+        org = self.create_organization()
+        project = self.create_project(organization=org)
+        group = self.create_group(project=project)
+        run = self._create_night_shift_run(organization=org)
+
+        result = {
+            "verdicts": [{"group_id": group.id, "action": TriageAction.AUTOFIX.value, "reason": ""}]
+        }
+
+        assert run.seer_run is not None
+        with patch(
+            "sentry.tasks.seer.night_shift.cron.trigger_autofix_agent", return_value=1
+        ) as mock_trigger:
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result=result,
+                error=None,
+            )
+
+        assert mock_trigger.call_args.kwargs["user_context"] is None
+
+    def test_duplicate_verdicts_trigger_autofix_once(self) -> None:
+        """Repeated verdicts for one group should trigger autofix only once."""
+        org = self.create_organization()
+        project = self.create_project(organization=org)
+        group = self.create_group(project=project)
+        run = self._create_night_shift_run(organization=org)
+
+        verdict = {"group_id": group.id, "action": TriageAction.AUTOFIX.value, "reason": "dup"}
+        result = {"verdicts": [verdict, dict(verdict)]}
+
+        assert run.seer_run is not None
+        with patch(
+            "sentry.tasks.seer.night_shift.cron.trigger_autofix_agent", return_value=5
+        ) as mock_trigger:
+            deliver_night_shift_result(
+                organization_id=org.id,
+                run_uuid=str(run.seer_run.uuid),
+                status="completed",
+                result=result,
+                error=None,
+            )
+
+        mock_trigger.assert_called_once()
+        assert SeerNightShiftRunResult.objects.filter(run=run).count() == 1