Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/sentry/incidents/typings/metric_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from datetime import datetime
from typing import TYPE_CHECKING, Any

from pydantic import BaseModel, ConfigDict

from sentry.incidents.models.alert_rule import (
AlertRule,
AlertRuleDetectionType,
Expand Down Expand Up @@ -281,15 +283,16 @@ def from_legacy_models(
)


@dataclass
class OpenPeriodContext:
class OpenPeriodContext(BaseModel):
"""
We want to eventually delete this class. it serves as a way to pass data around
that we used to use `incident` for.
"""

model_config = ConfigDict(frozen=True)

date_started: datetime
date_closed: datetime | None
date_closed: datetime | None = None
Comment thread
cursor[bot] marked this conversation as resolved.
id: int

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion src/sentry/notifications/notification_action/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ def invoke_legacy_registry(cls, invocation: ActionInvocation) -> None:
"notification_context": asdict(notification_context),
"alert_context": asdict(alert_context),
"metric_issue_context": asdict(metric_issue_context),
"open_period_context": asdict(open_period_context),
"open_period_context": open_period_context.dict(),
"trigger_status": trigger_status,
},
)
Expand Down
28 changes: 22 additions & 6 deletions src/sentry/notifications/platform/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ class NotificationServiceError(Exception):
pass


class NotificationRenderError(NotificationServiceError):
pass


class NotificationService[T: NotificationData]:
def __init__(self, *, data: T):
self.data: Final[T] = data
Expand Down Expand Up @@ -94,9 +98,15 @@ def notify_target(

# Update the lifecycle with the notification category now that we know it
event_lifecycle.notification_category = template.category
renderable = NotificationService.render_template(
data=self.data, template=template, provider=provider
)
try:
renderable = NotificationService.render_template(
data=self.data, template=template, provider=provider
)
except Exception as e:
lifecycle.record_failure(failure_reason=e, create_issue=True)
raise NotificationRenderError(
f"Failed to render notification for source={self.data.source}"
) from e

# Step 3: Resolve thread if threading requested
thread_context: ThreadContext | None = None
Expand Down Expand Up @@ -321,9 +331,15 @@ def notify_target_async(
template_cls = template_registry.get(notification_data.source)
template = template_cls()
lifecycle_metric.notification_category = template.category
renderable = NotificationService.render_template(
data=notification_data, template=template, provider=provider
)
try:
renderable = NotificationService.render_template(
data=notification_data, template=template, provider=provider
)
except Exception as e:
lifecycle.record_failure(failure_reason=e, create_issue=True)
raise NotificationRenderError(
f"Failed to render notification for source={notification_data.source}"
) from e

# Step 4: Resolve thread if threading requested
thread_context: ThreadContext | None = None
Expand Down
8 changes: 7 additions & 1 deletion src/sentry/notifications/platform/slack/provider.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from typing import TYPE_CHECKING, NotRequired, TypedDict

from slack_sdk.models.blocks import (
ActionsBlock,
Expand Down Expand Up @@ -59,6 +59,7 @@ class SlackProviderThreadingContext(ProviderThreadingContext):
class SlackRenderable(TypedDict):
blocks: list[Block]
text: str
color: NotRequired[str]


class SlackRenderer(NotificationRenderer[SlackRenderable]):
Expand Down Expand Up @@ -138,12 +139,17 @@ def get_renderer(
from sentry.notifications.platform.slack.renderers.issue import (
IssueSlackRenderer,
)
from sentry.notifications.platform.slack.renderers.metric_alert import (
SlackMetricAlertRenderer,
)
from sentry.notifications.platform.slack.renderers.seer import SeerSlackRenderer

if category == NotificationCategory.SEER:
return SeerSlackRenderer
if category == NotificationCategory.ISSUE:
return IssueSlackRenderer
if category == NotificationCategory.METRIC_ALERT:
return SlackMetricAlertRenderer
return cls.default_renderer

@classmethod
Expand Down
107 changes: 107 additions & 0 deletions src/sentry/notifications/platform/slack/renderers/metric_alert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from __future__ import annotations

import sentry_sdk

from sentry import features
from sentry.incidents.charts import build_metric_alert_chart
from sentry.incidents.typings.metric_detector import MetricIssueContext
from sentry.integrations.slack.message_builder.incidents import SlackIncidentsMessageBuilder
from sentry.models.group import Group
from sentry.models.organization import Organization
from sentry.notifications.notification_action.metric_alert_registry.handlers.utils import (
get_alert_rule_serializer,
get_detector_serializer,
)
from sentry.notifications.notification_action.types import BaseMetricAlertHandler
from sentry.notifications.platform.renderer import NotificationRenderer
from sentry.notifications.platform.slack.provider import SlackRenderable
from sentry.notifications.platform.templates.metric_alert import (
ActivityMetricAlertNotificationData,
BaseMetricAlertNotificationData,
MetricAlertNotificationData,
)
from sentry.notifications.platform.types import (
NotificationData,
NotificationProviderKey,
NotificationRenderedTemplate,
)
from sentry.services import eventstore
from sentry.services.eventstore.models import GroupEvent
from sentry.workflow_engine.models.detector import Detector


def _build_metric_issue_context_from_group_event(
data: MetricAlertNotificationData,
) -> MetricIssueContext:
event = eventstore.backend.get_event_by_id(
data.project_id, data.event_id, group_id=data.group_id
)
if event is None:
raise ValueError(f"Event {data.event_id} not found")
elif not isinstance(event, GroupEvent):
raise ValueError(f"Event {data.event_id} is not a GroupEvent")

evidence_data, priority = BaseMetricAlertHandler._extract_from_group_event(event)
return MetricIssueContext.from_group_event(event.group, evidence_data, priority)


def _build_metric_issue_context_from_activity(
data: ActivityMetricAlertNotificationData,
) -> MetricIssueContext:
from sentry.models.activity import Activity

Check failure on line 51 in src/sentry/notifications/platform/slack/renderers/metric_alert.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

[LK6-VGU] Detector.DoesNotExist not handled - notification task will crash if detector is deleted (additional location)

The `Detector.objects.get(id=data.detector_id)` call on line 72 has no DoesNotExist handling. Since notification data is serialized and processed asynchronously via a task queue (see service.py lines 191-197), the detector can be deleted between notification creation and rendering. This matches the SENTRY-5D9J pattern which caused 610,142 production events.

Check failure on line 52 in src/sentry/notifications/platform/slack/renderers/metric_alert.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

[LK6-VGU] Detector.DoesNotExist not handled - notification task will crash if detector is deleted (additional location)

The `Detector.objects.get(id=data.detector_id)` call on line 72 has no DoesNotExist handling. Since notification data is serialized and processed asynchronously via a task queue (see service.py lines 191-197), the detector can be deleted between notification creation and rendering. This matches the SENTRY-5D9J pattern which caused 610,142 production events.
activity = Activity.objects.get(id=data.activity_id)
group = Group.objects.get_from_cache(id=data.group_id)
evidence_data, priority = BaseMetricAlertHandler._extract_from_activity(activity)
return MetricIssueContext.from_group_event(group, evidence_data, priority)


class SlackMetricAlertRenderer(NotificationRenderer[SlackRenderable]):
provider_key = NotificationProviderKey.SLACK

@classmethod
def render[DataT: NotificationData](
cls, *, data: DataT, rendered_template: NotificationRenderedTemplate
) -> SlackRenderable:
if not isinstance(data, BaseMetricAlertNotificationData):
raise ValueError(f"SlackMetricAlertRenderer does not support {data.__class__.__name__}")

if isinstance(data, MetricAlertNotificationData):
metric_issue_context = _build_metric_issue_context_from_group_event(data)
elif isinstance(data, ActivityMetricAlertNotificationData):
metric_issue_context = _build_metric_issue_context_from_activity(data)

Check failure on line 72 in src/sentry/notifications/platform/slack/renderers/metric_alert.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

Detector.DoesNotExist not handled - notification task will crash if detector is deleted

The `Detector.objects.get(id=data.detector_id)` call on line 72 has no DoesNotExist handling. Since notification data is serialized and processed asynchronously via a task queue (see service.py lines 191-197), the detector can be deleted between notification creation and rendering. This matches the SENTRY-5D9J pattern which caused 610,142 production events.
Comment thread
Christinarlong marked this conversation as resolved.
Comment thread
Christinarlong marked this conversation as resolved.

organization = Organization.objects.get_from_cache(id=data.organization_id)
detector = Detector.objects.get(id=data.detector_id)
alert_context = data.alert_context.to_alert_context()
open_period_context = data.open_period_context

chart_url = None
if features.has("organizations:metric-alert-chartcuterie", organization):
try:
chart_url = build_metric_alert_chart(
organization=organization,
alert_rule_serialized_response=get_alert_rule_serializer(detector),
snuba_query=metric_issue_context.snuba_query,
alert_context=alert_context,
open_period_context=open_period_context,
subscription=metric_issue_context.subscription,
detector_serialized_response=get_detector_serializer(detector),
)
except Exception as e:
sentry_sdk.capture_exception(e)

slack_body = SlackIncidentsMessageBuilder(
alert_context=alert_context,
metric_issue_context=metric_issue_context,
organization=organization,
date_started=open_period_context.date_started,
chart_url=chart_url,
notification_uuid=data.notification_uuid,
).build()

return SlackRenderable(
blocks=slack_body.get("blocks", []),
text=slack_body.get("text", ""),
color=slack_body.get("color", ""),
)
Comment thread
Christinarlong marked this conversation as resolved.
Comment thread
Christinarlong marked this conversation as resolved.
3 changes: 3 additions & 0 deletions src/sentry/notifications/platform/templates/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from .data_export import DataExportFailureTemplate, DataExportSuccessTemplate
from .issue import IssueNotificationTemplate
from .metric_alert import ActivityMetricAlertNotificationTemplate, MetricAlertNotificationTemplate

__all__ = (
"DataExportSuccessTemplate",
"DataExportFailureTemplate",
"IssueNotificationTemplate",
"MetricAlertNotificationTemplate",
"ActivityMetricAlertNotificationTemplate",
)
# All templates should be imported here so they are registered in the notifications Django app.
# See sentry/notifications/apps.py
Expand Down
147 changes: 147 additions & 0 deletions src/sentry/notifications/platform/templates/metric_alert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
from __future__ import annotations

from datetime import datetime
from typing import Self

from pydantic import BaseModel, ConfigDict

from sentry.incidents.typings.metric_detector import AlertContext, OpenPeriodContext
from sentry.notifications.platform.registry import template_registry
from sentry.notifications.platform.types import (
NotificationCategory,
NotificationData,
NotificationRenderedTemplate,
NotificationSource,
NotificationTemplate,
)
from sentry.seer.anomaly_detection.types import AnomalyDetectionThresholdType


class SerializableAlertContext(BaseModel):
model_config = ConfigDict(frozen=True)

name: str
action_identifier_id: int
threshold_type: int | None = None # AlertRuleThresholdType or AnomalyDetectionThresholdType
detection_type: str # AlertRuleDetectionType value (TextChoices str)
comparison_delta: int | None = None
sensitivity: str | None = None
resolve_threshold: float | None = None
alert_threshold: float | None = None

@classmethod
def from_alert_context(cls, ac: AlertContext) -> Self:
return cls(
name=ac.name,
action_identifier_id=ac.action_identifier_id,
threshold_type=int(ac.threshold_type.value) if ac.threshold_type is not None else None,
detection_type=ac.detection_type.value,
comparison_delta=ac.comparison_delta,
sensitivity=ac.sensitivity,
resolve_threshold=ac.resolve_threshold,
alert_threshold=ac.alert_threshold,
)

def to_alert_context(self) -> AlertContext:
from sentry.incidents.models.alert_rule import (
AlertRuleDetectionType,
AlertRuleThresholdType,
)

detection_type = AlertRuleDetectionType(self.detection_type)

threshold_type: AlertRuleThresholdType | AnomalyDetectionThresholdType | None = None
if self.threshold_type is not None:
if detection_type == AlertRuleDetectionType.DYNAMIC:
threshold_type = AnomalyDetectionThresholdType(self.threshold_type)

Check warning on line 56 in src/sentry/notifications/platform/templates/metric_alert.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

Unguarded enum conversions in to_alert_context can raise ValueError on invalid data

The `to_alert_context` method performs enum conversions without try/except guards: `AlertRuleDetectionType(self.detection_type)` at line 49 and `AlertRuleThresholdType`/`AnomalyDetectionThresholdType` at lines 54/56. If serialized notification data contains invalid enum values (due to data corruption, schema changes, or task queue issues), these will raise `ValueError`. While upstream exception handling in the notification service catches this and logs it, the error will still fire continuously for every affected notification and create Sentry issues.
else:
threshold_type = AlertRuleThresholdType(self.threshold_type)

return AlertContext(
name=self.name,
action_identifier_id=self.action_identifier_id,
threshold_type=threshold_type,
detection_type=detection_type,
comparison_delta=self.comparison_delta,
sensitivity=self.sensitivity,
resolve_threshold=self.resolve_threshold,
alert_threshold=self.alert_threshold,
)


class BaseMetricAlertNotificationData(NotificationData):
group_id: int
organization_id: int
detector_id: int

alert_context: SerializableAlertContext
open_period_context: OpenPeriodContext

notification_uuid: str


class MetricAlertNotificationData(BaseMetricAlertNotificationData):
"""GroupEvent / firing path. Renderer re-fetches GroupEvent from Snuba."""

source: NotificationSource = NotificationSource.METRIC_ALERT

event_id: str
project_id: int


class ActivityMetricAlertNotificationData(BaseMetricAlertNotificationData):
"""Activity / SET_RESOLVED path. Renderer re-fetches Activity from Postgres."""

source: NotificationSource = NotificationSource.ACTIVITY_METRIC_ALERT

activity_id: int


_EXAMPLE_ALERT_CONTEXT = SerializableAlertContext(
name="Example Alert",
action_identifier_id=1,
detection_type="static",
)
_EXAMPLE_OPEN_PERIOD_CONTEXT = OpenPeriodContext(
id=1,
date_started=datetime(2024, 1, 1, 0, 0, 0),
)


@template_registry.register(NotificationSource.METRIC_ALERT)
class MetricAlertNotificationTemplate(NotificationTemplate[MetricAlertNotificationData]):
category = NotificationCategory.METRIC_ALERT
hide_from_debugger = True
example_data = MetricAlertNotificationData(
event_id="abc123",
project_id=1,
group_id=1,
organization_id=1,
detector_id=1,
alert_context=_EXAMPLE_ALERT_CONTEXT,
open_period_context=_EXAMPLE_OPEN_PERIOD_CONTEXT,
notification_uuid="test-uuid",
)

def render(self, data: MetricAlertNotificationData) -> NotificationRenderedTemplate:
return NotificationRenderedTemplate(subject="Metric Alert", body=[])


@template_registry.register(NotificationSource.ACTIVITY_METRIC_ALERT)
class ActivityMetricAlertNotificationTemplate(
NotificationTemplate[ActivityMetricAlertNotificationData]
):
category = NotificationCategory.METRIC_ALERT
hide_from_debugger = True
example_data = ActivityMetricAlertNotificationData(
group_id=1,
organization_id=1,
detector_id=1,
alert_context=_EXAMPLE_ALERT_CONTEXT,
open_period_context=_EXAMPLE_OPEN_PERIOD_CONTEXT,
notification_uuid="test-uuid",
activity_id=1,
)

def render(self, data: ActivityMetricAlertNotificationData) -> NotificationRenderedTemplate:
return NotificationRenderedTemplate(subject="Metric Alert", body=[])
Loading
Loading