Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions src/sentry/incidents/metric_issue_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from sentry.workflow_engine.endpoints.validators.base.data_condition import (
BaseDataConditionValidator,
)
from sentry.workflow_engine.models import DataSource, Detector
from sentry.workflow_engine.models import DataCondition, DataSource, Detector

Check warning on line 44 in src/sentry/incidents/metric_issue_detector.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

Uncaught DetectorException (500) when converting a detector to anomaly detection without an ANOMALY_DETECTION condition in the payload

In `update_anomaly_detection` (metric_issue_detector.py), when a detector is converted to `DYNAMIC`, the new in-memory `DataCondition` is only built when the payload actually contains an `ANOMALY_DETECTION` condition. `MetricIssueConditionGroupValidator.validate_conditions` only requires *either* an OK-result condition *or* an ANOMALY_DETECTION condition, so a `detection_type=DYNAMIC` payload such as `[GREATER(HIGH), MEDIUM]` or `[GREATER(HIGH), OK]` passes validation with no ANOMALY_DETECTION condition. In that case `data_condition` is `None`, so `update_detector_data` falls back to `_fetch_anomaly_data_condition(detector, 'update')`, which reads the detector's *previous* (still-persisted, e.g. static) conditions. That `DataCondition.objects.get(condition_result__in=[HIGH, MEDIUM])` raises `DataCondition.DoesNotExist` or `MultipleObjectsReturned`, which is re-raised as `DetectorException`. `DetectorException` subclasses `Exception` (not `ValidationError`), so neither the `except (TimeoutError, MaxRetryError, ParseError, ValidationError)` block in `update_detector_data` nor the one in `update_anomaly_detection` catches it, and it propagates to the API layer as an unhandled 500. This is the same class of failure the PR set out to fix, but the fix is incomplete for the no-anomaly-condition subcase.
Comment thread
saponifi3d marked this conversation as resolved.
from sentry.workflow_engine.models.data_condition import Condition
from sentry.workflow_engine.types import DetectorPriorityLevel, SnubaQueryDataSourceType

Expand Down Expand Up @@ -415,9 +415,27 @@
validated_data.get("config", {}).get("detection_type") == AlertRuleDetectionType.DYNAMIC
)
if not is_currently_dynamic_detector and is_update_dynamic_detector:
# Detector has been changed to become a dynamic detector
# Detector has been changed to become a dynamic detector. The detector's
# persisted conditions still describe its previous (e.g. static) configuration
# at this point, so build the anomaly condition to send to Seer from the
# incoming payload rather than looking up the (not-yet-written) condition.
conditions = validated_data.get("condition_group", {}).get("conditions", [])
anomaly_condition_data = next(
(c for c in conditions if c.get("type") == Condition.ANOMALY_DETECTION),
None,
)
data_condition = (
DataCondition(
type=anomaly_condition_data["type"],
comparison=anomaly_condition_data["comparison"],
condition_result=anomaly_condition_data["condition_result"],
condition_group=instance.workflow_condition_group,
)
if anomaly_condition_data
else None
)
try:

Check warning on line 437 in src/sentry/incidents/metric_issue_detector.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

[ZMC-F3B] Uncaught DetectorException (500) when converting a detector to anomaly detection without an ANOMALY_DETECTION condition in the payload (additional location)

In `update_anomaly_detection` (metric_issue_detector.py), when a detector is converted to `DYNAMIC`, the new in-memory `DataCondition` is only built when the payload actually contains an `ANOMALY_DETECTION` condition. `MetricIssueConditionGroupValidator.validate_conditions` only requires *either* an OK-result condition *or* an ANOMALY_DETECTION condition, so a `detection_type=DYNAMIC` payload such as `[GREATER(HIGH), MEDIUM]` or `[GREATER(HIGH), OK]` passes validation with no ANOMALY_DETECTION condition. In that case `data_condition` is `None`, so `update_detector_data` falls back to `_fetch_anomaly_data_condition(detector, 'update')`, which reads the detector's *previous* (still-persisted, e.g. static) conditions. That `DataCondition.objects.get(condition_result__in=[HIGH, MEDIUM])` raises `DataCondition.DoesNotExist` or `MultipleObjectsReturned`, which is re-raised as `DetectorException`. `DetectorException` subclasses `Exception` (not `ValidationError`), so neither the `except (TimeoutError, MaxRetryError, ParseError, ValidationError)` block in `update_detector_data` nor the one in `update_anomaly_detection` catches it, and it propagates to the API layer as an unhandled 500. This is the same class of failure the PR set out to fix, but the fix is incomplete for the no-anomaly-condition subcase.
update_detector_data(instance, validated_data)
update_detector_data(instance, validated_data, data_condition=data_condition)
Comment thread
saponifi3d marked this conversation as resolved.
seer_updated = True
except (TimeoutError, MaxRetryError, ParseError, ValidationError) as e:
# Don't update if we failed to send data to Seer
Expand Down
35 changes: 23 additions & 12 deletions src/sentry/seer/anomaly_detection/store_data_workflow_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@
logger = logging.getLogger(__name__)


def _fetch_related_models(
detector: Detector, method: str
) -> tuple[DataSource, DataCondition, SnubaQuery]:
def _fetch_related_models(detector: Detector, method: str) -> tuple[DataSource, SnubaQuery]:
# XXX: it is technically possible (though not used today) that a detector could have multiple data sources
data_source_detector = DataSourceDetector.objects.filter(detector_id=detector.id).first()
if not data_source_detector:
Expand All @@ -55,8 +53,18 @@
raise DetectorException(
f"Could not {method} detector, snuba query {query_subscription.snuba_query_id} not found."
)
return data_source, snuba_query


def _fetch_anomaly_data_condition(detector: Detector, method: str) -> DataCondition:
"""
Read the detector's single persisted anomaly detection condition. Only valid for
detectors that are already dynamic (e.g. create, or a data-source-only update). A
detector being converted to anomaly detection does not have an anomaly condition
persisted yet, so its caller must supply the condition.
"""
try:
data_condition = DataCondition.objects.get(
return DataCondition.objects.get(
condition_group=detector.workflow_condition_group,
condition_result__in=[
DetectorPriorityLevel.HIGH,
Expand All @@ -73,22 +81,24 @@
raise DetectorException(
f"Could not {method} detector, data condition {dcg_id} not found or too many found."
)
return data_source, data_condition, snuba_query


def update_detector_data(
detector: Detector,
updated_fields: dict[str, Any],
data_condition: DataCondition | None = None,
) -> None:
data_source, data_condition, snuba_query = _fetch_related_models(detector, "update")
data_source, snuba_query = _fetch_related_models(detector, "update")

# When a detector is being converted to anomaly detection the new condition is
# supplied by the caller, since the detector's persisted conditions still describe
# its previous (e.g. static) configuration. Otherwise read it from the detector's
# single persisted anomaly condition.
if data_condition is None:
data_condition = _fetch_anomaly_data_condition(detector, "update")

Check warning on line 98 in src/sentry/seer/anomaly_detection/store_data_workflow_engine.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

[ZMC-F3B] Uncaught DetectorException (500) when converting a detector to anomaly detection without an ANOMALY_DETECTION condition in the payload (additional location)

In `update_anomaly_detection` (metric_issue_detector.py), when a detector is converted to `DYNAMIC`, the new in-memory `DataCondition` is only built when the payload actually contains an `ANOMALY_DETECTION` condition. `MetricIssueConditionGroupValidator.validate_conditions` only requires *either* an OK-result condition *or* an ANOMALY_DETECTION condition, so a `detection_type=DYNAMIC` payload such as `[GREATER(HIGH), MEDIUM]` or `[GREATER(HIGH), OK]` passes validation with no ANOMALY_DETECTION condition. In that case `data_condition` is `None`, so `update_detector_data` falls back to `_fetch_anomaly_data_condition(detector, 'update')`, which reads the detector's *previous* (still-persisted, e.g. static) conditions. That `DataCondition.objects.get(condition_result__in=[HIGH, MEDIUM])` raises `DataCondition.DoesNotExist` or `MultipleObjectsReturned`, which is re-raised as `DetectorException`. `DetectorException` subclasses `Exception` (not `ValidationError`), so neither the `except (TimeoutError, MaxRetryError, ParseError, ValidationError)` block in `update_detector_data` nor the one in `update_anomaly_detection` catches it, and it propagates to the API layer as an unhandled 500. This is the same class of failure the PR set out to fix, but the fix is incomplete for the no-anomaly-condition subcase.

# use setattr to avoid saving the models until the Seer call has successfully finished,
# otherwise they would be in a bad state
updated_data_condition_data = updated_fields.get("condition_group", {}).get("conditions")
if updated_data_condition_data:
for k, v in updated_data_condition_data[0].items():
setattr(data_condition, k, v)

event_types = snuba_query.event_types
updated_data_source_data = updated_fields.get("data_sources")
if updated_data_source_data:
Expand Down Expand Up @@ -130,7 +140,8 @@
"""
Send historical data for a new Detector to Seer.
"""
data_source, data_condition, snuba_query = _fetch_related_models(detector, "create")
data_source, snuba_query = _fetch_related_models(detector, "create")
data_condition = _fetch_anomaly_data_condition(detector, "create")

try:
handle_send_historical_data_to_seer(
Expand Down
86 changes: 86 additions & 0 deletions tests/sentry/incidents/endpoints/validators/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,92 @@ def test_update_anomaly_detection_from_static(
data=dynamic_detector.get_audit_log_data(),
)

@mock.patch("sentry.seer.anomaly_detection.store_data_workflow_engine.make_store_data_request")
@mock.patch("sentry.workflow_engine.endpoints.validators.base.detector.create_audit_entry")
def test_update_anomaly_detection_from_static_with_warning_and_critical(
self, mock_audit: mock.MagicMock, mock_seer_request: mock.MagicMock
) -> None:
"""
Test that a static detector with both a warning (MEDIUM) and a critical (HIGH)
trigger can be converted to a dynamic detector. The detector's persisted
conditions still describe the static config at the point we send data to Seer,
so the anomaly comparison must come from the incoming payload rather than from a
lookup of a single existing HIGH/MEDIUM condition (which would match two rows).
"""
static_data_with_warning = {
**self.valid_data,
"conditionGroup": {
"logicType": self.data_condition_group.logic_type,
"conditions": [
{
"type": Condition.GREATER,
"comparison": 100,
"conditionResult": DetectorPriorityLevel.HIGH,
},
{
"type": Condition.GREATER,
"comparison": 50,
"conditionResult": DetectorPriorityLevel.MEDIUM,
},
{
"type": Condition.LESS_OR_EQUAL,
"comparison": 50,
"conditionResult": DetectorPriorityLevel.OK,
},
],
},
"config": {
"thresholdPeriod": 1,
"detectionType": AlertRuleDetectionType.STATIC.value,
},
}
validator = MetricIssueDetectorValidator(
data=static_data_with_warning, context=self.context
)
assert validator.is_valid(), validator.errors
with self.tasks():
static_detector = validator.save()

# Confirm the static detector has both a HIGH and a MEDIUM condition.
static_conditions = list(
DataCondition.objects.filter(
condition_group=static_detector.workflow_condition_group,
condition_result__in=[DetectorPriorityLevel.HIGH, DetectorPriorityLevel.MEDIUM],
)
)
assert len(static_conditions) == 2

mock_audit.reset_mock()

# Change to become a dynamic detector
seer_return_value: StoreDataResponse = {"success": True}
mock_seer_request.return_value = HTTPResponse(orjson.dumps(seer_return_value), status=200)

update_validator = MetricIssueDetectorValidator(
instance=static_detector,
data=self.valid_anomaly_detection_data,
context=self.context,
partial=True,
)
assert update_validator.is_valid(), update_validator.errors
dynamic_detector = update_validator.save()

assert mock_seer_request.call_count == 1

# Verify conditions in DB now describe a single anomaly detection condition
conditions = list(
DataCondition.objects.filter(
condition_group=dynamic_detector.workflow_condition_group_id
)
)
assert len(conditions) == 1
assert conditions[0].type == Condition.ANOMALY_DETECTION
assert conditions[0].comparison == {
"sensitivity": AnomalyDetectionSensitivity.HIGH,
"seasonality": AnomalyDetectionSeasonality.AUTO,
"threshold_type": AnomalyDetectionThresholdType.ABOVE_AND_BELOW,
}

@mock.patch("sentry.seer.anomaly_detection.store_data_workflow_engine.make_store_data_request")
@mock.patch("sentry.workflow_engine.endpoints.validators.base.detector.create_audit_entry")
def test_update_anomaly_detection_snuba_query_query(
Expand Down
Loading