diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index c3a64a33142fe4..bb8d8c2a4621e6 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -897,6 +897,11 @@ default={7001: 0.15}, flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE, ) +register( + "snuba.search.recommended.message-penalty-weight", + default=0.0, + flags=FLAG_AUTOMATOR_MODIFIABLE, +) # The percentage of tagkeys that we want to cache. Set to 1.0 in order to cache everything, <=0.0 to stop caching register( diff --git a/src/sentry/search/snuba/executors.py b/src/sentry/search/snuba/executors.py index 38865a082d7a12..1a12bdcb203822 100644 --- a/src/sentry/search/snuba/executors.py +++ b/src/sentry/search/snuba/executors.py @@ -805,6 +805,11 @@ def _recommended_aggregation( # Group type boost: additive signal per issue type group_type_boosts = options.get("snuba.search.recommended.group-type-boost") + # Message penalty: downranks capture_message issues (no exception/stacktrace). + # Subtracted from the score below, and only on the events dataset -- issue-platform + # occurrences don't have exception_stacks. + message_penalty_weight = options.get("snuba.search.recommended.message-penalty-weight") + # Skip zero-weighted factors: their term is always 0, so computing them in # ClickHouse is wasted work -- especially expensive aggregates like user # impact's uniq(tags[sentry:user]). @@ -838,6 +843,11 @@ def _recommended_aggregation( for term in terms[1:]: score_expr = f"plus({score_expr}, {term})" + if type_column is None and message_penalty_weight: + has_exception_ratio = "divide(countIf(notEmpty(exception_stacks.type)), count())" + message_penalty = f"multiply({message_penalty_weight}, minus(1.0, {has_exception_ratio}))" + score_expr = f"minus({score_expr}, {message_penalty})" + return [score_expr, ""] diff --git a/tests/snuba/search/test_backend.py b/tests/snuba/search/test_backend.py index b3b5026fc8b603..61ca50da34474f 100644 --- a/tests/snuba/search/test_backend.py +++ b/tests/snuba/search/test_backend.py @@ -4164,6 +4164,43 @@ def _recommended_scores(self, group_ids: list[int]) -> dict[int, float]: )[0] return {gid: score for gid, score in results} + def test_recommended_message_penalty(self) -> None: + ts = before_now(hours=1).isoformat() + exception_event = self.store_event( + data={ + "fingerprint": ["exception-group"], + "event_id": "a" * 32, + "timestamp": ts, + "message": "exception-group", + "level": "error", + "exception": { + "values": [ + { + "type": "ValueError", + "value": "something broke", + "stacktrace": {"frames": [{"module": "app.main"}]}, + } + ] + }, + }, + project_id=self.project.id, + ) + message_event = self.store_event( + data={ + "fingerprint": ["message-group"], + "event_id": "b" * 32, + "timestamp": ts, + "message": "message-group", + "level": "error", + }, + project_id=self.project.id, + ) + + with self.options({"snuba.search.recommended.message-penalty-weight": 0.10}): + scores = self._recommended_scores([exception_event.group.id, message_event.group.id]) + + assert scores[exception_event.group.id] > scores[message_event.group.id] + def test_recommended_zero_weight_factor_excluded(self) -> None: ts = before_now(hours=1).isoformat() for i in range(5):