From a5ce2e5e9d3b2df6b7d02aa10a0e84b60a54fa0a Mon Sep 17 00:00:00 2001 From: Richard Roggenkemper Date: Mon, 1 Jun 2026 13:11:06 -0400 Subject: [PATCH 1/3] feat(search): Downrank capture_message issues in recommended sort MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a message penalty signal to the recommended sort formula that downranks issues without exception data (e.g. from capture_message). Uses exception_stacks.type to detect whether events have real stacktraces — pure message groups get a configurable penalty (default 0.10) while exception groups are unaffected. Co-Authored-By: Claude --- src/sentry/options/defaults.py | 5 +++ src/sentry/search/snuba/executors.py | 29 ++++++++++----- tests/snuba/search/test_backend.py | 54 ++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 9 deletions(-) diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index 0657074617e0..0c8576f3cb3f 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -901,6 +901,11 @@ default={7001: 0.15}, flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE, ) +register( + "snuba.search.recommended.message-penalty-weight", + default=0.10, + flags=FLAG_AUTOMATOR_MODIFIABLE, +) # The percentage of tagkeys that we want to cache. Set to 1.0 in order to cache everything, <=0.0 to stop caching register( diff --git a/src/sentry/search/snuba/executors.py b/src/sentry/search/snuba/executors.py index f5a1d2937206..e2090925e252 100644 --- a/src/sentry/search/snuba/executors.py +++ b/src/sentry/search/snuba/executors.py @@ -813,16 +813,27 @@ def _recommended_aggregation( else: type_boost = "0.0" + # Message penalty: downrank issues from capture_message (no exception/stacktrace) + # Only applies to Events dataset — issue-platform occurrences don't have exception_stacks + if type_column is None: + message_penalty_weight = options.get("snuba.search.recommended.message-penalty-weight") + has_exception_ratio = "divide(countIf(notEmpty(exception_stacks.type)), plus(count(), 1))" + message_penalty = f"multiply({message_penalty_weight}, minus(1.0, {has_exception_ratio}))" + else: + message_penalty = "0.0" + + weighted_score = ( + f"plus(plus(plus(plus(plus(" + f"multiply({recency_weight}, {recency}), " + f"multiply({spike_weight}, {spike})), " + f"multiply({severity_weight}, {severity})), " + f"multiply({user_impact_weight}, {user_impact})), " + f"multiply({event_volume_weight}, {event_volume})), " + f"{type_boost})" + ) + return [ - ( - f"plus(plus(plus(plus(plus(" - f"multiply({recency_weight}, {recency}), " - f"multiply({spike_weight}, {spike})), " - f"multiply({severity_weight}, {severity})), " - f"multiply({user_impact_weight}, {user_impact})), " - f"multiply({event_volume_weight}, {event_volume})), " - f"{type_boost})" - ), + f"minus({weighted_score}, {message_penalty})", "", ] diff --git a/tests/snuba/search/test_backend.py b/tests/snuba/search/test_backend.py index bd62a1e818d6..e6ca2085ac3c 100644 --- a/tests/snuba/search/test_backend.py +++ b/tests/snuba/search/test_backend.py @@ -4122,3 +4122,57 @@ def test_recommended_group_type_boost(self) -> None: scores = {gid: score for gid, score in results} assert scores[profile_group.id] > scores[error_group.id] + + def test_recommended_message_penalty(self) -> None: + base_datetime = before_now(hours=1) + + error_event = self.store_event( + data={ + "fingerprint": ["exception-group"], + "event_id": "a" * 32, + "timestamp": base_datetime.isoformat(), + "message": "real error", + "level": "error", + "exception": { + "values": [ + { + "type": "ValueError", + "value": "something broke", + "stacktrace": {"frames": [{"module": "app.main"}]}, + } + ] + }, + "tags": {"sentry:user": "user1@example.com"}, + }, + project_id=self.project.id, + ) + exception_group = error_event.group + + message_event = self.store_event( + data={ + "fingerprint": ["message-group"], + "event_id": "b" * 32, + "timestamp": base_datetime.isoformat(), + "message": "disk space low", + "level": "error", + "tags": {"sentry:user": "user2@example.com"}, + }, + project_id=self.project.id, + ) + message_group = message_event.group + + query_executor = self.backend._get_query_executor() + results = query_executor.snuba_search( + start=None, + end=None, + project_ids=[self.project.id], + environment_ids=[], + sort_field="recommended", + organization=self.organization, + group_ids=[exception_group.id, message_group.id], + limit=150, + referrer=Referrer.TESTING_TEST, + )[0] + + scores = {gid: score for gid, score in results} + assert scores[exception_group.id] > scores[message_group.id] From 2c5db34a616bda2a6cdba8c3debf052ccc7f8a7c Mon Sep 17 00:00:00 2001 From: Richard Roggenkemper Date: Mon, 1 Jun 2026 13:39:16 -0400 Subject: [PATCH 2/3] fix(search): Use count() instead of count()+1 in message penalty ratio The +1 in the denominator prevented the has_exception_ratio from reaching 1.0 for exception-only groups, causing them to receive a small residual penalty instead of zero. Co-Authored-By: Claude --- src/sentry/search/snuba/executors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/search/snuba/executors.py b/src/sentry/search/snuba/executors.py index e2090925e252..ada9cd8f6682 100644 --- a/src/sentry/search/snuba/executors.py +++ b/src/sentry/search/snuba/executors.py @@ -817,7 +817,7 @@ def _recommended_aggregation( # Only applies to Events dataset — issue-platform occurrences don't have exception_stacks if type_column is None: message_penalty_weight = options.get("snuba.search.recommended.message-penalty-weight") - has_exception_ratio = "divide(countIf(notEmpty(exception_stacks.type)), plus(count(), 1))" + has_exception_ratio = "divide(countIf(notEmpty(exception_stacks.type)), count())" message_penalty = f"multiply({message_penalty_weight}, minus(1.0, {has_exception_ratio}))" else: message_penalty = "0.0" From 1a9738a72622ca8dbc04e2105efcc2599f1cc378 Mon Sep 17 00:00:00 2001 From: Richard Roggenkemper Date: Wed, 3 Jun 2026 15:18:28 -0400 Subject: [PATCH 3/3] ref(search): Read message-penalty weight alongside other factor weights Co-Authored-By: Claude --- src/sentry/search/snuba/executors.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/sentry/search/snuba/executors.py b/src/sentry/search/snuba/executors.py index 269a1deb2c55..1a12bdcb2038 100644 --- a/src/sentry/search/snuba/executors.py +++ b/src/sentry/search/snuba/executors.py @@ -805,6 +805,11 @@ def _recommended_aggregation( # Group type boost: additive signal per issue type group_type_boosts = options.get("snuba.search.recommended.group-type-boost") + # Message penalty: downranks capture_message issues (no exception/stacktrace). + # Subtracted from the score below, and only on the events dataset -- issue-platform + # occurrences don't have exception_stacks. + message_penalty_weight = options.get("snuba.search.recommended.message-penalty-weight") + # Skip zero-weighted factors: their term is always 0, so computing them in # ClickHouse is wasted work -- especially expensive aggregates like user # impact's uniq(tags[sentry:user]). @@ -838,9 +843,6 @@ def _recommended_aggregation( for term in terms[1:]: score_expr = f"plus({score_expr}, {term})" - # Message penalty: downrank capture_message issues (no exception/stacktrace). - # Only the events dataset has exception_stacks; issue-platform occurrences don't. - message_penalty_weight = options.get("snuba.search.recommended.message-penalty-weight") if type_column is None and message_penalty_weight: has_exception_ratio = "divide(countIf(notEmpty(exception_stacks.type)), count())" message_penalty = f"multiply({message_penalty_weight}, minus(1.0, {has_exception_ratio}))"