# U+F00D (a private-use code point) brackets wildcard operator names inside
# query strings. Keys are the delimited operator tokens; values are the
# readable replacement, padded with spaces on both sides.
_WILDCARD_LABEL_MAP = {
    "\uf00dDoesNotContain\uf00d": " does not contain ",
    "\uf00dDoesNotStartWith\uf00d": " does not start with ",
    "\uf00dDoesNotEndWith\uf00d": " does not end with ",
    "\uf00dContains\uf00d": " contains ",
    "\uf00dStartsWith\uf00d": " starts with ",
    "\uf00dEndsWith\uf00d": " ends with ",
}

# Matches the six-character textual escape of the delimiter (a backslash
# followed by "uf00d", in any letter case). Retained so existing references to
# this name keep working; the normalizer below no longer applies it globally.
_ESCAPED_WILDCARD_RE = re.compile(r"\\uf00d", re.IGNORECASE)

# Bare operator name -> readable label, derived from the map above so the two
# can never drift apart.
_WILDCARD_LABELS_BY_NAME = {
    token.strip("\uf00d"): label for token, label in _WILDCARD_LABEL_MAP.items()
}

# A delimiter is either the raw U+F00D character or its textual escape
# (presumably what appears once the snapshot data has been serialized --
# TODO confirm against the renderer).
_WILDCARD_DELIMITER_PATTERN = r"(?:\uf00d|\\[uU][fF]00[dD])"

# delimiter + known operator name + delimiter. Operator names stay
# case-sensitive; only the escape spelling is case-insensitive.
_WILDCARD_OPERATOR_RE = re.compile(
    _WILDCARD_DELIMITER_PATTERN
    + "("
    + "|".join(re.escape(name) for name in _WILDCARD_LABELS_BY_NAME)
    + ")"
    + _WILDCARD_DELIMITER_PATTERN
)


def _normalize_wildcard_operators(text: str) -> str:
    """Replace U+F00D-delimited wildcard operators with readable labels.

    Each delimiter may be the raw U+F00D character or its escaped spelling
    (backslash + ``uf00d``, any letter case); the opening and closing
    delimiter of one operator may use different spellings.

    Only complete ``<delimiter><known operator><delimiter>`` tokens are
    rewritten. Any other text -- including an escaped delimiter that is not
    part of a known operator -- is returned unchanged, so normalization never
    introduces private-use characters that were not already in the input.

    Args:
        text: Arbitrary text that may contain wildcard operator tokens.

    Returns:
        ``text`` with every recognized operator token replaced by its label
        from ``_WILDCARD_LABEL_MAP``.
    """
    # Single left-to-right pass; the captured group is always a key of
    # _WILDCARD_LABELS_BY_NAME because the alternation is built from it.
    return _WILDCARD_OPERATOR_RE.sub(
        lambda match: _WILDCARD_LABELS_BY_NAME[match.group(1)], text
    )
import OrganizationMember from sentry.seer.agent.client_utils import ( + _normalize_wildcard_operators, collect_user_org_context, get_proxy_headers, has_seer_agent_access_with_detail, @@ -372,6 +373,61 @@ def test_node_with_non_dict_data(self) -> None: assert "# Widget" in result assert '- "some string"' in result + def test_wildcard_operators_normalized_in_output(self) -> None: + snapshot = { + "version": 1, + "nodes": [ + { + "nodeType": "explorer", + "data": { + "searchQuery": ( + "span.name:\uf00dStartsWith\uf00d/api" + " title:\uf00dDoesNotContain\uf00dtest" + ) + }, + "children": [], + } + ], + } + result = snapshot_to_markdown(snapshot) + assert "\uf00d" not in result + assert "starts with" in result + assert "does not contain" in result + + +class NormalizeWildcardOperatorsTest(TestCase): + W = "\uf00d" + + def test_all_operators(self) -> None: + W = self.W + cases = [ + (f"f:{W}Contains{W}x", "f: contains x"), + (f"f:{W}DoesNotContain{W}x", "f: does not contain x"), + (f"f:{W}StartsWith{W}x", "f: starts with x"), + (f"f:{W}DoesNotStartWith{W}x", "f: does not start with x"), + (f"f:{W}EndsWith{W}x", "f: ends with x"), + (f"f:{W}DoesNotEndWith{W}x", "f: does not end with x"), + ] + for input_text, expected in cases: + assert _normalize_wildcard_operators(input_text) == expected + + def test_escaped_and_mixed_sequences(self) -> None: + assert _normalize_wildcard_operators("f:\\uf00dContains\\uf00dx") == "f: contains x" + assert _normalize_wildcard_operators("f:\\uF00DContains\\uF00Dx") == "f: contains x" + assert _normalize_wildcard_operators("f:\uf00dContains\\uf00dx") == "f: contains x" + + def test_passthrough(self) -> None: + assert _normalize_wildcard_operators("") == "" + assert ( + _normalize_wildcard_operators("browser:Firefox count():>100") + == "browser:Firefox count():>100" + ) + + def test_multiple_operators(self) -> None: + W = self.W + text = f"a:{W}Contains{W}foo b:{W}EndsWith{W}.js" + assert _normalize_wildcard_operators(text) == "a: contains 
foo b: ends with .js" + _TEST_SECRET = "test-secret-must-be-at-least-32-bytes-long-for-hs256"