From 2a49452e45b1df8aa336a96d860b7dd80e881a81 Mon Sep 17 00:00:00 2001 From: Jeremy Stanley Date: Tue, 16 Jun 2026 15:28:53 -0700 Subject: [PATCH 1/5] ref(seer): Type the bug-prediction-stats RPC responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the `dict[str, Any]` / `list[dict[str, Any]]` return annotations on three seer RPC methods that emit attribute / tag distribution data for the bug-prediction agent: - `get_baseline_tag_distribution` → `BaselineTagDistributionResponse` with typed `BaselineTagDistributionEntry` items (tag_key, tag_value, count) - `get_comparative_attribute_distributions` → `ComparativeAttributeDistributionsResponse` with the five named fields the spans frequency-stats endpoint produces (baseline_distribution, total_baseline, outliers_distribution, total_outliers, outliers_function_value) - `get_issues_stats` → `IssuesStatsResponse | None` — a `__root__`-based list passthrough since the issues-stats API shape is wider than the documented `id/count/userCount/firstSeen/lastSeen/stats/lifetime` contract Wire-identical across all paths. 
--- src/sentry/seer/agent/tools.py | 31 ++++---- .../seer/assisted_query/issues_tools.py | 7 +- src/sentry/seer/sentry_data_models.py | 79 +++++++++++++++++++ .../seer/assisted_query/test_issues_tools.py | 10 +-- 4 files changed, 102 insertions(+), 25 deletions(-) diff --git a/src/sentry/seer/agent/tools.py b/src/sentry/seer/agent/tools.py index c9f0a2eaae9d..e5818d2c38fd 100644 --- a/src/sentry/seer/agent/tools.py +++ b/src/sentry/seer/agent/tools.py @@ -59,6 +59,9 @@ from sentry.seer.autofix.autofix import get_all_tags_overview from sentry.seer.seer_setup import get_supported_scm_providers from sentry.seer.sentry_data_models import ( + BaselineTagDistributionEntry, + BaselineTagDistributionResponse, + ComparativeAttributeDistributionsResponse, EAPTrace, EmptyResponse, EventDetailsResponse, @@ -2123,7 +2126,7 @@ def get_baseline_tag_distribution( stats_period: str | None = None, start: str | None = None, end: str | None = None, -) -> dict[str, Any] | None: +) -> BaselineTagDistributionResponse: """ Get baseline tag distribution for suspect attributes analysis. @@ -2158,7 +2161,7 @@ def get_baseline_tag_distribution( ) if not tag_keys: - return {"baseline_tag_distribution": []} + return BaselineTagDistributionResponse(baseline_tag_distribution=[]) # Use first/last seen if date params are not provided. 
start_dt, end_dt = get_group_date_range(group, organization, start_dt, end_dt) @@ -2226,15 +2229,11 @@ def get_baseline_tag_distribution( combined_counts[key] = combined_counts.get(key, 0) + result["count"] baseline_distribution = [ - { - "tag_key": tag_key, - "tag_value": tag_value, - "count": count, - } + BaselineTagDistributionEntry(tag_key=tag_key, tag_value=tag_value, count=count) for (tag_key, tag_value), count in combined_counts.items() ] - return {"baseline_tag_distribution": baseline_distribution} + return BaselineTagDistributionResponse(baseline_tag_distribution=baseline_distribution) def get_comparative_attribute_distributions( @@ -2251,7 +2250,7 @@ def get_comparative_attribute_distributions( project_ids: list[int] | None = None, project_slugs: list[str] | None = None, sampling_mode: SAMPLING_MODES = "NORMAL", -) -> dict[str, Any] | None: +) -> ComparativeAttributeDistributionsResponse: """ Fetch span attribute distributions for a selected time range (minute precision) compared to a baseline (defined by start/end/stats_period params). The selected range should be smaller and within the larger range. This is not validated. 
@@ -2320,13 +2319,13 @@ def get_comparative_attribute_distributions( query_2=query_2, ) - return { - "baseline_distribution": distributions_result["cohort_2_distribution"], - "total_baseline": distributions_result["total_cohort_2"], - "outliers_distribution": distributions_result["cohort_1_distribution"], - "total_outliers": distributions_result["total_cohort_1"], - "outliers_function_value": distributions_result["cohort_1_function_value"], - } + return ComparativeAttributeDistributionsResponse( + baseline_distribution=distributions_result["cohort_2_distribution"], + total_baseline=distributions_result["total_cohort_2"], + outliers_distribution=distributions_result["cohort_1_distribution"], + total_outliers=distributions_result["total_cohort_1"], + outliers_function_value=distributions_result["cohort_1_function_value"], + ) def get_dsn( diff --git a/src/sentry/seer/assisted_query/issues_tools.py b/src/sentry/seer/assisted_query/issues_tools.py index 4856b53d01ee..34c47e3ca1ed 100644 --- a/src/sentry/seer/assisted_query/issues_tools.py +++ b/src/sentry/seer/assisted_query/issues_tools.py @@ -14,6 +14,7 @@ FilterKeyValuesResponse, IssueFilterBuiltInField, IssueFilterKeysResponse, + IssuesStatsResponse, TagFilterKeyValue, ) from sentry.snuba.dataset import Dataset @@ -738,7 +739,7 @@ def get_issues_stats( stats_period: str | None = None, start: str | None = None, end: str | None = None, -) -> list[dict[str, Any]] | None: +) -> IssuesStatsResponse | None: """ Get stats for specific issues by calling the issues-stats endpoint. 
@@ -765,7 +766,7 @@ def get_issues_stats( return None if not issue_ids: - return [] + return IssuesStatsResponse(__root__=[]) api_key = ApiKey(organization_id=organization.id, scope_list=API_KEY_SCOPES) @@ -788,4 +789,4 @@ def get_issues_stats( params=params, ) - return resp.data + return IssuesStatsResponse(__root__=resp.data) diff --git a/src/sentry/seer/sentry_data_models.py b/src/sentry/seer/sentry_data_models.py index 092bf955061e..95331ec0fa35 100644 --- a/src/sentry/seer/sentry_data_models.py +++ b/src/sentry/seer/sentry_data_models.py @@ -435,6 +435,7 @@ def __contains__(self, key: object) -> bool: def __getitem__(self, key: str) -> Any: return self.dict()[key] +<<<<<<< HEAD class _DictProxyMixin(BaseModel): @@ -563,3 +564,81 @@ class UpdatePrMetricsErrorResponse(BaseModel): success: Literal[False] = False error: str + + +class BaselineTagDistributionEntry(BaseModel): + tag_key: str + tag_value: str + count: int + + # Inline dict-proxy: lets test sites and seer callers read entries with + # `entry["tag_key"]` until they're migrated to attribute access. + def __getitem__(self, key: str) -> Any: + return self.dict()[key] + + def __contains__(self, key: object) -> bool: + return key in self.dict() + + +class BaselineTagDistributionResponse(BaseModel): + """`get_baseline_tag_distribution` returns + `{"baseline_tag_distribution": [{tag_key, tag_value, count}, ...]}`.""" + + baseline_tag_distribution: list[BaselineTagDistributionEntry] + + def __getitem__(self, key: str) -> Any: + return self.dict()[key] + + def __contains__(self, key: object) -> bool: + return key in self.dict() + + +class ComparativeAttributeDistributionsResponse(BaseModel): + """`get_comparative_attribute_distributions` returns a baseline vs outliers + pair of attribute-value distributions. 
Each distribution is a list of + `(attribute_name, label, value)` triples passed through from the spans + frequency-stats endpoint (`query_attribute_distributions`).""" + + baseline_distribution: list[tuple[str, str, float]] + total_baseline: int + outliers_distribution: list[tuple[str, str, float]] + total_outliers: int + outliers_function_value: float | None + + def __getitem__(self, key: str) -> Any: + return self.dict()[key] + + def __contains__(self, key: object) -> bool: + return key in self.dict() + + +class IssuesStatsResponse(BaseModel): + """`get_issues_stats` returns the issues-stats API response verbatim — a list + of dicts with `id, count, userCount, firstSeen, lastSeen, stats, lifetime`. + Items are passed through since the issues-stats shape is wider than the + documented contract and the seer caller treats it as a record stream.""" + + __root__: list[dict[str, Any]] + + def dict(self, **kwargs: Any) -> Any: + # Unwrap to the bare list the dispatcher previously returned. + return list(self.__root__) + + # List-like proxy so callers can treat the response like the list it + # serializes to. 
+ def __iter__(self) -> Any: + return iter(self.__root__) + + def __len__(self) -> int: + return len(self.__root__) + + def __getitem__(self, idx: int) -> Any: + return self.__root__[idx] + + def __eq__(self, other: object) -> bool: + if isinstance(other, list): + return list(self.__root__) == other + return super().__eq__(other) + + def __hash__(self) -> int: + return id(self) diff --git a/tests/sentry/seer/assisted_query/test_issues_tools.py b/tests/sentry/seer/assisted_query/test_issues_tools.py index e1f1033cb8c5..46dff98e8d77 100644 --- a/tests/sentry/seer/assisted_query/test_issues_tools.py +++ b/tests/sentry/seer/assisted_query/test_issues_tools.py @@ -14,6 +14,7 @@ from sentry.seer.sentry_data_models import ( FilterKeyValuesResponse, IssueFilterKeysResponse, + IssuesStatsResponse, ) from sentry.testutils.cases import APITestCase, SnubaTestCase from sentry.testutils.helpers.datetime import before_now @@ -717,8 +718,7 @@ def test_get_issues_stats_success(self) -> None: stats_period="24h", ) - assert result is not None - assert isinstance(result, (list, FilterKeyValuesResponse)) + assert isinstance(result, IssuesStatsResponse) assert len(result) == 2 # Verify each stat has the expected fields @@ -797,8 +797,7 @@ def test_get_issues_stats_with_multiple_projects(self) -> None: stats_period="24h", ) - assert result is not None - assert isinstance(result, (list, FilterKeyValuesResponse)) + assert isinstance(result, IssuesStatsResponse) # Should return stats for both issues assert len(result) >= 2 returned_issue_ids = {stat["id"] for stat in result} @@ -827,8 +826,7 @@ def test_get_issues_stats_empty_issue_ids(self) -> None: stats_period="24h", ) - assert result is not None - assert isinstance(result, (list, FilterKeyValuesResponse)) + assert isinstance(result, IssuesStatsResponse) assert len(result) == 0 def test_get_issues_stats_stats_and_lifetime_structure(self) -> None: From 2d20f27b728837dd765639ad75440c8a75eadeb7 Mon Sep 17 00:00:00 2001 From: Jeremy Stanley 
Date: Tue, 16 Jun 2026 15:46:41 -0700 Subject: [PATCH 2/5] ref(seer): Type the replay/profile/export/error-event RPC responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the `dict[str, Any]` / TypedDict / un-annotated return types on five seer RPC methods with explicit Pydantic models: - `export_explorer_indexes` → `AgentExportIndexesResponse` — migrated from a TypedDict (`org_id, version, tables`) so the typed-registry contract holds - `get_replay_metadata` → `ReplayMetadataResponse | None` — `__root__` dict passthrough; the aggregate replay-event shape lives in the replays UI's `ReplayDetailsResponse` and is wider than what sentry-side can lock down - `rpc_get_profile_flamegraph` → `ProfileFlamegraphSuccessResponse | ProfileFlamegraphErrorResponse` discriminated by the presence of `execution_tree`/`metadata` vs `error` - `rpc_get_replay_summary_logs` → `ReplaySummaryLogsResponse` (`{"logs": [str, ...]}`) - `get_error_event_details` → `ErrorEventDetailsResponse | None` — `__root__` dict passthrough; the bare `EventSerializer` shape (a `SentryEventData`-ish dict the seer caller casts to its own model) is too wide for sentry to fix here without a coordinated update Wire-identical across all paths. Except for `AgentExportIndexesResponse` and the nested `ProfileFlamegraphMetadata`, the response models carry a small `__getitem__` / `__contains__` proxy (backed by `__root__` on the two passthrough models) so existing test sites and seer callers can read fields with `result["x"]` / `"x" in result` until they're migrated to attribute access. 
--- src/sentry/replays/usecases/summarize.py | 7 +- src/sentry/seer/agent/snapshot_indexes.py | 10 +-- src/sentry/seer/agent/tools.py | 48 ++++++----- src/sentry/seer/autofix/autofix_tools.py | 6 +- src/sentry/seer/sentry_data_models.py | 97 ++++++++++++++++++++++- 5 files changed, 133 insertions(+), 35 deletions(-) diff --git a/src/sentry/replays/usecases/summarize.py b/src/sentry/replays/usecases/summarize.py index 0047ee0a0dba..148fb9c81ccd 100644 --- a/src/sentry/replays/usecases/summarize.py +++ b/src/sentry/replays/usecases/summarize.py @@ -23,6 +23,7 @@ ) from sentry.replays.usecases.reader import fetch_segments_metadata, iter_segment_data from sentry.search.events.types import SnubaParams +from sentry.seer.sentry_data_models import ReplaySummaryLogsResponse from sentry.services.eventstore.models import Event from sentry.snuba.referrer import Referrer from sentry.utils import json, metrics @@ -519,7 +520,7 @@ def rpc_get_replay_summary_logs( project_id: int, replay_id: str, num_segments: int, -) -> dict[str, Any]: +) -> ReplaySummaryLogsResponse: """ RPC call for Seer. Downloads a replay's segment data, queries associated errors, and parses this into summary logs. """ @@ -546,7 +547,7 @@ def rpc_get_replay_summary_logs( # 404s should be handled in the originating Sentry endpoint. # If the replay is missing here just return an empty response. 
if not processed_response: - return {"logs": []} + return ReplaySummaryLogsResponse(logs=[]) error_ids = processed_response[0].get("error_ids", []) trace_ids = processed_response[0].get("trace_ids", []) @@ -611,4 +612,4 @@ def rpc_get_replay_summary_logs( is_mobile_replay=is_mobile_replay, replay_start=replay_start, ) - return {"logs": logs} + return ReplaySummaryLogsResponse(logs=logs) diff --git a/src/sentry/seer/agent/snapshot_indexes.py b/src/sentry/seer/agent/snapshot_indexes.py index 7350163592a7..e9eb54179b5d 100644 --- a/src/sentry/seer/agent/snapshot_indexes.py +++ b/src/sentry/seer/agent/snapshot_indexes.py @@ -1,7 +1,7 @@ import logging -from typing import Any, TypedDict from sentry.seer.models import SeerApiError +from sentry.seer.sentry_data_models import AgentExportIndexesResponse from sentry.seer.signed_seer_api import ( AgentExportIndexesRequest, SeerViewerContext, @@ -12,12 +12,6 @@ logger = logging.getLogger(__name__) -class AgentExportIndexesResponse(TypedDict): - org_id: int - version: int - tables: dict[str, list[dict[str, Any]]] - - def export_agent_indexes(*, org_id: int) -> AgentExportIndexesResponse: """Export all explorer index rows for an org from Seer's database. 
@@ -31,7 +25,7 @@ def export_agent_indexes(*, org_id: int) -> AgentExportIndexesResponse: raise SeerApiError("Seer export-indexes request failed", response.status) try: - return response.json() + return AgentExportIndexesResponse(**response.json()) except JSONDecodeError: logger.exception("Failed to parse Seer export-indexes response") raise SeerApiError("Seer returned invalid JSON response", response.status) diff --git a/src/sentry/seer/agent/tools.py b/src/sentry/seer/agent/tools.py index e5818d2c38fd..f44a575a8cb7 100644 --- a/src/sentry/seer/agent/tools.py +++ b/src/sentry/seer/agent/tools.py @@ -70,6 +70,10 @@ GetDsnResponse, IssueAndEventDetailsResponse, IssueDetailsResponse, + ProfileFlamegraphErrorResponse, + ProfileFlamegraphMetadata, + ProfileFlamegraphSuccessResponse, + ReplayMetadataResponse, RepositoryDefinitionResponse, TraceItemAttributesResponse, TraceItemEventsResponse, @@ -657,7 +661,7 @@ def rpc_get_profile_flamegraph( organization_id: int, trace_id: str | None = None, span_description: str | None = None, -) -> dict[str, Any]: +) -> ProfileFlamegraphSuccessResponse | ProfileFlamegraphErrorResponse: """ Fetch and format a profile flamegraph by profile ID (8-char or full 32-char). 
@@ -686,7 +690,7 @@ def rpc_get_profile_flamegraph( "rpc_get_profile_flamegraph: Organization not found", extra={"organization_id": organization_id}, ) - return {"error": "Organization not found"} + return ProfileFlamegraphErrorResponse(error="Organization not found") # Get all projects for the organization projects = list(Project.objects.filter(organization=organization, status=ObjectStatus.ACTIVE)) @@ -696,7 +700,7 @@ def rpc_get_profile_flamegraph( "rpc_get_profile_flamegraph: No projects found for organization", extra={"organization_id": organization_id}, ) - return {"error": "No projects found for organization"} + return ProfileFlamegraphErrorResponse(error="No projects found for organization") # Search up to 90 days back using 14-day sliding windows now = datetime.now(UTC) @@ -797,13 +801,13 @@ def rpc_get_profile_flamegraph( "rpc_get_profile_flamegraph: Profile not found", extra={"profile_id": profile_id, "organization_id": organization_id}, ) - return {"error": "Profile not found in the last 90 days"} + return ProfileFlamegraphErrorResponse(error="Profile not found in the last 90 days") if not project_id: logger.warning( "rpc_get_profile_flamegraph: Could not find project id for profile", extra={"profile_id": profile_id, "organization_id": organization_id}, ) - return {"error": "Project not found"} + return ProfileFlamegraphErrorResponse(error="Project not found") logger.info( "rpc_get_profile_flamegraph: Found profile", @@ -831,7 +835,9 @@ def rpc_get_profile_flamegraph( "rpc_get_profile_flamegraph: Failed to fetch profile data from profiling service", extra={"profile_id": actual_profile_id, "project_id": project_id}, ) - return {"error": "Failed to fetch profile data from profiling service"} + return ProfileFlamegraphErrorResponse( + error="Failed to fetch profile data from profiling service" + ) # Convert to execution tree (returns dicts, not Pydantic models) execution_tree, selected_thread_id = _convert_profile_to_execution_tree(profile_data) @@ -845,19 
+851,21 @@ def rpc_get_profile_flamegraph( "raw_profile_data": profile_data, }, ) - return {"error": "Failed to generate execution tree from profile data"} + return ProfileFlamegraphErrorResponse( + error="Failed to generate execution tree from profile data" + ) - return { - "execution_tree": execution_tree, - "metadata": { - "profile_id": actual_profile_id, - "project_id": project_id, - "is_continuous": is_continuous, - "start_ts": min_start_ts, - "end_ts": max_end_ts, - "thread_id": selected_thread_id, - }, - } + return ProfileFlamegraphSuccessResponse( + execution_tree=execution_tree, + metadata=ProfileFlamegraphMetadata( + profile_id=actual_profile_id, + project_id=project_id, + is_continuous=is_continuous, + start_ts=min_start_ts, + end_ts=max_end_ts, + thread_id=selected_thread_id, + ), + ) def get_repository_definition( @@ -1686,7 +1694,7 @@ def get_replay_metadata( replay_id: str, organization_id: int, project_slug: str | None = None, -) -> dict[str, Any] | None: +) -> ReplayMetadataResponse | None: """ Get the metadata for a replay through an aggregate replay event query. 
@@ -1783,7 +1791,7 @@ def get_replay_metadata( result["project_slug"] = next( filter(lambda x: x[0] == int(result["project_id"]), p_ids_and_slugs) )[1] - return result + return ReplayMetadataResponse(__root__=result) def get_trace_item_attributes( diff --git a/src/sentry/seer/autofix/autofix_tools.py b/src/sentry/seer/autofix/autofix_tools.py index 33bfbe692ae9..31c8be43d694 100644 --- a/src/sentry/seer/autofix/autofix_tools.py +++ b/src/sentry/seer/autofix/autofix_tools.py @@ -1,6 +1,6 @@ from sentry.api.serializers import EventSerializer, serialize from sentry.seer.agent.utils import _convert_profile_to_execution_tree, fetch_profile_data -from sentry.seer.sentry_data_models import ProfileDetailsResponse +from sentry.seer.sentry_data_models import ErrorEventDetailsResponse, ProfileDetailsResponse from sentry.services import eventstore @@ -29,10 +29,10 @@ def get_profile_details( return ProfileDetailsResponse(execution_tree=execution_tree) -def get_error_event_details(project_id: int, event_id: str): +def get_error_event_details(project_id: int, event_id: str) -> ErrorEventDetailsResponse | None: event = eventstore.backend.get_event_by_id(project_id, event_id) if not event: return None serialized_event = serialize(objects=event, user=None, serializer=EventSerializer()) - return serialized_event + return ErrorEventDetailsResponse(__root__=serialized_event) diff --git a/src/sentry/seer/sentry_data_models.py b/src/sentry/seer/sentry_data_models.py index 95331ec0fa35..7a37669d0b24 100644 --- a/src/sentry/seer/sentry_data_models.py +++ b/src/sentry/seer/sentry_data_models.py @@ -435,7 +435,6 @@ def __contains__(self, key: object) -> bool: def __getitem__(self, key: str) -> Any: return self.dict()[key] -<<<<<<< HEAD class _DictProxyMixin(BaseModel): @@ -612,6 +611,102 @@ def __contains__(self, key: object) -> bool: return key in self.dict() +class AgentExportIndexesResponse(BaseModel): + """`export_explorer_indexes` returns the seer-side export of the explorer + index 
tables: `{"org_id", "version", "tables"}` where `tables` is a map of + table name → list of rows. Migrated from a TypedDict shape so the seer SDK + consumer sees the contract through the typed registry.""" + + org_id: int + version: int + tables: dict[str, list[dict[str, Any]]] + + +class ReplayMetadataResponse(BaseModel): + """`get_replay_metadata` returns the aggregate replay-event response dict + plus an added `project_slug` field. The replay-events shape is the + `ReplayDetailsResponse` typedict-ish from the replays UI — wider than what + sentry-side can lock down — so the body is a dict passthrough.""" + + __root__: dict[str, Any] + + def dict(self, **kwargs: Any) -> Any: + return dict(self.__root__) + + def __getitem__(self, key: str) -> Any: + return self.__root__[key] + + def __contains__(self, key: object) -> bool: + return key in self.__root__ + + +class ProfileFlamegraphMetadata(BaseModel): + profile_id: str + project_id: int + is_continuous: bool + start_ts: int | None + end_ts: int | None + thread_id: int | None + + +class ProfileFlamegraphSuccessResponse(BaseModel): + """`rpc_get_profile_flamegraph` success: `{"execution_tree", "metadata"}`. + `execution_tree` items are dicts (not Pydantic models — the converter at + `_convert_profile_to_execution_tree` returns dicts) so they pass through.""" + + execution_tree: list[dict[str, Any]] + metadata: ProfileFlamegraphMetadata + + def __getitem__(self, key: str) -> Any: + return self.dict()[key] + + def __contains__(self, key: object) -> bool: + return key in self.dict() + + +class ProfileFlamegraphErrorResponse(BaseModel): + """`rpc_get_profile_flamegraph` error: `{"error": str}`. 
Discriminated + against the success shape by the presence of `error` vs `execution_tree`.""" + + error: str + + def __getitem__(self, key: str) -> Any: + return self.dict()[key] + + def __contains__(self, key: object) -> bool: + return key in self.dict() + + +class ReplaySummaryLogsResponse(BaseModel): + """`rpc_get_replay_summary_logs` returns `{"logs": [str, ...]}`.""" + + logs: list[str] + + def __getitem__(self, key: str) -> Any: + return self.dict()[key] + + def __contains__(self, key: object) -> bool: + return key in self.dict() + + +class ErrorEventDetailsResponse(BaseModel): + """`get_error_event_details` returns the bare `EventSerializer` output — + a `SentryEventData`-shaped dict the seer caller casts to its own typed + model. The shape is too wide for sentry-side to lock down here, so the + body is a dict passthrough.""" + + __root__: dict[str, Any] + + def dict(self, **kwargs: Any) -> Any: + return dict(self.__root__) + + def __getitem__(self, key: str) -> Any: + return self.__root__[key] + + def __contains__(self, key: object) -> bool: + return key in self.__root__ + + class IssuesStatsResponse(BaseModel): """`get_issues_stats` returns the issues-stats API response verbatim — a list of dicts with `id, count, userCount, firstSeen, lastSeen, stats, lifetime`. From 113d9e6f97ef5aea5770097961aa6f5b25dd0533 Mon Sep 17 00:00:00 2001 From: Jeremy Stanley Date: Tue, 16 Jun 2026 15:59:23 -0700 Subject: [PATCH 3/5] fix(seer): Convert ReplayMetadataResponse before re-parsing in tests The replay-metadata tests parse the returned value through a private `_ReplayMetadataResponse` schema as a contract check, but `get_replay_metadata` now returns the typed `ReplayMetadataResponse` (a `__root__` model) rather than a bare dict. `BaseModel.parse_obj` rejects a model instance, so pass `result.dict()` to round-trip through the wire shape. 
--- tests/sentry/seer/agent/test_tools.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/sentry/seer/agent/test_tools.py b/tests/sentry/seer/agent/test_tools.py index 387348fe7a6d..181f58dbacf6 100644 --- a/tests/sentry/seer/agent/test_tools.py +++ b/tests/sentry/seer/agent/test_tools.py @@ -3128,7 +3128,7 @@ def test_get_replay_metadata_full_id(self) -> None: assert result["id"] == replay1_id assert result["project_id"] == str(self.project.id) assert result["project_slug"] == self.project.slug - self._ReplayMetadataResponse.parse_obj(result) + self._ReplayMetadataResponse.parse_obj(result.dict()) # With dashes result = get_replay_metadata( @@ -3140,7 +3140,7 @@ def test_get_replay_metadata_full_id(self) -> None: assert result["id"] == replay1_id assert result["project_id"] == str(self.project.id) assert result["project_slug"] == self.project.slug - self._ReplayMetadataResponse.parse_obj(result) + self._ReplayMetadataResponse.parse_obj(result.dict()) # Invalid result = get_replay_metadata( @@ -3160,7 +3160,7 @@ def test_get_replay_metadata_full_id(self) -> None: assert result["id"] == replay2_id assert result["project_id"] == str(self.project.id) assert result["project_slug"] == self.project.slug - self._ReplayMetadataResponse.parse_obj(result) + self._ReplayMetadataResponse.parse_obj(result.dict()) # No project slug result = get_replay_metadata( @@ -3171,7 +3171,7 @@ def test_get_replay_metadata_full_id(self) -> None: assert result["id"] == replay1_id assert result["project_id"] == str(self.project.id) assert result["project_slug"] == self.project.slug - self._ReplayMetadataResponse.parse_obj(result) + self._ReplayMetadataResponse.parse_obj(result.dict()) # Different project slug result = get_replay_metadata( @@ -3214,7 +3214,7 @@ def test_get_replay_metadata_short_id(self) -> None: assert result["id"] == replay1_id assert result["project_id"] == str(self.project.id) assert result["project_slug"] == self.project.slug - 
self._ReplayMetadataResponse.parse_obj(result) + self._ReplayMetadataResponse.parse_obj(result.dict()) # Replay 2 result = get_replay_metadata( @@ -3225,7 +3225,7 @@ def test_get_replay_metadata_short_id(self) -> None: assert result["id"] == replay2_id assert result["project_id"] == str(self.project.id) assert result["project_slug"] == self.project.slug - self._ReplayMetadataResponse.parse_obj(result) + self._ReplayMetadataResponse.parse_obj(result.dict()) # Upper (supported but not expected) result = get_replay_metadata( @@ -3236,7 +3236,7 @@ def test_get_replay_metadata_short_id(self) -> None: assert result["id"] == replay1_id assert result["project_id"] == str(self.project.id) assert result["project_slug"] == self.project.slug - self._ReplayMetadataResponse.parse_obj(result) + self._ReplayMetadataResponse.parse_obj(result.dict()) # Short ID < 8 characters or not hex - returns None assert ( From f1d71fd685076bb368fda3300d7b5e59eed0b40b Mon Sep 17 00:00:00 2001 From: Jeremy Stanley Date: Tue, 16 Jun 2026 16:11:01 -0700 Subject: [PATCH 4/5] fix(seer): Wrap ValidationError on the export-indexes response `AgentExportIndexesResponse(**response.json())` raises pydantic's `ValidationError` when the seer response is valid JSON but missing or mistyped against the declared schema. The pre-existing handler only caught `JSONDecodeError`, so a schema mismatch would surface as an uncaught 500 instead of a `SeerApiError`. Add the missing arm so the caller sees the same `SeerApiError` shape it gets for malformed JSON. 
--- src/sentry/seer/agent/snapshot_indexes.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/sentry/seer/agent/snapshot_indexes.py b/src/sentry/seer/agent/snapshot_indexes.py index e9eb54179b5d..27cdb5a4adf8 100644 --- a/src/sentry/seer/agent/snapshot_indexes.py +++ b/src/sentry/seer/agent/snapshot_indexes.py @@ -1,5 +1,7 @@ import logging +from pydantic import ValidationError + from sentry.seer.models import SeerApiError from sentry.seer.sentry_data_models import AgentExportIndexesResponse from sentry.seer.signed_seer_api import ( @@ -29,3 +31,9 @@ def export_agent_indexes(*, org_id: int) -> AgentExportIndexesResponse: except JSONDecodeError: logger.exception("Failed to parse Seer export-indexes response") raise SeerApiError("Seer returned invalid JSON response", response.status) + except ValidationError: + logger.exception("Seer export-indexes response failed schema validation") + raise SeerApiError( + "Seer returned a response that did not match the export-indexes schema", + response.status, + ) From 76e22746e631f17ff0836dc6bb0941813a63a9d8 Mon Sep 17 00:00:00 2001 From: Jeremy Stanley Date: Tue, 16 Jun 2026 16:58:49 -0700 Subject: [PATCH 5/5] fix(seer): Preserve float timestamps and string thread id on flamegraph `ProfileFlamegraphMetadata.start_ts`/`end_ts` come from float-valued `min(precise.start_ts)` / `max(precise.finish_ts)` aggregates and `thread_id` is the dict key from `_convert_profile_to_execution_tree`'s `dict[str, int]` count map (always a string). Typing them as `int | None` made Pydantic v1 silently truncate the fractional seconds and risk rejecting non-numeric thread ids. Type as `float | None` / `str | None` to match the upstream values. 
--- src/sentry/seer/sentry_data_models.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/sentry/seer/sentry_data_models.py b/src/sentry/seer/sentry_data_models.py index 7a37669d0b24..2491d6639f36 100644 --- a/src/sentry/seer/sentry_data_models.py +++ b/src/sentry/seer/sentry_data_models.py @@ -644,9 +644,14 @@ class ProfileFlamegraphMetadata(BaseModel): profile_id: str project_id: int is_continuous: bool - start_ts: int | None - end_ts: int | None - thread_id: int | None + # `start_ts`/`end_ts` are float seconds from `min(precise.start_ts)` / + # `max(precise.finish_ts)` aggregates — Pydantic v1 truncates `float → int` + # silently, so type as float to preserve sub-second precision on the wire. + start_ts: float | None + end_ts: float | None + # `selected_thread_id` is the dict key from a `dict[str, int]` count map in + # `_convert_profile_to_execution_tree` — always a string. + thread_id: str | None class ProfileFlamegraphSuccessResponse(BaseModel):