-
-
Notifications
You must be signed in to change notification settings - Fork 4.7k
feat(preprod): Add build distribution CSV export endpoint #117539
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
d606274
feat(preprod): Add build distribution CSV export endpoint
jamieQ 4aac457
pr feedback
jamieQ cf82ade
feat(preprod): Add CSV export columns, exclude non-installable builds
jamieQ 03552b5
fix(preprod): Harden CSV escaping, scope export to distribution
jamieQ add8847
make comments more concise
jamieQ ecb8df6
:hammer_and_wrench: Sync API Urls to TypeScript
getsantry[bot] fe06fdd
typo fix
jamieQ File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
|
|
||
| from django.http.response import HttpResponseBase | ||
| from django.utils import timezone | ||
| from rest_framework import serializers | ||
| from rest_framework.request import Request | ||
|
|
||
| from sentry.api.api_owners import ApiOwner | ||
| from sentry.api.api_publish_status import ApiPublishStatus | ||
| from sentry.api.base import cell_silo_endpoint | ||
| from sentry.api.bases.organization import NoProjects, OrganizationEndpoint | ||
| from sentry.api.utils import handle_query_errors | ||
| from sentry.models.organization import Organization | ||
| from sentry.preprod.build_distribution_utils import is_installable_artifact | ||
| from sentry.preprod.builds_query import filtered_builds_queryset | ||
| from sentry.preprod.models import PreprodArtifact | ||
| from sentry.ratelimits.config import RateLimitConfig | ||
| from sentry.types.ratelimit import RateLimit, RateLimitCategory | ||
| from sentry.utils import json | ||
| from sentry.web.frontend.csv import CsvResponder | ||
|
|
||
logger = logging.getLogger(__name__)

# Maximum number of builds a single CSV export may contain.
CSV_EXPORT_ROW_LIMIT = 10_000

# Leading characters that mark a cell as a formula for the purposes of
# ``_escape_csv_value``.
_FORMULA_PREFIXES = ("=", "+", "-", "@")


def _escape_csv_value(value: object) -> str:
    """Stringify a value, neutralizing spreadsheet formula injection.

    Args:
        value: Any object; ``None`` is rendered as an empty cell.

    Returns:
        ``""`` for ``None``. Otherwise ``str(value)``, prefixed with a single
        quote when its first non-whitespace character is one of
        ``_FORMULA_PREFIXES``. The original text, including any leading
        whitespace, is kept intact after the quote.
    """
    if value is None:
        return ""
    text = str(value)
    # Look past leading whitespace so that a value such as "  =1+1" is still
    # caught. ``startswith`` accepts the tuple of prefixes directly and is
    # simply False for empty / whitespace-only text.
    if text.lstrip().startswith(_FORMULA_PREFIXES):
        return "'" + text
    return text
|
|
||
|
|
||
class BuildsCsvResponder(CsvResponder[PreprodArtifact]):
    """Render ``PreprodArtifact`` builds as rows of the build distribution CSV."""

    def get_header(self) -> tuple[str, ...]:
        """Return the column names, in the order ``get_row`` emits its cells."""
        columns = (
            "app_name",
            "project_slug",
            "artifact_id",
            "app_id",
            "build_configuration",
            "version",
            "platform",
            "install_groups",
            "upload_date",
            "download_count",
        )
        return columns

    def get_row(self, item: PreprodArtifact) -> tuple[str, ...]:
        """Return the CSV cells for one build, ordered to match ``get_header``.

        Every cell except ``install_groups`` is passed through
        ``_escape_csv_value``, so missing values become empty strings.
        """
        app_info = item.get_mobile_app_info()
        build_platform = item.platform
        configuration = item.build_configuration
        # Falls back to 0 when the artifact carries no ``download_count`` attribute.
        downloads = getattr(item, "download_count", 0)

        # install_groups is always emitted as a JSON array: anything that is not
        # a list (including a missing key or empty extras) becomes an empty array.
        groups = (item.extras or {}).get("install_groups")
        if not isinstance(groups, list):
            groups = []
        install_groups_cell = json.dumps(groups)

        return (
            _escape_csv_value(app_info.app_name if app_info else None),
            _escape_csv_value(item.project.slug),
            _escape_csv_value(item.id),
            _escape_csv_value(item.app_id),
            _escape_csv_value(configuration.name if configuration else None),
            _escape_csv_value(app_info.build_version if app_info else None),
            _escape_csv_value(build_platform.value if build_platform else None),
            install_groups_cell,
            _escape_csv_value(item.date_added.isoformat() if item.date_added else None),
            _escape_csv_value(downloads),
        )
|
|
||
|
|
||
# Organization-scoped GET endpoint that exports build distribution data as CSV.
@cell_silo_endpoint
class BuildsExportEndpoint(OrganizationEndpoint):
    owner = ApiOwner.EMERGE_TOOLS
    publish_status = {"GET": ApiPublishStatus.EXPERIMENTAL}

    # Per-category rate limit overrides for GET requests.
    enforce_rate_limit = True
    rate_limits = RateLimitConfig(
        limit_overrides={
            "GET": {
                RateLimitCategory.IP: RateLimit(limit=5, window=1, concurrent_limit=2),
                RateLimitCategory.USER: RateLimit(limit=5, window=1, concurrent_limit=2),
                RateLimitCategory.ORGANIZATION: RateLimit(limit=10, window=1, concurrent_limit=5),
            }
        }
    )

    def get(self, request: Request, organization: Organization) -> HttpResponseBase:
        """Export build distribution stats matching the current filters as CSV.

        Takes the same ``query``, ``project``, and date-range params as the builds
        list endpoint. Because the export is specific to build distribution, the
        distribution row set is always used; a ``display`` param has no effect.

        Raises ``serializers.ValidationError`` when more than
        ``CSV_EXPORT_ROW_LIMIT`` builds match.
        """
        timestamp = timezone.now().strftime("%Y-%m-%d-%H%M%S")
        filename = f"{organization.slug}-build-distribution-{timestamp}"

        try:
            params = self.get_filter_params(request, organization, date_filter_optional=True)
        except NoProjects:
            # No projects to search: respond with an export that has no rows.
            return BuildsCsvResponder().respond(iter(()), filename)

        search = request.GET.get("query", "").strip()

        # display is pinned to "distribution": this export only makes sense for
        # build distribution info.
        with handle_query_errors():
            builds = filtered_builds_queryset(
                organization=organization,
                query=search,
                display="distribution",
                project_ids=params["project_id"],
                start=params["start"],
                end=params["end"],
            )

        # Non-installable builds are not relevant for distribution info.
        builds = builds.filter(installable_app_file_id__isnull=False)

        # Oversized exports are rejected instead of being silently truncated. The
        # SQL count is taken before the Python-side ``is_installable_artifact``
        # filter below, so it is conservative: in edge cases an export may be
        # rejected even though fewer rows would have been written. That is
        # knowingly ignored.
        row_count = builds.count()
        if row_count > CSV_EXPORT_ROW_LIMIT:
            message = (
                f"This export has {row_count} builds, which exceeds the limit of {CSV_EXPORT_ROW_LIMIT}. "
                "Narrow your search or date range and try again."
            )
            raise serializers.ValidationError({"detail": message})

        ordered_builds = builds.select_related(
            "mobile_app_info", "project", "build_configuration"
        ).order_by("-date_added")
        # Lazily drop artifacts that fail the installability check while iterating.
        exportable = (
            build for build in ordered_builds.iterator() if is_installable_artifact(build)
        )
        return BuildsCsvResponder().respond(exportable, filename)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from collections.abc import Sequence | ||
| from datetime import datetime | ||
|
|
||
| from sentry.models.organization import Organization | ||
| from sentry.preprod.artifact_search import queryset_for_query | ||
| from sentry.preprod.models import PreprodArtifactQuerySet | ||
| from sentry.preprod.quotas import get_size_retention_cutoff | ||
|
|
||
# Display modes that limit the list to non-snapshot builds. "size" and
# "distribution" are backed by the same artifacts; only the displayed columns differ.
_NON_SNAPSHOT_DISPLAYS = ("size", "distribution")


def filtered_builds_queryset(
    *,
    organization: Organization,
    query: str,
    display: str | None,
    project_ids: Sequence[int],
    start: datetime | None,
    end: datetime | None,
) -> PreprodArtifactQuerySet:
    """Return the PreprodArtifact queryset used by both the builds list and the CSV export.

    The search query, size-retention cutoff, optional date range, project scoping
    and display-based snapshot filtering are all applied here, in one place, so
    that the CSV export yields exactly the rows shown in the on-screen list.

    ``select_related``/``prefetch_related`` and ordering are left to the caller.

    Raises:
        InvalidSearchQuery: if the query string is invalid.
    """
    # NOTE(review): the PR thread attached to this filter ends with the author
    # promising a follow-up rename in a separate PR "since it's used in a number
    # of spots" — presumably of get_size_retention_cutoff; confirm.
    builds = queryset_for_query(query, organization).filter(
        date_added__gte=get_size_retention_cutoff(organization)
    )

    # Optional date range, then project scoping.
    if start:
        builds = builds.filter(date_added__gte=start)
    if end:
        builds = builds.filter(date_added__lte=end)
    builds = builds.filter(project_id__in=project_ids)

    # Snapshot filtering depends on the display mode; any other value
    # (including None) leaves the queryset unfiltered in this respect.
    if display == "snapshot":
        return builds.filter(preprodsnapshotmetrics__isnull=False)
    if display in _NON_SNAPSHOT_DISPLAYS:
        return builds.filter(preprodsnapshotmetrics__isnull=True)
    return builds
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bug: The CSV export for builds will always report a `download_count` of 0 because the database query is missing the necessary annotation to calculate this value.
Severity: MEDIUM
Suggested Fix
Add the `.annotate_download_count()` method to the queryset chain in `src/sentry/preprod/api/endpoints/builds_export.py` between lines 143 and 145. This will ensure the `download_count` is correctly calculated and included in the exported CSV.
Prompt for AI Agent
Did we get this right? 👍 / 👎 to inform future reviews.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BuildsExportEndpoint.get() builds its queryset via filtered_builds_queryset(), which traces back to the annotation:
builds_export.py:120 → filtered_builds_queryset(...)
builds_query.py:37 → queryset_for_query(query, organization)
artifact_search.py:222 → apply_filters(_base_searchable_queryset(), ...)
artifact_search.py:191-198 → _base_searchable_queryset() calls .annotate_download_count()