diff --git a/src/sentry/preprod/api/endpoints/builds.py b/src/sentry/preprod/api/endpoints/builds.py index 566371cb69073e..5f8fa8617a0beb 100644 --- a/src/sentry/preprod/api/endpoints/builds.py +++ b/src/sentry/preprod/api/endpoints/builds.py @@ -14,9 +14,8 @@ from sentry.preprod.api.models.project_preprod_build_details_models import ( transform_preprod_artifact_to_build_details, ) -from sentry.preprod.artifact_search import queryset_for_query +from sentry.preprod.builds_query import filtered_builds_queryset from sentry.preprod.models import PreprodArtifact -from sentry.preprod.quotas import get_size_retention_cutoff logger = logging.getLogger(__name__) @@ -56,42 +55,35 @@ def on_results(artifacts: list[PreprodArtifact]) -> list[dict[str, Any]]: except NoProjects: return paginate(PreprodArtifact.objects.none()) - start = params["start"] - end = params["end"] # Builds don't have environments so we ignore environments from # params on purpose. - query = request.GET.get("query", "").strip() - cutoff = get_size_retention_cutoff(organization) + display = request.GET.get("display") try: - queryset = queryset_for_query(query, organization) - queryset = queryset.select_related( - "project", - "build_configuration", - "commit_comparison", - "mobile_app_info", - "preprodsnapshotmetrics", - ).prefetch_related( - "preprodartifactsizemetrics_set", - "preprodsnapshotmetrics__snapshot_comparisons_head_metrics", - "preprodcomparisonapproval_set", + queryset = filtered_builds_queryset( + organization=organization, + query=query, + display=display, + project_ids=params["project_id"], + start=params["start"], + end=params["end"], ) - queryset = queryset.filter(date_added__gte=cutoff) - if start: - queryset = queryset.filter(date_added__gte=start) - if end: - queryset = queryset.filter(date_added__lte=end) - queryset = queryset.filter(project_id__in=params["project_id"]) - - display = request.GET.get("display") - if display in ("size", "distribution"): - queryset = 
queryset.filter(preprodsnapshotmetrics__isnull=True) - elif display == "snapshot": - queryset = queryset.filter(preprodsnapshotmetrics__isnull=False) except InvalidSearchQuery as e: # CodeQL complains about str(e) below but ~all handlers # of InvalidSearchQuery do the same as this. return Response({"detail": str(e)}, status=400) + queryset = queryset.select_related( + "project", + "build_configuration", + "commit_comparison", + "mobile_app_info", + "preprodsnapshotmetrics", + ).prefetch_related( + "preprodartifactsizemetrics_set", + "preprodsnapshotmetrics__snapshot_comparisons_head_metrics", + "preprodcomparisonapproval_set", + ) + return paginate(queryset) diff --git a/src/sentry/preprod/api/endpoints/builds_export.py b/src/sentry/preprod/api/endpoints/builds_export.py new file mode 100644 index 00000000000000..4fd8b7c8fc44ec --- /dev/null +++ b/src/sentry/preprod/api/endpoints/builds_export.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +import logging + +from django.http.response import HttpResponseBase +from django.utils import timezone +from rest_framework import serializers +from rest_framework.request import Request + +from sentry.api.api_owners import ApiOwner +from sentry.api.api_publish_status import ApiPublishStatus +from sentry.api.base import cell_silo_endpoint +from sentry.api.bases.organization import NoProjects, OrganizationEndpoint +from sentry.api.utils import handle_query_errors +from sentry.models.organization import Organization +from sentry.preprod.build_distribution_utils import is_installable_artifact +from sentry.preprod.builds_query import filtered_builds_queryset +from sentry.preprod.models import PreprodArtifact +from sentry.ratelimits.config import RateLimitConfig +from sentry.types.ratelimit import RateLimit, RateLimitCategory +from sentry.utils import json +from sentry.web.frontend.csv import CsvResponder + +logger = logging.getLogger(__name__) + +CSV_EXPORT_ROW_LIMIT = 10_000 + +_FORMULA_PREFIXES = ("=", "+", "-", "@") + 
+
+def _escape_csv_value(value: object) -> str:
+    """Stringify a value for a CSV cell, neutralizing spreadsheet formula injection.
+
+    ``None`` becomes an empty cell. Text whose first non-whitespace character is one of
+    ``_FORMULA_PREFIXES`` gets a leading single quote; the text is otherwise unchanged.
+    """
+    text = "" if value is None else str(value)
+    if text.lstrip().startswith(_FORMULA_PREFIXES):
+        return "'" + text
+    return text
+
+
+class BuildsCsvResponder(CsvResponder[PreprodArtifact]):
+    """Header and row definitions for the builds CSV export (one row per artifact)."""
+
+    def get_header(self) -> tuple[str, ...]:
+        return (
+            "app_name",
+            "project_slug",
+            "artifact_id",
+            "app_id",
+            "build_configuration",
+            "version",
+            "platform",
+            "install_groups",
+            "upload_date",
+            "download_count",
+        )
+
+    def get_row(self, item: PreprodArtifact) -> tuple[str, ...]:
+        """Return the cells for ``item`` in the same column order as ``get_header``."""
+        mobile_app_info = item.get_mobile_app_info()
+        platform = item.platform
+        build_configuration = item.build_configuration
+        download_count = getattr(item, "download_count", 0)
+
+        # Emit install_groups as a JSON array; a missing or non-list value becomes [].
+        install_groups = (item.extras or {}).get("install_groups")
+        if not isinstance(install_groups, list):
+            install_groups = []
+        return (
+            _escape_csv_value(mobile_app_info.app_name if mobile_app_info else None),
+            _escape_csv_value(item.project.slug),
+            _escape_csv_value(item.id),
+            _escape_csv_value(item.app_id),
+            _escape_csv_value(build_configuration.name if build_configuration else None),
+            _escape_csv_value(mobile_app_info.build_version if mobile_app_info else None),
+            _escape_csv_value(platform.value if platform else None),
+            json.dumps(install_groups),
+            _escape_csv_value(item.date_added.isoformat() if item.date_added else None),
+            _escape_csv_value(download_count),
+        )
+
+
+@cell_silo_endpoint
+class BuildsExportEndpoint(OrganizationEndpoint):
+    owner = ApiOwner.EMERGE_TOOLS
+    publish_status = {
+        "GET": ApiPublishStatus.EXPERIMENTAL,
+    }
+
+    enforce_rate_limit = True
+    rate_limits = RateLimitConfig(
+        limit_overrides={
+            "GET": {
+                RateLimitCategory.IP: RateLimit(limit=5, window=1, concurrent_limit=2),
+                RateLimitCategory.USER: RateLimit(limit=5, window=1, concurrent_limit=2),
+                RateLimitCategory.ORGANIZATION: RateLimit(limit=10, window=1, concurrent_limit=5),
+            }
+        }
+    )
+
+    def get(self, request: Request, organization: Organization) -> HttpResponseBase:
+        """Stream build distribution stats for the current filters as a CSV.
+
+        Accepts the same ``query``, ``project``, and date-range params as the builds list
+        endpoint; ``display`` is ignored because the distribution row set is always used.
+        Raises a validation error when the SQL row count exceeds ``CSV_EXPORT_ROW_LIMIT``.
+        """
+        timestamp = timezone.now().strftime("%Y-%m-%d-%H%M%S")
+        filename = f"{organization.slug}-build-distribution-{timestamp}"
+
+        try:
+            params = self.get_filter_params(request, organization, date_filter_optional=True)
+        except NoProjects:
+            return BuildsCsvResponder().respond(iter(()), filename)
+
+        query = request.GET.get("query", "").strip()
+        # We force display="distribution" because the logic is really only for build distribution info.
+        with handle_query_errors():
+            queryset = filtered_builds_queryset(
+                organization=organization,
+                query=query,
+                display="distribution",
+                project_ids=params["project_id"],
+                start=params["start"],
+                end=params["end"],
+            )
+
+        # Filter out non-installable builds since they aren't really relevant for distribution info.
+        queryset = queryset.filter(installable_app_file_id__isnull=False)
+
+        # Reject oversized exports rather than silently truncating. The SQL limit is conservatively
+        # correct, but could lead to false-negatives in some edge cases, which we're ignoring.
+        row_count = queryset.count()
+        if row_count > CSV_EXPORT_ROW_LIMIT:
+            detail = (
+                f"This export has {row_count} builds, which exceeds the limit of {CSV_EXPORT_ROW_LIMIT}. "
+                "Narrow your search or date range and try again."
+            )
+            raise serializers.ValidationError({"detail": detail})
+
+        queryset = queryset.select_related(
+            "mobile_app_info", "project", "build_configuration"
+        ).order_by("-date_added")
+        installable_builds = (
+            artifact for artifact in queryset.iterator() if is_installable_artifact(artifact)
+        )
+        return BuildsCsvResponder().respond(installable_builds, filename)
diff --git a/src/sentry/preprod/api/endpoints/urls.py b/src/sentry/preprod/api/endpoints/urls.py
index 19e2beaa7f931c..e2c85fa35f6c11 100644
--- a/src/sentry/preprod/api/endpoints/urls.py
+++ b/src/sentry/preprod/api/endpoints/urls.py
@@ -3,6 +3,7 @@
 from django.urls import re_path
 
 from sentry.preprod.api.endpoints.builds import BuildsEndpoint
+from sentry.preprod.api.endpoints.builds_export import BuildsExportEndpoint
 from sentry.preprod.api.endpoints.project_preprod_artifact_image import (
     ProjectPreprodArtifactImageEndpoint,
 )
@@ -183,6 +184,11 @@
         BuildsEndpoint.as_view(),
         name="sentry-api-0-organization-builds",
     ),
+    re_path(
+        r"^(?P<organization_id_or_slug>[^/]+)/builds-export/$",
+        BuildsExportEndpoint.as_view(),
+        name="sentry-api-0-organization-builds-export",
+    ),
     # Public API endpoints
     re_path(
         r"^(?P[^/]+)/preprodartifacts/(?P[^/]+)/install-details/$",
diff --git a/src/sentry/preprod/builds_query.py b/src/sentry/preprod/builds_query.py
new file mode 100644
index 00000000000000..11aa95c502759d
--- /dev/null
+++ b/src/sentry/preprod/builds_query.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from datetime import datetime
+
+from sentry.models.organization import Organization
+from sentry.preprod.artifact_search import queryset_for_query
+from sentry.preprod.models import PreprodArtifactQuerySet
+from sentry.preprod.quotas import get_size_retention_cutoff
+
+# Display values that constrain the list to non-snapshot builds (size + distribution
+# share the same underlying artifacts; only the columns shown differ).
+_NON_SNAPSHOT_DISPLAYS = ("size", "distribution")
+
+
+def filtered_builds_queryset(
+    *,
+    organization: Organization,
+    query: str,
+    display: str | None,
+    project_ids: Sequence[int],
+    start: datetime | None,
+    end: datetime | None,
+) -> PreprodArtifactQuerySet:
+    """Return the PreprodArtifact queryset behind the builds list and CSV export endpoints.
+
+    Applies, in order: the search query, the size-retention cutoff, optional ``start`` /
+    ``end`` bounds on ``date_added``, project scoping, and the ``display`` snapshot filter
+    (any ``display`` other than ``"snapshot"``, ``"size"`` or ``"distribution"`` adds none).
+
+    Nothing is eagerly loaded or ordered here; callers layer their own
+    ``select_related``/``prefetch_related`` and ordering on the returned queryset.
+
+    Raises:
+        InvalidSearchQuery: if the query string is invalid.
+    """
+    builds = queryset_for_query(query, organization).filter(
+        date_added__gte=get_size_retention_cutoff(organization)
+    )
+    if start:
+        builds = builds.filter(date_added__gte=start)
+    if end:
+        builds = builds.filter(date_added__lte=end)
+    builds = builds.filter(project_id__in=project_ids)
+
+    if display == "snapshot":
+        return builds.filter(preprodsnapshotmetrics__isnull=False)
+    if display in _NON_SNAPSHOT_DISPLAYS:
+        return builds.filter(preprodsnapshotmetrics__isnull=True)
+    return builds
diff --git a/static/app/utils/api/knownSentryApiUrls.generated.ts b/static/app/utils/api/knownSentryApiUrls.generated.ts
index 96303caa734ff3..dc2cff2802aadd 100644
--- a/static/app/utils/api/knownSentryApiUrls.generated.ts
+++ b/static/app/utils/api/knownSentryApiUrls.generated.ts
@@ -86,6 +86,7 @@ export type KnownSentryApiUrls =
   | '/organizations/$organizationIdOrSlug/available-actions/'
   | '/organizations/$organizationIdOrSlug/avatar/'
   | '/organizations/$organizationIdOrSlug/broadcasts/'
+  | '/organizations/$organizationIdOrSlug/builds-export/'
   | '/organizations/$organizationIdOrSlug/builds/'
   |
'/organizations/$organizationIdOrSlug/builtin-symbol-sources/'
   | '/organizations/$organizationIdOrSlug/chunk-upload/'
diff --git a/tests/sentry/preprod/api/endpoints/test_builds.py b/tests/sentry/preprod/api/endpoints/test_builds.py
index 2954fa84d3d482..e92d53a165b310 100644
--- a/tests/sentry/preprod/api/endpoints/test_builds.py
+++ b/tests/sentry/preprod/api/endpoints/test_builds.py
@@ -862,7 +862,7 @@ def test_distribution_error_code_invalid_values(self) -> None:
         self.create_preprod_artifact(app_id="test.app")
         assert self._request({"query": "distribution_error_code:bogus"}).status_code == 400
 
-    @patch("sentry.preprod.api.endpoints.builds.get_size_retention_cutoff")
+    @patch("sentry.preprod.builds_query.get_size_retention_cutoff")
     def test_excludes_expired_artifacts(self, mock_cutoff) -> None:
         mock_cutoff.return_value = before_now(days=30)
         self.create_preprod_artifact(app_id="recent.app", date_added=before_now(days=10))
diff --git a/tests/sentry/preprod/api/endpoints/test_builds_export.py b/tests/sentry/preprod/api/endpoints/test_builds_export.py
new file mode 100644
index 00000000000000..d9e23508788032
--- /dev/null
+++ b/tests/sentry/preprod/api/endpoints/test_builds_export.py
@@ -0,0 +1,312 @@
+import csv
+import io
+from unittest.mock import patch
+
+from django.urls import reverse
+from django.utils.functional import cached_property
+
+from sentry.preprod.models import PreprodArtifact
+from sentry.testutils.cases import APITestCase
+from sentry.testutils.helpers.datetime import before_now
+from sentry.utils import json
+
+EXPECTED_HEADER = [
+    "app_name",
+    "project_slug",
+    "artifact_id",
+    "app_id",
+    "build_configuration",
+    "version",
+    "platform",
+    "install_groups",
+    "upload_date",
+    "download_count",
+]
+
+
+def _col(row: list[str], name: str) -> str:
+    """Look up a cell by column name so assertions don't hard-code column positions."""
+    return row[EXPECTED_HEADER.index(name)]
+
+
+class BuildsExportEndpointTest(APITestCase):
+    """API tests for the ``sentry-api-0-organization-builds-export`` CSV endpoint."""
+
+    @cached_property
+    def user_auth_token(self):
+        auth_token = self.create_user_auth_token(
+            self.user, scope_list=["org:admin", "project:admin"]
+        )
+        return auth_token.token
+
+    def _request(self, query, token=None):
+        """GET the export endpoint with ``query`` params and a bearer token."""
+        token = self.user_auth_token if token is None else token
+        url = reverse(
+            "sentry-api-0-organization-builds-export",
+            args=[self.organization.slug],
+            query=query,
+        )
+        return self.client.get(url, HTTP_AUTHORIZATION=f"Bearer {token}")
+
+    def _csv_rows(self, response) -> list[list[str]]:
+        """Assert a 200 ``text/csv`` response and return the streamed body parsed into rows."""
+        assert response.status_code == 200, (
+            f"status {response.status_code} body {response.getvalue()!r}"
+        )
+        assert response["Content-Type"] == "text/csv"
+        content = b"".join(response.streaming_content).decode("utf-8")
+        return list(csv.reader(io.StringIO(content)))
+
+    def _create_installable_build(self, **kwargs) -> PreprodArtifact:
+        # Installable per is_installable_artifact(): an installable file plus a
+        # build_number (default APK, so no iOS signature check applies). Tests that want
+        # a non-installable build call create_preprod_artifact directly.
+        kwargs.setdefault("installable_app_file_id", 1)
+        kwargs.setdefault("build_number", 1)
+        return self.create_preprod_artifact(**kwargs)
+
+    # --- auth ---------------------------------------------------------------
+
+    def test_invalid_token(self) -> None:
+        response = self._request({}, token="Invalid")
+        assert response.status_code == 401
+        assert response.json() == {"detail": "Invalid token"}
+
+    def test_wrong_user(self) -> None:
+        random_user = self.create_user("foo@localhost")
+        auth_token = self.create_user_auth_token(
+            random_user, scope_list=["org:admin", "project:admin"]
+        )
+        response = self._request({}, token=auth_token.token)
+        assert response.status_code == 403
+
+    def test_missing_scopes(self) -> None:
+        auth_token = self.create_user_auth_token(self.user, scope_list=[])
+        response = self._request({}, token=auth_token.token)
+        assert response.status_code == 403
+
+    # --- shape --------------------------------------------------------------
+
+    def test_no_builds_only_header(self) -> None:
+        response = self._request({})
+        rows = self._csv_rows(response)
+        assert rows == [EXPECTED_HEADER]
+
+    def test_attachment_filename(self) -> None:
+        response = self._request({})
+        disposition = response["Content-Disposition"]
+        assert disposition.startswith(
+            f'attachment; filename="{self.organization.slug}-build-distribution-'
+        )
+        assert disposition.endswith('.csv"')
+
+    def test_single_build_row(self) -> None:
+        artifact = self.create_preprod_artifact(
+            app_id="com.example.app",
+            app_name="My App",
+            build_version="1.2.3",
+            build_number=42,
+            installable_app_file_id=12345,
+            artifact_type=PreprodArtifact.ArtifactType.APK,
+        )
+        self.create_installable_preprod_artifact(artifact, download_count=5)
+        self.create_installable_preprod_artifact(artifact, download_count=10)
+
+        rows = self._csv_rows(self._request({}))
+        assert rows[0] == EXPECTED_HEADER
+        assert len(rows) == 2
+        (
+            app_name,
+            project_slug,
+            artifact_id,
+            app_id,
+            build_configuration,
+            version,
+            platform,
+            install_groups,
+            upload_date,
+            download_count,
+        ) = rows[1]
+        assert app_name == "My App"
+        assert artifact_id == str(artifact.id)
+        assert app_id == "com.example.app"
+        assert build_configuration == ""  # none set -> blank
+        assert version == "1.2.3"
+        assert platform == "android"
+        assert install_groups == "[]"  # none set -> empty JSON array
+        assert upload_date  # ISO timestamp present
+        assert download_count == "15"
+        assert project_slug == self.project.slug
+
+    def test_build_configuration_and_project_slug(self) -> None:
+        # build_configuration sources from the (nullable) PreprodBuildConfiguration name;
+        # project_slug from the build's project.
+        config = self.create_preprod_build_configuration(name="Release")
+        self._create_installable_build(app_id="com.example.cfg", build_configuration=config)
+        row = self._csv_rows(self._request({}))[1]
+        assert _col(row, "project_slug") == self.project.slug
+        assert _col(row, "build_configuration") == "Release"
+
+    def test_platform_apple(self) -> None:
+        self._create_installable_build(
+            app_id="com.example.ios",
+            artifact_type=PreprodArtifact.ArtifactType.XCARCHIVE,
+            extras={"is_code_signature_valid": True},  # iOS needs this to count as installable
+        )
+        rows = self._csv_rows(self._request({}))
+        assert len(rows) == 2
+        assert _col(rows[1], "platform") == "apple"
+
+    def test_blank_optional_cells(self) -> None:
+        # An installable build (file + build_number) with no app_name/build_version and
+        # no build_configuration leaves those cells blank.
+        self._create_installable_build(app_id="com.example.bare")
+        rows = self._csv_rows(self._request({}))
+        assert len(rows) == 2
+        assert _col(rows[1], "app_name") == ""
+        assert _col(rows[1], "app_id") == "com.example.bare"
+        assert _col(rows[1], "version") == ""
+        assert _col(rows[1], "build_configuration") == ""  # no PreprodBuildConfiguration
+        assert _col(rows[1], "install_groups") == "[]"
+        assert _col(rows[1], "download_count") == "0"
+        assert _col(rows[1], "project_slug") == self.project.slug
+
+    def test_formula_injection_escaped(self) -> None:
+        # Leading formula triggers are neutralized with a quote — including when preceded
+        # by whitespace or a tab that a spreadsheet would strip before evaluating.
+        self._create_installable_build(app_id="evil.plain", app_name="=HYPERLINK(1)")
+        self._create_installable_build(app_id="evil.space", app_name=" =HYPERLINK(1)")
+        self._create_installable_build(app_id="evil.tab", app_name="\t=cmd")
+        rows = self._csv_rows(self._request({}))
+        by_app_id = {_col(r, "app_id"): _col(r, "app_name") for r in rows[1:]}
+        assert by_app_id["evil.plain"] == "'=HYPERLINK(1)"
+        assert by_app_id["evil.space"] == "' =HYPERLINK(1)"
+        assert by_app_id["evil.tab"] == "'\t=cmd"
+
+    def test_install_groups_json_encoded(self) -> None:
+        self._create_installable_build(
+            app_id="com.example.groups",
+            extras={"install_groups": ["qa", "beta"]},
+        )
+        rows = self._csv_rows(self._request({}))
+        # Emitted as a compact JSON array string, order preserved.
+        assert _col(rows[1], "install_groups") == '["qa","beta"]'
+        # ...and round-trips back to the original list.
+        assert json.loads(_col(rows[1], "install_groups")) == ["qa", "beta"]
+
+    # --- installability -----------------------------------------------------
+
+    def test_excludes_non_installable_builds(self) -> None:
+        # Matches the list/UI is_installable_artifact() definition: a build needs an
+        # installable file AND a build_number (iOS also needs a valid, non-app-store
+        # signature). Builds failing any of these are omitted, even with downloads.
+        installable = self._create_installable_build(app_id="com.example.installable")
+        self.create_installable_preprod_artifact(installable, download_count=3)
+
+        # No installable file at all.
+        no_file = self.create_preprod_artifact(app_id="com.example.nofile")
+        self.create_installable_preprod_artifact(no_file, download_count=7)
+
+        # Has an installable file but no build_number.
+        no_build_number = self.create_preprod_artifact(
+            app_id="com.example.nobuildnum", installable_app_file_id=2
+        )
+        self.create_installable_preprod_artifact(no_build_number, download_count=11)
+
+        # iOS build with file + build_number but an invalid code signature.
+        bad_signature = self._create_installable_build(
+            app_id="com.example.badsig",
+            artifact_type=PreprodArtifact.ArtifactType.XCARCHIVE,
+            extras={"is_code_signature_valid": False},
+        )
+        self.create_installable_preprod_artifact(bad_signature, download_count=13)
+
+        rows = self._csv_rows(self._request({}))
+        assert len(rows) == 2  # header + only the fully-installable build
+        assert _col(rows[1], "app_id") == "com.example.installable"
+        assert _col(rows[1], "download_count") == "3"
+
+    # --- filtering parity with the list endpoint ----------------------------
+
+    def test_filter_by_query_app_id(self) -> None:
+        self._create_installable_build(app_id="foo")
+        self._create_installable_build(app_id="bar")
+        rows = self._csv_rows(self._request({"query": "app_id:foo"}))
+        assert len(rows) == 2
+        assert _col(rows[1], "app_id") == "foo"
+
+    def test_invalid_query_returns_400(self) -> None:
+        self._create_installable_build(app_id="foo")
+        response = self._request({"query": "no_such_key:foo"})
+        assert response.status_code == 400
+        assert response.json() == {"detail": "Invalid key for this search: no_such_key"}
+
+    def test_bad_project(self) -> None:
+        self._create_installable_build()
+        response = self._request({"project": [1]})
+        assert response.status_code == 403
+
+    def test_build_in_another_project_excluded(self) -> None:
+        another_project = self.create_project(name="Baz", slug="baz")
+        self._create_installable_build(project=another_project)
+        rows = self._csv_rows(self._request({"project": [self.project.id]}))
+        assert rows == [EXPECTED_HEADER]
+
+    def test_snapshot_builds_always_excluded(self) -> None:
+        # The export is distribution-scoped; snapshot builds are excluded even when the
+        # request asks for display=snapshot (the param is ignored).
+        self._create_installable_build(app_id="com.regular.app")
+        snapshot_artifact = self._create_installable_build(app_id="com.snapshot.app")
+        self.create_preprod_snapshot_metrics(preprod_artifact=snapshot_artifact, image_count=5)
+
+        rows = self._csv_rows(self._request({"display": "snapshot"}))
+        assert len(rows) == 2
+        assert _col(rows[1], "app_id") == "com.regular.app"
+
+    def test_start_end_respected(self) -> None:
+        self._create_installable_build(app_id="old.app", date_added=before_now(days=5))
+        middle = self._create_installable_build(app_id="mid.app", date_added=before_now(days=3))
+        self._create_installable_build(app_id="new.app", date_added=before_now(days=1))
+
+        rows = self._csv_rows(
+            self._request({"start": before_now(days=4), "end": before_now(days=2)})
+        )
+        assert len(rows) == 2
+        assert _col(rows[1], "artifact_id") == str(middle.id)
+
+    @patch("sentry.preprod.builds_query.get_size_retention_cutoff")
+    def test_excludes_expired_artifacts(self, mock_cutoff) -> None:
+        mock_cutoff.return_value = before_now(days=30)
+        self._create_installable_build(app_id="recent.app", date_added=before_now(days=10))
+        self._create_installable_build(app_id="expired.app", date_added=before_now(days=60))
+
+        rows = self._csv_rows(self._request({}))
+        assert len(rows) == 2
+        assert _col(rows[1], "app_id") == "recent.app"
+
+    def test_results_ordered_newest_first(self) -> None:
+        self._create_installable_build(app_id="oldest.app", date_added=before_now(days=5))
+        self._create_installable_build(app_id="newest.app", date_added=before_now(days=1))
+        rows = self._csv_rows(self._request({}))
+        assert [_col(r, "app_id") for r in rows[1:]] == ["newest.app", "oldest.app"]
+
+    # --- row limit ----------------------------------------------------------
+
+    @patch("sentry.preprod.api.endpoints.builds_export.CSV_EXPORT_ROW_LIMIT", 2)
+    def test_rejects_when_too_many_rows(self) -> None:
+        for i in range(3):
+            self._create_installable_build(app_id=f"com.example.app{i}")
+        response = self._request({})
+        assert response.status_code == 400
+        # ValidationError serializes to a list of messages; check count and limit in context.
+        body = str(response.json())
+        assert "has 3 builds" in body
+        assert "limit of 2" in body
+
+    @patch("sentry.preprod.api.endpoints.builds_export.CSV_EXPORT_ROW_LIMIT", 2)
+    def test_allows_count_at_limit(self) -> None:
+        for i in range(2):
+            self._create_installable_build(app_id=f"com.example.app{i}")
+        rows = self._csv_rows(self._request({}))
+        assert len(rows) == 3  # header + 2 builds