Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 21 additions & 29 deletions src/sentry/preprod/api/endpoints/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@
from sentry.preprod.api.models.project_preprod_build_details_models import (
transform_preprod_artifact_to_build_details,
)
from sentry.preprod.artifact_search import queryset_for_query
from sentry.preprod.builds_query import filtered_builds_queryset
from sentry.preprod.models import PreprodArtifact
from sentry.preprod.quotas import get_size_retention_cutoff

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -56,42 +55,35 @@ def on_results(artifacts: list[PreprodArtifact]) -> list[dict[str, Any]]:
except NoProjects:
return paginate(PreprodArtifact.objects.none())

start = params["start"]
end = params["end"]
# Builds don't have environments so we ignore environments from
# params on purpose.

query = request.GET.get("query", "").strip()
cutoff = get_size_retention_cutoff(organization)
display = request.GET.get("display")

try:
queryset = queryset_for_query(query, organization)
queryset = queryset.select_related(
"project",
"build_configuration",
"commit_comparison",
"mobile_app_info",
"preprodsnapshotmetrics",
).prefetch_related(
"preprodartifactsizemetrics_set",
"preprodsnapshotmetrics__snapshot_comparisons_head_metrics",
"preprodcomparisonapproval_set",
queryset = filtered_builds_queryset(
organization=organization,
query=query,
display=display,
project_ids=params["project_id"],
start=params["start"],
end=params["end"],
)
queryset = queryset.filter(date_added__gte=cutoff)
if start:
queryset = queryset.filter(date_added__gte=start)
if end:
queryset = queryset.filter(date_added__lte=end)
queryset = queryset.filter(project_id__in=params["project_id"])

display = request.GET.get("display")
if display in ("size", "distribution"):
queryset = queryset.filter(preprodsnapshotmetrics__isnull=True)
elif display == "snapshot":
queryset = queryset.filter(preprodsnapshotmetrics__isnull=False)
except InvalidSearchQuery as e:
# CodeQL complains about str(e) below but ~all handlers
# of InvalidSearchQuery do the same as this.
return Response({"detail": str(e)}, status=400)

queryset = queryset.select_related(
"project",
"build_configuration",
"commit_comparison",
"mobile_app_info",
"preprodsnapshotmetrics",
).prefetch_related(
"preprodartifactsizemetrics_set",
"preprodsnapshotmetrics__snapshot_comparisons_head_metrics",
"preprodcomparisonapproval_set",
)

return paginate(queryset)
149 changes: 149 additions & 0 deletions src/sentry/preprod/api/endpoints/builds_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
from __future__ import annotations

import logging

from django.http.response import HttpResponseBase
from django.utils import timezone
from rest_framework import serializers
from rest_framework.request import Request

from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import cell_silo_endpoint
from sentry.api.bases.organization import NoProjects, OrganizationEndpoint
from sentry.api.utils import handle_query_errors
from sentry.models.organization import Organization
from sentry.preprod.build_distribution_utils import is_installable_artifact
from sentry.preprod.builds_query import filtered_builds_queryset
from sentry.preprod.models import PreprodArtifact
from sentry.ratelimits.config import RateLimitConfig
from sentry.types.ratelimit import RateLimit, RateLimitCategory
from sentry.utils import json
from sentry.web.frontend.csv import CsvResponder

logger = logging.getLogger(__name__)

CSV_EXPORT_ROW_LIMIT = 10_000

_FORMULA_PREFIXES = ("=", "+", "-", "@")


def _escape_csv_value(value: object) -> str:
"""Stringify a value, neutralizing spreadsheet formula injection."""
if value is None:
return ""
text = str(value)
stripped = text.lstrip()
if stripped and stripped[0] in _FORMULA_PREFIXES:
return "'" + text
return text


class BuildsCsvResponder(CsvResponder[PreprodArtifact]):
def get_header(self) -> tuple[str, ...]:
return (
"app_name",
"project_slug",
"artifact_id",
"app_id",
"build_configuration",
"version",
"platform",
"install_groups",
"upload_date",
"download_count",
)

def get_row(self, item: PreprodArtifact) -> tuple[str, ...]:
mobile_app_info = item.get_mobile_app_info()
platform = item.platform
build_configuration = item.build_configuration
download_count = getattr(item, "download_count", 0)

# Emit install_groups as a JSON array
raw_install_groups = (item.extras or {}).get("install_groups")
install_groups = json.dumps(
raw_install_groups if isinstance(raw_install_groups, list) else []
)
return (
_escape_csv_value(mobile_app_info.app_name if mobile_app_info else None),
_escape_csv_value(item.project.slug),
_escape_csv_value(item.id),
_escape_csv_value(item.app_id),
_escape_csv_value(build_configuration.name if build_configuration else None),
_escape_csv_value(mobile_app_info.build_version if mobile_app_info else None),
_escape_csv_value(platform.value if platform else None),
install_groups,
_escape_csv_value(item.date_added.isoformat() if item.date_added else None),
_escape_csv_value(download_count),
)


@cell_silo_endpoint
class BuildsExportEndpoint(OrganizationEndpoint):
owner = ApiOwner.EMERGE_TOOLS
publish_status = {
"GET": ApiPublishStatus.EXPERIMENTAL,
}

enforce_rate_limit = True
rate_limits = RateLimitConfig(
limit_overrides={
"GET": {
RateLimitCategory.IP: RateLimit(limit=5, window=1, concurrent_limit=2),
RateLimitCategory.USER: RateLimit(limit=5, window=1, concurrent_limit=2),
RateLimitCategory.ORGANIZATION: RateLimit(limit=10, window=1, concurrent_limit=5),
}
}
)

def get(self, request: Request, organization: Organization) -> HttpResponseBase:
"""Stream build distribution stats for the current filters as a CSV.

Accepts the same ``query``, ``project``, and date-range params as the builds list
endpoint. The export is build-distribution-specific, so it always uses the
distribution row set regardless of any ``display`` param.
"""
filename = (
f"{organization.slug}-build-distribution-{timezone.now().strftime('%Y-%m-%d-%H%M%S')}"
)

try:
params = self.get_filter_params(request, organization, date_filter_optional=True)
except NoProjects:
return BuildsCsvResponder().respond(iter(()), filename)

query = request.GET.get("query", "").strip()

# We force display="distribution" because the logic is really only for build distribution info.
with handle_query_errors():
queryset = filtered_builds_queryset(
organization=organization,
query=query,
display="distribution",
project_ids=params["project_id"],
start=params["start"],
end=params["end"],
)

# Filter out non-installable builds since they aren't really relevant for distribution info.
queryset = queryset.filter(installable_app_file_id__isnull=False)

# Reject oversized exports rather than silently truncating. The SQL limit is conservatively
# correct, but could lead to false-negatives in some edge cases, which we're ignoring.
row_count = queryset.count()
if row_count > CSV_EXPORT_ROW_LIMIT:
raise serializers.ValidationError(
{
"detail": f"This export has {row_count} builds, which exceeds the limit of {CSV_EXPORT_ROW_LIMIT}. "
"Narrow your search or date range and try again."
}
)

queryset = queryset.select_related(
"mobile_app_info", "project", "build_configuration"
).order_by("-date_added")
Comment on lines +143 to +145

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The CSV export for builds will always report a download_count of 0 because the database query is missing the necessary annotation to calculate this value.
Severity: MEDIUM

Suggested Fix

Add the .annotate_download_count() method to the queryset chain in src/sentry/preprod/api/endpoints/builds_export.py between lines 143 and 145. This will ensure the download_count is correctly calculated and included in the exported CSV.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: src/sentry/preprod/api/endpoints/builds_export.py#L143-L145

Potential issue: The CSV export for builds consistently reports a `download_count` of 0
for all artifacts. This occurs because the database queryset at lines 143-145 does not
include the `.annotate_download_count()` method. As a result, the `download_count`
attribute is not present on the model instances being iterated over. The code then falls
back to `getattr(item, "download_count", 0)`, which returns the default value of 0,
leading to incorrect data in the exported file.

Did we get this right? 👍 / 👎 to inform future reviews.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BuildsExportEndpoint.get() builds its queryset via filtered_builds_queryset(), which traces back to the annotation:

builds_export.py:120 → filtered_builds_queryset(...)
builds_query.py:37 → queryset_for_query(query, organization)
artifact_search.py:222 → apply_filters(_base_searchable_queryset(), ...)
artifact_search.py:191-198 → _base_searchable_queryset() calls .annotate_download_count()

installable_builds = (
artifact for artifact in queryset.iterator() if is_installable_artifact(artifact)
)
return BuildsCsvResponder().respond(installable_builds, filename)
6 changes: 6 additions & 0 deletions src/sentry/preprod/api/endpoints/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.urls import re_path

from sentry.preprod.api.endpoints.builds import BuildsEndpoint
from sentry.preprod.api.endpoints.builds_export import BuildsExportEndpoint
from sentry.preprod.api.endpoints.project_preprod_artifact_image import (
ProjectPreprodArtifactImageEndpoint,
)
Expand Down Expand Up @@ -183,6 +184,11 @@
BuildsEndpoint.as_view(),
name="sentry-api-0-organization-builds",
),
re_path(
r"^(?P<organization_id_or_slug>[^/]+)/builds-export/$",
BuildsExportEndpoint.as_view(),
name="sentry-api-0-organization-builds-export",
),
# Public API endpoints
re_path(
r"^(?P<organization_id_or_slug>[^/]+)/preprodartifacts/(?P<artifact_id>[^/]+)/install-details/$",
Expand Down
50 changes: 50 additions & 0 deletions src/sentry/preprod/builds_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from __future__ import annotations

from collections.abc import Sequence
from datetime import datetime

from sentry.models.organization import Organization
from sentry.preprod.artifact_search import queryset_for_query
from sentry.preprod.models import PreprodArtifactQuerySet
from sentry.preprod.quotas import get_size_retention_cutoff

# Display values that constrain the list to non-snapshot builds (size + distribution
# share the same underlying artifacts; only the columns shown differ).
_NON_SNAPSHOT_DISPLAYS = ("size", "distribution")


def filtered_builds_queryset(
*,
organization: Organization,
query: str,
display: str | None,
project_ids: Sequence[int],
start: datetime | None,
end: datetime | None,
) -> PreprodArtifactQuerySet:
"""Build the PreprodArtifact queryset shared by the builds list and CSV export endpoints.

Applies the search query, size-retention cutoff, optional date range, project
scoping, and display-based snapshot filtering. Keeping this in one place ensures
the CSV export returns exactly the same rows as the on-screen list.

Callers are responsible for adding their own ``select_related``/``prefetch_related``
and ordering on top of the returned queryset.

Raises:
InvalidSearchQuery: if the query string is invalid.
"""
queryset = queryset_for_query(query, organization)
queryset = queryset.filter(date_added__gte=get_size_retention_cutoff(organization))

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps we can rename this to "get_preprod_retention_cutoff` since snapshots and size retention rules will be the same and we're using them for both potential cases here

Image

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll follow up with a rename in another PR since it's used in a number of spots

if start:
queryset = queryset.filter(date_added__gte=start)
if end:
queryset = queryset.filter(date_added__lte=end)
queryset = queryset.filter(project_id__in=project_ids)

if display in _NON_SNAPSHOT_DISPLAYS:
queryset = queryset.filter(preprodsnapshotmetrics__isnull=True)
elif display == "snapshot":
queryset = queryset.filter(preprodsnapshotmetrics__isnull=False)

return queryset
1 change: 1 addition & 0 deletions static/app/utils/api/knownSentryApiUrls.generated.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ export type KnownSentryApiUrls =
| '/organizations/$organizationIdOrSlug/available-actions/'
| '/organizations/$organizationIdOrSlug/avatar/'
| '/organizations/$organizationIdOrSlug/broadcasts/'
| '/organizations/$organizationIdOrSlug/builds-export/'
| '/organizations/$organizationIdOrSlug/builds/'
| '/organizations/$organizationIdOrSlug/builtin-symbol-sources/'
| '/organizations/$organizationIdOrSlug/chunk-upload/'
Expand Down
2 changes: 1 addition & 1 deletion tests/sentry/preprod/api/endpoints/test_builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ def test_distribution_error_code_invalid_values(self) -> None:
self.create_preprod_artifact(app_id="test.app")
assert self._request({"query": "distribution_error_code:bogus"}).status_code == 400

@patch("sentry.preprod.api.endpoints.builds.get_size_retention_cutoff")
@patch("sentry.preprod.builds_query.get_size_retention_cutoff")
def test_excludes_expired_artifacts(self, mock_cutoff) -> None:
mock_cutoff.return_value = before_now(days=30)
self.create_preprod_artifact(app_id="recent.app", date_added=before_now(days=10))
Expand Down
Loading
Loading