Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
from __future__ import annotations

import logging
import zipfile
from collections import defaultdict
from collections.abc import Generator
from io import BytesIO

import orjson
from django.conf import settings
from django.http import StreamingHttpResponse
from django.http.response import HttpResponseBase
from rest_framework.request import Request
from rest_framework.response import Response

from sentry import features
from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import cell_silo_endpoint
from sentry.api.bases.organization import (
OrganizationEndpoint,
OrganizationReleasePermission,
)
from sentry.auth.staff import is_active_staff
from sentry.models.organization import Organization
from sentry.objectstore import get_preprod_session
from sentry.preprod.models import PreprodArtifact
from sentry.preprod.snapshots.manifest import SnapshotManifest
from sentry.preprod.snapshots.models import PreprodSnapshotMetrics
from sentry.utils.concurrent import ContextPropagatingThreadPoolExecutor
from sentry.utils.zip import is_unsafe_path

logger = logging.getLogger(__name__)

FETCH_BATCH_SIZE = 200
FETCH_MAX_WORKERS = 32


class _DrainableBuffer:
def __init__(self) -> None:
self._buf = BytesIO()
self._pos = 0

def write(self, data: bytes) -> int:
self._buf.write(data)
self._pos += len(data)
return len(data)

def tell(self) -> int:
return self._pos

def seekable(self) -> bool:
return False

def flush(self) -> None:
pass

def close(self) -> None:
pass

def drain(self) -> bytes:
self._buf.seek(0)
data = self._buf.read()
self._buf.seek(0)
self._buf.truncate()
return data
Comment thread
cursor[bot] marked this conversation as resolved.


def _build_hash_to_filenames(
manifest: SnapshotManifest,
) -> dict[str, list[str]]:
result: dict[str, list[str]] = defaultdict(list)
for filename, meta in manifest.images.items():
if not is_unsafe_path(filename):
result[meta.content_hash].append(filename)
return result
Comment thread
NicoHinderling marked this conversation as resolved.


@cell_silo_endpoint
class OrganizationPreprodSnapshotDownloadEndpoint(OrganizationEndpoint):
owner = ApiOwner.EMERGE_TOOLS
publish_status = {
"GET": ApiPublishStatus.EXPERIMENTAL,
}
permission_classes = (OrganizationReleasePermission,)

def get(
self, request: Request, organization: Organization, snapshot_id: str
) -> HttpResponseBase:
if not settings.IS_DEV and not features.has(
"organizations:preprod-snapshots", organization, actor=request.user
):
return Response({"detail": "Feature not enabled"}, status=403)

try:
artifact = PreprodArtifact.objects.select_related("project").get(
id=snapshot_id, project__organization_id=organization.id
)
except (PreprodArtifact.DoesNotExist, ValueError):
return Response({"detail": "Snapshot not found"}, status=404)

if not is_active_staff(request) and not request.access.has_project_access(artifact.project):
return Response({"detail": "Snapshot not found"}, status=404)

try:
snapshot_metrics = artifact.preprodsnapshotmetrics
except PreprodSnapshotMetrics.DoesNotExist:
return Response({"detail": "Snapshot metrics not found"}, status=404)

manifest_key = (snapshot_metrics.extras or {}).get("manifest_key")
if not manifest_key:
return Response({"detail": "Manifest key not found"}, status=404)

session = get_preprod_session(organization.id, artifact.project_id)

try:
get_response = session.get(manifest_key)
manifest_data = orjson.loads(get_response.payload.read())
manifest = SnapshotManifest(**manifest_data)
except Exception:
logger.exception(
"preprod_snapshot_download.manifest_fetch_failed",
extra={
"preprod_artifact_id": artifact.id,
"manifest_key": manifest_key,
},
)
return Response({"detail": "Internal server error"}, status=500)

hash_to_filenames = _build_hash_to_filenames(manifest)
unique_hashes = list(hash_to_filenames.keys())
key_prefix = f"{organization.id}/{artifact.project_id}"

def _stream_zip() -> Generator[bytes]:
buf = _DrainableBuffer()
zf = zipfile.ZipFile(buf, "w", zipfile.ZIP_STORED)
failed_count = 0

def fetch_image(image_hash: str) -> tuple[str, bytes | None]:
try:
data = session.get(f"{key_prefix}/{image_hash}").payload.read()
return (image_hash, data)
except Exception:
logger.exception(
"preprod_snapshot_download.image_fetch_failed",
extra={
"preprod_artifact_id": artifact.id,
"image_hash": image_hash,
},
)
return (image_hash, None)

try:
with ContextPropagatingThreadPoolExecutor(
max_workers=FETCH_MAX_WORKERS
) as executor:
for batch_start in range(0, len(unique_hashes), FETCH_BATCH_SIZE):
batch = unique_hashes[batch_start : batch_start + FETCH_BATCH_SIZE]

for image_hash, data in executor.map(fetch_image, batch):
if data is None:
Comment thread
NicoHinderling marked this conversation as resolved.
failed_count += 1
continue
for filename in hash_to_filenames[image_hash]:
zf.writestr(filename, data)

chunk = buf.drain()
if chunk:
yield chunk

if failed_count:
logger.warning(
"preprod_snapshot_download.image_fetch_failures",
extra={
"preprod_artifact_id": artifact.id,
"failed_count": failed_count,
"total_count": len(unique_hashes),
},
)
finally:
zf.close()

chunk = buf.drain()
if chunk:
yield chunk
Comment thread
sentry[bot] marked this conversation as resolved.

response = StreamingHttpResponse(_stream_zip(), content_type="application/zip")
response["Content-Disposition"] = (
f'attachment; filename="snapshot_images_{artifact.id}.zip"'
)
return response
6 changes: 6 additions & 0 deletions src/sentry/preprod/api/endpoints/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
OrganizationPreprodSnapshotEndpoint,
ProjectPreprodSnapshotEndpoint,
)
from .preprod_artifact_snapshot_download import OrganizationPreprodSnapshotDownloadEndpoint
from .preprod_snapshot_recompare import PreprodSnapshotRecompareEndpoint
from .project_installable_preprod_artifact_download import (
ProjectInstallablePreprodArtifactDownloadEndpoint,
Expand Down Expand Up @@ -215,6 +216,11 @@
PreprodSnapshotRecompareEndpoint.as_view(),
name="sentry-api-0-organization-preprod-snapshots-recompare",
),
re_path(
r"^(?P<organization_id_or_slug>[^/]+)/preprodartifacts/snapshots/(?P<snapshot_id>[^/]+)/download/$",
OrganizationPreprodSnapshotDownloadEndpoint.as_view(),
name="sentry-api-0-organization-preprod-snapshots-download",
),
re_path(
r"^(?P<organization_id_or_slug>[^/]+)/preprodartifacts/(?P<head_artifact_id>[^/]+)/distribution/$",
ProjectPreprodDistributionEndpoint.as_view(),
Expand Down
12 changes: 9 additions & 3 deletions src/sentry/utils/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@


def is_unsafe_path(path: str) -> bool:
if os.path.isabs(path):
# Normalize backslashes to forward slashes so traversal attacks using
# Windows-style separators (e.g. "..\..\..\evil.png") are caught even
# when this runs on Linux where os.path.sep is "/".
normalized = path.replace("\\", "/")
if os.path.isabs(path) or normalized.startswith("/"):
return True
for segment in path.split(os.path.sep):
if segment == os.path.pardir:
if len(normalized) >= 2 and normalized[0].isalpha() and normalized[1] == ":":
return True
for segment in normalized.split("/"):
if segment == "..":
return True
return False

Expand Down
1 change: 1 addition & 0 deletions static/app/utils/api/knownSentryApiUrls.generated.ts
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ export type KnownSentryApiUrls =
| '/organizations/$organizationIdOrSlug/preprodartifacts/$headArtifactId/private-install-details/'
| '/organizations/$organizationIdOrSlug/preprodartifacts/size-analysis/compare/$headArtifactId/$baseArtifactId/'
| '/organizations/$organizationIdOrSlug/preprodartifacts/snapshots/$snapshotId/'
| '/organizations/$organizationIdOrSlug/preprodartifacts/snapshots/$snapshotId/download/'
| '/organizations/$organizationIdOrSlug/preprodartifacts/snapshots/$snapshotId/recompare/'
| '/organizations/$organizationIdOrSlug/prevent/owner/$owner/repositories/'
| '/organizations/$organizationIdOrSlug/prevent/owner/$owner/repositories/sync/'
Expand Down
29 changes: 29 additions & 0 deletions tests/sentry/utils/test_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,32 @@ def test_is_unsafe_path() -> None:
assert is_unsafe_path("aha/../foo.txt")
assert not is_unsafe_path("foo.txt")
assert not is_unsafe_path("foo/bar.txt")


def test_is_unsafe_path_backslash_traversal() -> None:
assert is_unsafe_path("..\\foo.txt")
assert is_unsafe_path("..\\..\\foo.txt")
assert is_unsafe_path("aha\\..\\foo.txt")
assert is_unsafe_path("aha\\..\\..\\foo.txt")


def test_is_unsafe_path_mixed_separators() -> None:
assert is_unsafe_path("../..\\foo.txt")
assert is_unsafe_path("..\\../foo.txt")
assert is_unsafe_path("a/b\\..\\../foo.txt")


def test_is_unsafe_path_windows_absolute() -> None:
assert is_unsafe_path("C:\\evil.exe")
assert is_unsafe_path("C:/evil.exe")
assert is_unsafe_path("D:\\Windows\\System32\\evil.dll")


def test_is_unsafe_path_backslash_absolute() -> None:
assert is_unsafe_path("\\\\server\\share\\file.txt")
assert is_unsafe_path("\\foo.txt")


def test_is_unsafe_path_safe_with_backslash_in_name() -> None:
assert not is_unsafe_path("foo\\bar.txt")
assert not is_unsafe_path("a\\b\\c.txt")
Loading