Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions src/sentry/relocation/api/endpoints/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from datetime import timedelta
from functools import reduce
from string import Template
from uuid import uuid4

from django.db import router
from django.db.models import Q
Expand All @@ -22,12 +23,13 @@
from sentry.api.serializers import serialize
from sentry.auth.elevated_mode import has_elevated_mode
from sentry.models.files.file import File
from sentry.models.files.utils import get_relocation_storage
from sentry.options import get
from sentry.relocation.api.endpoints import ERR_FEATURE_DISABLED
from sentry.relocation.api.serializers.relocation import RelocationSerializer
from sentry.relocation.models.relocation import Relocation, RelocationFile
from sentry.relocation.tasks.process import uploading_start
from sentry.relocation.utils import RELOCATION_BLOB_SIZE, RELOCATION_FILE_TYPE
from sentry.relocation.utils import RELOCATION_FILE_TYPE, relocation_raw_data_path
from sentry.search.utils import tokenize_query
from sentry.signals import relocation_link_promo_code
from sentry.users.models.user import MAX_USERNAME_LENGTH, User
Expand All @@ -53,7 +55,9 @@
RELOCATION_FILE_SIZE_MEDIUM = 100 * 1024**2


def get_relocation_size_category(size) -> str:
def get_relocation_size_category(size: int | None) -> str:
if size is None:
return "medium"
if size < RELOCATION_FILE_SIZE_SMALL:
return "small"
elif size < RELOCATION_FILE_SIZE_MEDIUM:
Expand All @@ -62,6 +66,7 @@ def get_relocation_size_category(size) -> str:


def should_throttle_relocation(relocation_bucket_size: str) -> bool:
# TODO(cells) Instead of doing this by size we could use the number of inflight relocations.
recent_relocation_files = RelocationFile.objects.filter(
date_added__gte=(timezone.now() - timedelta(days=1))
)
Expand Down Expand Up @@ -273,14 +278,20 @@ def post(self, request: Request) -> Response:
if err is not None:
return err

file = File.objects.create(name="raw-relocation-data.tar", type=RELOCATION_FILE_TYPE)
file.putfile(fileobj, blob_size=RELOCATION_BLOB_SIZE, logger=logger)
relocation_uuid = uuid4()
path = relocation_raw_data_path(relocation_uuid)
relocation_storage = get_relocation_storage()
relocation_storage.save(path, fileobj)

# TODO(cells) Make RelocationFile.file nullable
file = File.objects.create(name="stub", type=RELOCATION_FILE_TYPE, size=file_size)

with atomic_transaction(
using=(router.db_for_write(Relocation), router.db_for_write(RelocationFile))
):
provenance = Relocation.Provenance.SELF_HOSTED
relocation: Relocation = Relocation.objects.create(
uuid=relocation_uuid,
creator_id=request.user.id,
owner_id=owner.id,
want_org_slugs=org_slugs,
Expand All @@ -290,8 +301,9 @@ def post(self, request: Request) -> Response:
)
RelocationFile.objects.create(
relocation=relocation,
file=file,
kind=RelocationFile.Kind.RAW_USER_DATA.value,
bucket_path=path,
file=file,
)
relocation_link_promo_code.send_robust(
relocation_uuid=relocation.uuid, promo_code=promo_code, sender=self.__class__
Expand Down
16 changes: 9 additions & 7 deletions src/sentry/relocation/api/endpoints/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from sentry.api.exceptions import ResourceDoesNotExist
from sentry.api.permissions import SentryIsAuthenticated
from sentry.api.serializers import serialize
from sentry.models.files.file import File
from sentry.relocation.api.endpoints.index import (
get_autopause_value,
validate_new_relocation_request,
Expand Down Expand Up @@ -75,6 +74,7 @@ def post(self, request: Request, relocation_uuid: str) -> Response:
status=status.HTTP_400_BAD_REQUEST,
)

# TODO(cells) Update this when file becomes nullable/removed.
relocation_file = (
RelocationFile.objects.filter(relocation=relocation).select_related("file").first()
)
Expand All @@ -85,10 +85,7 @@ def post(self, request: Request, relocation_uuid: str) -> Response:
)

# We can re-use the same `File` instance in the database, avoiding duplicating data.
try:
file = File.objects.get(id=relocation_file.file_id)
fileobj = file.getfile()
except (File.DoesNotExist, FileNotFoundError):
if not relocation_file.file:
return Response(
{"detail": ERR_FILE_NO_LONGER_EXISTS},
status=status.HTTP_400_BAD_REQUEST,
Expand All @@ -102,7 +99,7 @@ def post(self, request: Request, relocation_uuid: str) -> Response:
)

err = validate_new_relocation_request(
request, owner.username, relocation.want_org_slugs, fileobj.size
request, owner.username, relocation.want_org_slugs, relocation_file.file.size
) or validate_relocation_uniqueness(owner)
if err is not None:
return err
Expand All @@ -126,10 +123,15 @@ def post(self, request: Request, relocation_uuid: str) -> Response:
new_relocation_uuid=new_relocation.uuid,
sender=self.__class__,
)
# Create a new RelocationFile. Initially the bucket_path
# will point to the original relocation. During preprocessing_transfer
# we will copy the raw data into `runs/{uuid}/in` so it can be copied
# into cloudbuild.
RelocationFile.objects.create(
relocation=new_relocation,
file=file,
Comment thread
cursor[bot] marked this conversation as resolved.
kind=RelocationFile.Kind.RAW_USER_DATA.value,
bucket_path=relocation_file.bucket_path,
file=relocation_file.file,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Retry copies null bucket path

Medium Severity

Failed-relocation retry reuses relocation_file.bucket_path on the new RelocationFile without checking it is set. Failures created before shared-bucket uploads can have a null path while data still lives on the legacy File record, so the retried run cannot open raw data from storage.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 93dc700. Configure here.

)

uploading_start.delay(str(new_relocation.uuid), None, None)
Expand Down
45 changes: 18 additions & 27 deletions src/sentry/relocation/services/relocation_export/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@
import base64
import logging
from datetime import UTC, datetime
from io import BytesIO
from uuid import UUID

from django.db import router
from django.db.utils import IntegrityError
from django.utils import timezone
from sentry_sdk import capture_exception

from sentry.models.files.file import File
from sentry.models.files.utils import get_relocation_storage
from sentry.relocation.models.relocation import Relocation, RelocationFile
from sentry.relocation.models.relocationtransfer import (
RETRY_BACKOFF,
Expand All @@ -25,7 +24,11 @@
CellRelocationExportService,
ControlRelocationExportService,
)
from sentry.relocation.utils import RELOCATION_BLOB_SIZE, RELOCATION_FILE_TYPE
from sentry.relocation.utils import (
RELOCATION_FILE_TYPE,
get_relocation_storage,
relocation_raw_data_path,
)
from sentry.utils.db import atomic_transaction

logger = logging.getLogger("sentry.relocation")
Expand Down Expand Up @@ -77,8 +80,7 @@ def reply_with_export(
requesting_region_name: str,
replying_region_name: str,
org_slug: str,
encrypted_bytes: list[int],
# TODO(azaslavsky): finish transfer from `encrypted_contents` -> `encrypted_bytes`.
encrypted_bytes: list[int] | None = None,
encrypted_contents: bytes | None = None,
) -> None:
from sentry.relocation.tasks.process import uploading_complete
Expand All @@ -95,23 +97,24 @@ def reply_with_export(
"requesting_region_name": requesting_region_name,
"replying_region_name": replying_region_name,
"org_slug": org_slug,
# TODO(azaslavsky): finish transfer from `encrypted_contents` -> `encrypted_bytes`.
"encrypted_bytes_size": len(encrypted_bytes or []),
}
logger.info("SaaS -> SaaS reply received in triggering region", extra=logger_data)
uuid = UUID(relocation_uuid)

try:
relocation: Relocation = Relocation.objects.get(uuid=relocation_uuid)
relocation: Relocation = Relocation.objects.get(uuid=uuid)
except Relocation.DoesNotExist as e:
logger.exception("Could not locate Relocation model by UUID: %s", relocation_uuid)
capture_exception(e)
return

# TODO(azaslavsky): finish transfer from `encrypted_contents` -> `encrypted_bytes`.
fp = BytesIO(bytes(encrypted_bytes or []))
file = File.objects.create(name="raw-relocation-data.tar", type=RELOCATION_FILE_TYPE)
file.putfile(fp, blob_size=RELOCATION_BLOB_SIZE, logger=logger)
logger.info("SaaS -> SaaS relocation underlying File created", extra=logger_data)
# Now that we have confirmation that the export file
# was stored in the shared bucket, create a RelocationFile record
# so that the import process can begin.
relocation_storage = get_relocation_storage()
blobsize = relocation_storage.size(relocation_raw_data_path(uuid))
# TODO(cells) Remove this once RelocationFile.file is optional.
file = File.objects.create(name="stub", type=RELOCATION_FILE_TYPE, size=blobsize)

# This write ensures that the entire chain triggered by `uploading_start` remains
# idempotent, since only one (relocation_uuid, relocation_file_kind) pairing can exist
Comment on lines +114 to 120
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Catching an IntegrityError leaves the transaction in an aborted state, leading to a TransactionManagementError and an infinite retry loop that repeatedly schedules a Celery task.
Severity: CRITICAL

Suggested Fix

When an IntegrityError is caught, the transaction should be explicitly rolled back to a savepoint created before the failing create() call. This will allow the transaction to be committed successfully, preventing the TransactionManagementError and the subsequent infinite retry loop. The pass statement should be replaced with logic to handle the transaction state correctly.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: src/sentry/relocation/services/relocation_export/impl.py#L114-L120

Potential issue: When handling an `IntegrityError` from
`RelocationFile.objects.create()` for idempotency, the exception is caught, but the
underlying database transaction is left in an aborted state. The code then proceeds to
schedule an `uploading_complete` task via Celery. When the `atomic_transaction` block
exits, Django raises a `TransactionManagementError` because the transaction was aborted.
The calling function catches this error but fails to delete the `transfer` record,
causing an infinite retry loop where the `uploading_complete` task is repeatedly
scheduled on each attempt.

Expand All @@ -122,11 +125,11 @@ def reply_with_export(
relocation=relocation,
file=file,
kind=RelocationFile.Kind.RAW_USER_DATA.value,
bucket_path=relocation_raw_data_path(uuid),
)
except IntegrityError:
# We already have the file, we can proceed.
pass

Comment thread
cursor[bot] marked this conversation as resolved.
logger.info("SaaS -> SaaS relocation RelocationFile saved", extra=logger_data)

uploading_complete.apply_async(args=[str(relocation.uuid)])
Expand Down Expand Up @@ -173,8 +176,7 @@ def reply_with_export(
requesting_region_name: str,
replying_region_name: str,
org_slug: str,
encrypted_bytes: list[int],
# TODO(azaslavsky): finish transfer from `encrypted_contents` -> `encrypted_bytes`.
encrypted_bytes: list[int] | None = None,
encrypted_contents: bytes | None = None,
) -> None:
from sentry.relocation.tasks.transfer import process_relocation_transfer_control
Expand All @@ -184,20 +186,9 @@ def reply_with_export(
"requesting_region_name": requesting_region_name,
"replying_region_name": replying_region_name,
"org_slug": org_slug,
# TODO(azaslavsky): finish transfer from `encrypted_contents` -> `encrypted_bytes`.
"encrypted_bytes_size": len(encrypted_bytes or []),
}
logger.info("SaaS -> SaaS reply received on proxy", extra=logger_data)

# Save the payload into the control silo's "relocation" GCS bucket. This bucket is only used
# for temporary storage of `encrypted_bytes` being shuffled between cells like this.
path = f"runs/{relocation_uuid}/saas_to_saas_export/{org_slug}.tar"
relocation_storage = get_relocation_storage()
# TODO(azaslavsky): finish transfer from `encrypted_contents` -> `encrypted_bytes`.
fp = BytesIO(bytes(encrypted_bytes or []))
relocation_storage.save(path, fp)
logger.info("SaaS -> SaaS export contents retrieved", extra=logger_data)

# Save transfer record so we can push state to the requesting cell
transfer = ControlRelocationTransfer.objects.create(
relocation_uuid=relocation_uuid,
Expand Down
8 changes: 4 additions & 4 deletions src/sentry/relocation/services/relocation_export/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def reply_with_export(
requesting_region_name: str,
replying_region_name: str,
org_slug: str,
encrypted_bytes: list[int],
# TODO(azaslavsky): finish transfer from `encrypted_contents` -> `encrypted_bytes`.
# TODO(cells) bytes/contents are deprecated
encrypted_bytes: list[int] | None = None,
encrypted_contents: bytes | None = None,
) -> None:
"""
Expand Down Expand Up @@ -132,8 +132,8 @@ def reply_with_export(
requesting_region_name: str,
replying_region_name: str,
org_slug: str,
encrypted_bytes: list[int],
# TODO(azaslavsky): finish transfer from `encrypted_contents` -> `encrypted_bytes`.
# TODO(cells) bytes/contents are deprecated
encrypted_bytes: list[int] | None = None,
encrypted_contents: bytes | None = None,
) -> None:
"""
Expand Down
Loading
Loading