4444from sentry .models .files .utils import MAX_FILE_SIZE , clear_cached_files
4545from sentry .objectstore import get_debug_files_session
4646from sentry .utils import json , metrics
47+ from sentry .utils .concurrent import ContextPropagatingThreadPoolExecutor
4748from sentry .utils .zip import safe_extract_zip
4849
4950if TYPE_CHECKING :
5455logger = logging .getLogger (__name__ )
5556
5657DIF_MIMETYPES = {v : k for k , v in KNOWN_DIF_FORMATS .items ()}
57- OBJECTSTORE_MULTIPART_PART_SIZE = 32 * 1024 * 1024
58- OBJECTSTORE_MULTIPART_MAX_RETRIES = 3
59- OBJECTSTORE_MULTIPART_MAX_WORKERS = 4
6058
6159_proguard_file_re = re .compile (r"/proguard/(?:mapping-)?(.*?)\.txt$" )
6260
61+ OBJECTSTORE_MULTIPART_PART_SIZE = 32 * 1024 * 1024 # 32 MiB
62+
6363
6464class BadDif (Exception ):
6565 pass
@@ -168,7 +168,6 @@ class Meta:
168168 __repr__ = sane_repr ("object_name" , "cpu_name" , "debug_id" )
169169
170170 def get_checksum (self ) -> str :
171- """Returns the SHA-1 checksum, reading from the denormalized column or the linked File."""
172171 if self .storage_path is not None :
173172 assert self .checksum is not None
174173 return self .checksum
@@ -178,7 +177,6 @@ def get_checksum(self) -> str:
178177 raise ValueError ("ProjectDebugFile has neither file nor storage_path" )
179178
180179 def get_content_type (self ) -> str :
181- """Returns the MIME content type, reading from the denormalized column or the linked File."""
182180 if self .storage_path is not None :
183181 assert self .content_type is not None
184182 return str (self .content_type )
@@ -187,7 +185,6 @@ def get_content_type(self) -> str:
187185 raise ValueError ("ProjectDebugFile has neither file nor storage_path" )
188186
189187 def get_file_size (self ) -> int :
190- """Returns the file size in bytes, reading from the denormalized column or the linked File."""
191188 if self .storage_path is not None :
192189 assert self .file_size is not None
193190 return int (self .file_size )
@@ -196,7 +193,6 @@ def get_file_size(self) -> int:
196193 raise ValueError ("ProjectDebugFile has neither file nor storage_path" )
197194
198195 def get_date_created (self ) -> datetime :
199- """Returns the creation timestamp, reading from the denormalized column or the linked File."""
200196 if self .storage_path is not None :
201197 assert self .date_created is not None
202198 return self .date_created
@@ -205,7 +201,6 @@ def get_date_created(self) -> datetime:
205201 raise ValueError ("ProjectDebugFile has neither file nor storage_path" )
206202
207203 def get_headers (self ) -> dict [str , str ]:
208- """Returns file headers (currently just Content-Type), from the denormalized column or the linked File."""
209204 if self .storage_path is not None :
210205 assert self .content_type is not None
211206 return {"Content-Type" : self .content_type }
@@ -264,14 +259,12 @@ def features(self) -> frozenset[str]:
264259 return frozenset ((self .data or {}).get ("features" , []))
265260
266261 def _get_objectstore_session (self ) -> Session :
267- """Returns an Objectstore session scoped to this debug file's project."""
268262 from sentry .models .project import Project
269263
270264 org_id = Project .objects .get_from_cache (id = self .project_id ).organization_id
271265 return get_debug_files_session (org = org_id , project = self .project_id )
272266
273267 def getfile (self ) -> IO [bytes ]:
274- """Returns a file-like object with the debug file contents, from Objectstore or the linked File."""
275268 if self .storage_path is not None :
276269 try :
277270 response = self ._get_objectstore_session ().get (self .storage_path )
@@ -284,7 +277,6 @@ def getfile(self) -> IO[bytes]:
284277 raise ValueError ("ProjectDebugFile has neither file nor storage_path" )
285278
286279 def save_to (self , path : str ) -> None :
287- """Downloads the debug file contents to a local path, atomically via a temp file."""
288280 if self .storage_path is not None :
289281 path = os .path .abspath (path )
290282 base = os .path .dirname (path )
@@ -404,57 +396,48 @@ def create_dif_from_file(
404396 return create_dif_from_id (project , result [0 ], file = file )
405397
406398
407- def _put_objectstore_part_with_retry (
408- upload : MultipartUpload , chunk : bytes , part_number : int
409- ) -> CompletePart :
410- """Uploads a single multipart part, retrying with exponential backoff on transient errors."""
411- for attempt in range (OBJECTSTORE_MULTIPART_MAX_RETRIES ):
412- try :
413- return upload .put_part (chunk , part_number = part_number , content_length = len (chunk ))
414- except (RequestError , HTTPError ):
415- if attempt == OBJECTSTORE_MULTIPART_MAX_RETRIES - 1 :
416- raise
417- time .sleep (2 ** attempt )
418-
419- raise AssertionError ("unreachable" )
420-
421-
422399def _upload_dif_to_objectstore (
423400 session : Session ,
424401 fileobj : IO [bytes ],
425402 content_type : str ,
426403 file_size : int ,
427- ) -> tuple [str , int ]:
428- """Uploads a debug file to Objectstore via parallel multipart upload, aborting on failure.
429-
430- Parts are uploaded concurrently with up to OBJECTSTORE_MULTIPART_MAX_WORKERS threads.
431- Each worker reads its chunk from fileobj under a lock, so memory usage is bounded to
432- roughly max_workers * OBJECTSTORE_MULTIPART_PART_SIZE.
433-
434- Returns (storage_path, uploaded_size).
435- """
436- from sentry .utils .concurrent import ContextPropagatingThreadPoolExecutor
437-
404+ ) -> str :
405+ """Uploads a debug file to Objectstore via parallel multipart upload, returning the key under which the file was uploaded."""
438406 upload = session .initiate_multipart_upload (content_type = content_type )
439- read_lock = threading .Lock ()
407+
408+ lock = threading .Lock ()
440409 num_parts = max (1 , math .ceil (file_size / OBJECTSTORE_MULTIPART_PART_SIZE ))
441410
442- def _read_and_upload_part (part_number : int ) -> CompletePart | None :
443- with read_lock :
411+ def put_part_with_retry (
412+ upload : MultipartUpload , chunk : bytes , part_number : int
413+ ) -> CompletePart :
414+ for attempt in range (3 ):
415+ try :
416+ return upload .put_part (chunk , part_number = part_number , content_length = len (chunk ))
417+ except (RequestError , HTTPError ):
418+ if attempt == 2 :
419+ raise
420+ time .sleep (2 ** attempt )
421+ raise AssertionError ("unreachable" )
422+
423+ def read_and_put_part (part_number : int ) -> CompletePart | None :
424+ offset = (part_number - 1 ) * OBJECTSTORE_MULTIPART_PART_SIZE
425+ with lock :
426+ fileobj .seek (offset )
444427 chunk = fileobj .read (OBJECTSTORE_MULTIPART_PART_SIZE )
445428 if not chunk :
446429 return None
447- return _put_objectstore_part_with_retry (upload , chunk , part_number )
430+ return put_part_with_retry (upload , chunk , part_number )
448431
449432 try :
450433 with ContextPropagatingThreadPoolExecutor (
451- max_workers = OBJECTSTORE_MULTIPART_MAX_WORKERS
434+ max_workers = 4 ,
452435 ) as executor :
453- futures = [executor .submit (_read_and_upload_part , i + 1 ) for i in range (num_parts )]
436+ futures = [executor .submit (read_and_put_part , i + 1 ) for i in range (num_parts )]
454437 parts = [part for f in futures if (part := f .result ()) is not None ]
455438
456439 storage_path = upload .complete (parts )
457- return storage_path , file_size
440+ return storage_path
458441 except Exception :
459442 try :
460443 upload .abort ()
@@ -554,19 +537,17 @@ def create_dif_from_id(
554537 session = get_debug_files_session (project .organization_id , project .id )
555538 if file is not None :
556539 with file .getfile () as source_fileobj :
557- storage_path , uploaded_file_size = _upload_dif_to_objectstore (
540+ storage_path = _upload_dif_to_objectstore (
558541 session , source_fileobj , content_type , file_size
559542 )
560543 elif fileobj is not None :
561- storage_path , uploaded_file_size = _upload_dif_to_objectstore (
562- session , fileobj , content_type , file_size
563- )
544+ storage_path = _upload_dif_to_objectstore (session , fileobj , content_type , file_size )
564545 else :
565546 raise RuntimeError ("missing file object" )
566547
567548 metrics .distribution (
568549 "storage.put.size" ,
569- uploaded_file_size ,
550+ file_size ,
570551 tags = {"usecase" : "debug-files" , "compression" : "none" },
571552 unit = "byte" ,
572553 )
@@ -575,7 +556,7 @@ def create_dif_from_id(
575556 file = None ,
576557 storage_path = storage_path ,
577558 content_type = content_type ,
578- file_size = uploaded_file_size ,
559+ file_size = file_size ,
579560 date_created = timezone .now (),
580561 checksum = checksum ,
581562 debug_id = meta .debug_id ,
0 commit comments