@@ -720,76 +720,38 @@ def extract_helper(self, item, path, hlm, *, dry_run=False):
             pass
 
     def compare_and_extract_chunks(self, item, fs_path):
-        print(f"Initial fs_path: {fs_path}")
-        print(f"self.cwd: {self.cwd}")
-        if fs_path.startswith(self.cwd):
-            fs_path = fs_path[len(self.cwd) :].lstrip(os.sep)
-            print(f"Relative fs_path: {fs_path}")
-
-        # Construct the final path
-        fs_path = os.path.normpath(os.path.join(self.cwd, fs_path))
-        print(f"Final fs_path: {fs_path}")
-        print(f"File exists at final path: {os.path.isfile(fs_path)}")
-
-        os.makedirs(os.path.dirname(fs_path), exist_ok=True)
+        """Compare file chunks and patch if needed. Returns True if patching succeeded."""
         try:
-            if os.path.isfile(fs_path):
-                with open(fs_path, "rb+") as fs_file:
-                    chunk_offset = 0
-                    for chunk_entry in item.chunks:
-                        chunkid_A = chunk_entry.id
-                        size = chunk_entry.size
-                        print(f"Processing chunk at offset {chunk_offset}")
+            st = os.stat(fs_path, follow_symlinks=False)
+            if not stat.S_ISREG(st.st_mode):
+                return False
 
-                        fs_file.seek(chunk_offset)
-                        data_F = fs_file.read(size)
-                        print(f"Read {len(data_F)} bytes at offset {chunk_offset}")
-                        print(f"File content: {data_F[:20]}...")  # Show first 20 bytes
-
-                        if len(data_F) == size:
-                            chunkid_F = self.key.id_hash(data_F)
-                            print("Comparing hashes:")  # Debug
-                            print(f"Archive hash: {chunkid_A.hex()}")  # Debug
-                            print(f"File hash: {chunkid_F.hex()}")  # Debug
-                            print(f"Hashes match? {chunkid_A == chunkid_F}")
-                            if chunkid_A != chunkid_F:
-                                print("Hashes don't match, fetching new chunk")  # Debug
-                                fs_file.seek(chunk_offset)  # Go back to the start of the chunk
-                                chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
-                                print(f"Fetched content: {chunk_data[:20]}...")
-                                fs_file.write(chunk_data)
-                                fs_file.flush()
-                                print("Wrote and flushed new chunk data")
-                        else:
-                            print(f"Chunk size mismatch at offset {chunk_offset}")
-                            fs_file.seek(chunk_offset)
-                            chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
-                            fs_file.write(chunk_data)
+            with open(fs_path, "rb+") as fs_file:
+                chunk_offset = 0
+                for chunk_entry in item.chunks:
+                    chunkid_A = chunk_entry.id
+                    size = chunk_entry.size
 
-                        chunk_offset += size
+                    fs_file.seek(chunk_offset)
+                    data_F = fs_file.read(size)
 
-                    fs_file.truncate(item.size)
-                print(f"\nFinal file size: {os.path.getsize(fs_path)}")
-                with open(fs_path, "rb") as f:
-                    print(f"Final content: {f.read()[:20]}...")
-            else:
-                with open(fs_path, "wb") as fs_file:
-                    for chunk_entry in item.chunks:
-                        chunk_data = b"".join(self.pipeline.fetch_many([chunk_entry.id], ro_type=ROBJ_FILE_STREAM))
+                    needs_update = True
+                    if len(data_F) == size:
+                        chunkid_F = self.key.id_hash(data_F)
+                        needs_update = chunkid_A != chunkid_F
+
+                    if needs_update:
+                        chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
+                        fs_file.seek(chunk_offset)
                         fs_file.write(chunk_data)
-                fs_file.truncate(item.size)
 
-            with open(fs_path, "rb") as fs_file:
-                preview = fs_file.read(50)
-                print(f"Final file size: {os.path.getsize(fs_path)}, Expected: {item.size}")
-                print(f"Content preview (text): {preview.decode('utf-8', errors='replace')}")
+                    chunk_offset += size
 
-        except OSError as e:
-            print(f"IO error processing {fs_path}: {e}")
-            raise
-        except Exception as e:
-            print(f"Error processing {fs_path}: {str(e)}")
-            raise
+                fs_file.truncate(item.size)
+                return True
+
+        except (OSError, Exception):
+            return False
 
     def extract_item(
         self,
@@ -802,7 +764,6 @@ def extract_item(
         hlm=None,
         pi=None,
         continue_extraction=False,
-        check_existing=False,
     ):
         """
         Extract archive item.
@@ -815,7 +776,6 @@ def extract_item(
         :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
         :param continue_extraction: continue a previously interrupted extraction of same archive
-        :param check_existing: check against existing file/block device and only retrieve changed data
         """
 
         def same_item(item, st):
@@ -836,16 +796,6 @@ def same_item(item, st):
             # if a previous extraction was interrupted between setting the mtime and setting non-default flags.
             return True
 
-        if check_existing:
-            dest = os.path.normpath(self.cwd)
-            fs_path = os.path.join(dest, item.path)
-
-            if not os.path.normpath(fs_path).startswith(dest):
-                raise Exception(f"Path {fs_path} is outside of extraction directory {dest}")
-
-            self.compare_and_extract_chunks(item, fs_path)
-            return
-
         has_damaged_chunks = "chunks_healthy" in item
         if dry_run or stdout:
             with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
@@ -905,6 +855,9 @@ def make_parent(path):
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
                     return
+                if self.compare_and_extract_chunks(item, path):
+                    return
+
                 with backup_io("open"):
                     fd = open(path, "wb")
                 with fd:
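
For context, here is a minimal standalone sketch of the compare-then-patch idea that compare_and_extract_chunks implements above: read each expected chunk from the existing file, hash it, rewrite only the ranges whose hashes differ, then truncate to the archived size. The function name, the fetch_chunk callable, and the sha256-based id_hash default are stand-ins invented for this sketch, not part of the project's API:

import hashlib
import os
import stat


def patch_file_in_place(fs_path, chunks, file_size, fetch_chunk, id_hash=lambda data: hashlib.sha256(data).digest()):
    """Rewrite only the chunks of an existing regular file whose hashes differ.

    chunks: iterable of (chunk_id, size) pairs in archived file order.
    fetch_chunk: callable returning the stored bytes for a chunk_id.
    Returns True if the file was patched in place, False if it cannot be reused.
    """
    try:
        st = os.stat(fs_path, follow_symlinks=False)
        if not stat.S_ISREG(st.st_mode):
            return False  # only regular files can be patched in place
        with open(fs_path, "rb+") as f:
            offset = 0
            for chunk_id, size in chunks:
                f.seek(offset)
                data = f.read(size)
                # Fetch and rewrite only when the on-disk range is short or its hash differs.
                if len(data) != size or id_hash(data) != chunk_id:
                    f.seek(offset)
                    f.write(fetch_chunk(chunk_id))
                offset += size
            f.truncate(file_size)  # drop trailing bytes if the old file was larger
        return True
    except OSError:
        return False

Compared with rewriting the whole file, this only touches changed ranges, which is what the removed check_existing docstring described: check against an existing file or block device and retrieve only changed data.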