@@ -132,6 +132,11 @@ def add_chunk(self, id, meta, data, stats=None, wait=True, ro_type=None):
132132 self .objects [id ] = data
133133 return id , len (data )
134134
def fetch_many(self, ids, ro_type=None):
    """Mock implementation of fetch_many.

    Lazily yields the stored raw chunk data for each requested chunk id,
    in request order.  ``ro_type`` is accepted only for signature
    compatibility with the real repository API and is ignored here.

    Raises:
        KeyError: if a requested id was never stored in ``self.objects``.
    """
    # Use ``chunk_id`` rather than ``id`` to avoid shadowing the builtin.
    for chunk_id in ids:
        yield self.objects[chunk_id]
135140
136141def test_cache_chunk_buffer ():
137142 data = [Item (path = "p1" ), Item (path = "p2" )]
@@ -402,3 +407,134 @@ def test_reject_non_sanitized_item():
402407 for path in rejected_dotdot_paths :
403408 with pytest .raises (ValueError , match = "unexpected '..' element in path" ):
404409 Item (path = path , user = "root" , group = "root" )
410+
411+
def test_compare_and_extract_chunks(tmpdir):
    """Test chunk comparison and selective extraction with fixed-size chunks.

    Exercises three scenarios against the same archived item:
      1. target file absent          -> full extraction
      2. target exists, partly stale -> stale chunks replaced with archived data
      3. target fully up to date     -> repeated extraction keeps content intact

    NOTE(review): the ``monkeypatch`` fixture was requested but never used,
    so it has been dropped from the signature.
    """
    # Mock repository/key/manifest trio plus an in-memory chunk store.
    repository = Mock()
    key = PlaintextKey(repository)
    manifest = Manifest(key, repository)
    cache = MockCache()

    # Known content split into fixed 512-byte chunks:
    # 5 * 128 = 640 bytes -> one full chunk + one partial chunk.
    chunk_size = 512
    test_data = b"block" * 128
    original_file = tmpdir.join("test.txt")
    original_file.write_binary(test_data)

    # Build the item's chunk list and seed each chunk's data into the cache.
    chunks = []
    for offset in range(0, len(test_data), chunk_size):
        chunk_data = test_data[offset : offset + chunk_size]
        chunk_id = key.id_hash(chunk_data)
        chunks.append(Mock(id=chunk_id, size=len(chunk_data)))
        cache.objects[chunk_id] = chunk_data

    item = Mock(chunks=chunks, size=len(test_data))

    extractor = Archive(manifest=manifest, name="test", create=True)
    extractor.pipeline = cache
    extractor.key = key
    extractor.cwd = str(tmpdir)

    # Case 1: target file does not exist yet -> full extraction.
    target_path = str(tmpdir.join("extracted.txt"))
    extractor.compare_and_extract_chunks(item, target_path)
    with open(target_path, "rb") as f:
        assert f.read() == test_data

    # Case 2: target exists with stale bytes inside the first chunk.
    # The 8-byte splice (256 + 8 == 264) keeps the overall length unchanged,
    # so only content — not size — differs from the archived data.
    modified_data = test_data[:256] + b"modified" + test_data[264:]
    with open(target_path, "wb") as f:
        f.write(modified_data)

    extractor.compare_and_extract_chunks(item, target_path)
    with open(target_path, "rb") as f:
        extracted = f.read()
    assert extracted == test_data
    assert extracted != modified_data

    # Case 3: target already matches every chunk -> content stays correct.
    extractor.compare_and_extract_chunks(item, target_path)
    with open(target_path, "rb") as f:
        assert f.read() == test_data
465+
466+
def test_compare_and_extract_chunks_size_mismatch(tmpdir):
    """Test chunk comparison when the existing file is smaller than expected.

    The on-disk file holds only half of the archived content; extraction
    over it must still produce the full expected data.
    """
    repository = Mock()
    key = PlaintextKey(repository)
    manifest = Manifest(key, repository)
    cache = MockCache()

    # On-disk file (320 bytes) is shorter than the archived item (640 bytes).
    test_data = b"block" * 64
    expected_data = b"block" * 128

    original_file = tmpdir.join("test.txt")
    original_file.write_binary(test_data)

    # Register the expected content as 512-byte chunks in the mock cache.
    chunks = []
    for start in range(0, len(expected_data), 512):
        piece = expected_data[start : start + 512]
        digest = key.id_hash(piece)
        cache.objects[digest] = piece
        chunks.append(Mock(id=digest, size=len(piece)))

    item = Mock(chunks=chunks, size=len(expected_data))

    extractor = Archive(manifest=manifest, name="test", create=True)
    extractor.pipeline = cache
    extractor.key = key
    extractor.cwd = str(tmpdir)

    # Extract over the too-small file; it must end up with the full data.
    target_path = str(original_file)
    extractor.compare_and_extract_chunks(item, target_path)
    with open(target_path, "rb") as f:
        assert f.read() == expected_data
503+
504+
def test_compare_and_extract_chunks_partial_chunk(tmpdir):
    """Test chunk comparison when the data ends in a partial final chunk."""
    repository = Mock()
    key = PlaintextKey(repository)
    manifest = Manifest(key, repository)
    cache = MockCache()

    # 650 bytes with 512-byte chunking: one full chunk plus a 138-byte tail,
    # so the data deliberately does not align with the chunk boundary.
    chunk_size = 512
    test_data = b"block" * 130

    tmpdir.join("test.txt").write_binary(test_data)

    # Slice the data into chunks, hashing each piece and seeding the cache.
    pieces = [test_data[pos : pos + chunk_size] for pos in range(0, len(test_data), chunk_size)]
    chunks = []
    for piece in pieces:
        digest = key.id_hash(piece)
        cache.objects[digest] = piece
        chunks.append(Mock(id=digest, size=len(piece)))

    item = Mock(chunks=chunks, size=len(test_data))

    extractor = Archive(manifest=manifest, name="test", create=True)
    extractor.pipeline = cache
    extractor.key = key
    extractor.cwd = str(tmpdir)

    # Extraction to a fresh target must reproduce the unaligned data exactly.
    target_path = str(tmpdir.join("extracted.txt"))
    extractor.compare_and_extract_chunks(item, target_path)
    with open(target_path, "rb") as f:
        assert f.read() == test_data
0 commit comments