
Commit 6ea3ea8

Add tests

1 parent d20c269

2 files changed: 163 insertions(+), 15 deletions(-)

src/borg/archive.py: 27 additions, 15 deletions

@@ -720,9 +720,17 @@ def extract_helper(self, item, path, hlm, *, dry_run=False):
             pass
 
     def compare_and_extract_chunks(self, item, fs_path):
-        fs_path = os.path.normpath(fs_path.replace(self.cwd + os.sep, "", 1))
-        fs_path = os.path.join(self.cwd, fs_path)
-        print(f"Starting chunk comparison for {fs_path}")
+        print(f"Initial fs_path: {fs_path}")
+        print(f"self.cwd: {self.cwd}")
+        if fs_path.startswith(self.cwd):
+            fs_path = fs_path[len(self.cwd) :].lstrip(os.sep)
+        print(f"Relative fs_path: {fs_path}")
+
+        # Construct the final path
+        fs_path = os.path.normpath(os.path.join(self.cwd, fs_path))
+        print(f"Final fs_path: {fs_path}")
+        print(f"File exists at final path: {os.path.isfile(fs_path)}")
+
         os.makedirs(os.path.dirname(fs_path), exist_ok=True)
         try:
             if os.path.isfile(fs_path):
@@ -731,45 +739,49 @@ def compare_and_extract_chunks(self, item, fs_path):
                 for chunk_entry in item.chunks:
                     chunkid_A = chunk_entry.id
                     size = chunk_entry.size
+                    print(f"Processing chunk at offset {chunk_offset}")
 
                     fs_file.seek(chunk_offset)
                     data_F = fs_file.read(size)
+                    print(f"Read {len(data_F)} bytes at offset {chunk_offset}")
+                    print(f"File content: {data_F[:20]}...")  # Show first 20 bytes
 
                     if len(data_F) == size:
                         chunkid_F = self.key.id_hash(data_F)
+                        print("Comparing hashes:")  # Debug
+                        print(f"Archive hash: {chunkid_A.hex()}")  # Debug
+                        print(f"File hash: {chunkid_F.hex()}")  # Debug
+                        print(f"Hashes match? {chunkid_A == chunkid_F}")
                         if chunkid_A != chunkid_F:
+                            print("Hashes don't match, fetching new chunk")  # Debug
                             fs_file.seek(chunk_offset)  # Go back to the start of the chunk
                             chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
+                            print(f"Fetched content: {chunk_data[:20]}...")
                             fs_file.write(chunk_data)
+                            fs_file.flush()
+                            print("Wrote and flushed new chunk data")
                     else:
+                        print(f"Chunk size mismatch at offset {chunk_offset}")
                         fs_file.seek(chunk_offset)
                         chunk_data = b"".join(self.pipeline.fetch_many([chunkid_A], ro_type=ROBJ_FILE_STREAM))
                         fs_file.write(chunk_data)
 
                     chunk_offset += size
 
                 fs_file.truncate(item.size)
+                print(f"\nFinal file size: {os.path.getsize(fs_path)}")
+                with open(fs_path, "rb") as f:
+                    print(f"Final content: {f.read()[:20]}...")
             else:
                 with open(fs_path, "wb") as fs_file:
                     for chunk_entry in item.chunks:
                         chunk_data = b"".join(self.pipeline.fetch_many([chunk_entry.id], ro_type=ROBJ_FILE_STREAM))
                         fs_file.write(chunk_data)
                     fs_file.truncate(item.size)
 
-            total_size = 0
-            chunk_size = 8192
             with open(fs_path, "rb") as fs_file:
-                while True:
-                    chunk = fs_file.read(chunk_size)
-                    if not chunk:
-                        break
-                    total_size += len(chunk)
-                    if total_size > item.size:
-                        break
-
-                fs_file.seek(0)
                 preview = fs_file.read(50)
-                print(f"Final file size: {total_size}, Expected: {item.size}")
+                print(f"Final file size: {os.path.getsize(fs_path)}, Expected: {item.size}")
                 print(f"Content preview (text): {preview.decode('utf-8', errors='replace')}")
 
         except OSError as e:
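Stripped of the debug prints, the core of the new method is a compare-and-fetch loop: hash each fixed-offset span of the existing file and fetch only the chunks whose hashes disagree with the archived chunk ids. A minimal standalone sketch of that loop, assuming a hypothetical fetch_chunk(chunk_id) helper and plain SHA-256 in place of borg's keyed id_hash:

import hashlib


def sync_file_chunks(path, chunks, fetch_chunk, total_size):
    """Rewrite only the chunks of `path` whose content hash differs.

    `chunks` is a list of (chunk_id, size) pairs in file order, and
    `fetch_chunk(chunk_id)` returns that chunk's bytes; both are
    hypothetical stand-ins, with plain SHA-256 in place of borg's
    keyed id_hash. Assumes `path` already exists.
    """
    with open(path, "rb+") as f:
        offset = 0
        for chunk_id, size in chunks:
            f.seek(offset)
            data = f.read(size)
            # Reuse the on-disk bytes only when both length and hash match.
            if len(data) != size or hashlib.sha256(data).digest() != chunk_id:
                f.seek(offset)
                f.write(fetch_chunk(chunk_id))
            offset += size
        f.truncate(total_size)  # drop trailing bytes beyond the archived size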

src/borg/testsuite/archive_test.py: 136 additions, 0 deletions

@@ -132,6 +132,11 @@ def add_chunk(self, id, meta, data, stats=None, wait=True, ro_type=None):
         self.objects[id] = data
         return id, len(data)
 
+    def fetch_many(self, ids, ro_type=None):
+        """Mock implementation of fetch_many"""
+        for id in ids:
+            yield self.objects[id]
+
 
 def test_cache_chunk_buffer():
     data = [Item(path="p1"), Item(path="p2")]
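The extractor consumes fetch_many lazily and joins the yielded byte strings, so the mock only has to yield the stored blobs in id order. A self-contained illustration of that call pattern, using a dict-backed stand-in for the MockCache above:

class FakePipeline:
    """Dict-backed stand-in for MockCache: raw chunk bytes keyed by id."""

    def __init__(self):
        self.objects = {}

    def fetch_many(self, ids, ro_type=None):
        # Yield each stored chunk lazily, like the real pipeline.
        for id in ids:
            yield self.objects[id]


pipeline = FakePipeline()
pipeline.objects[b"id1"] = b"hello "
pipeline.objects[b"id2"] = b"world"
# archive.py reassembles chunk data with exactly this join pattern:
assert b"".join(pipeline.fetch_many([b"id1", b"id2"])) == b"hello world"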
@@ -402,3 +407,134 @@ def test_reject_non_sanitized_item():
     for path in rejected_dotdot_paths:
         with pytest.raises(ValueError, match="unexpected '..' element in path"):
             Item(path=path, user="root", group="root")
+
+
+def test_compare_and_extract_chunks(tmpdir, monkeypatch):
+    """Test chunk comparison and selective extraction with fixed-size chunks"""
+    # Setup mock repository and key
+    repository = Mock()
+    key = PlaintextKey(repository)
+    manifest = Manifest(key, repository)
+
+    cache = MockCache()
+
+    # Create a test file with known content divided into 512-byte chunks
+    chunk_size = 512
+    test_data = b"block" * 128  # 640 bytes - will create 2 chunks
+    original_file = tmpdir.join("test.txt")
+    original_file.write_binary(test_data)
+
+    # Create mock item with chunks
+    chunks = []
+    for i in range(0, len(test_data), chunk_size):
+        chunk_data = test_data[i : i + chunk_size]
+        chunk_id = key.id_hash(chunk_data)
+        chunks.append(Mock(id=chunk_id, size=len(chunk_data)))
+        cache.objects[chunk_id] = chunk_data
+
+    item = Mock(chunks=chunks, size=len(test_data))
+
+    # Test case 1: File doesn't exist (full extraction)
+    extractor = Archive(manifest=manifest, name="test", create=True)
+    extractor.pipeline = cache
+    extractor.key = key
+    extractor.cwd = str(tmpdir)
+
+    target_path = str(tmpdir.join("extracted.txt"))
+    extractor.compare_and_extract_chunks(item, target_path)
+
+    with open(target_path, "rb") as f:
+        assert f.read() == test_data
+
+    # Test case 2: File exists with partially matching chunks
+    modified_data = test_data[:256] + b"modified" + test_data[264:]
+    with open(target_path, "wb") as f:
+        f.write(modified_data)
+
+    extractor.compare_and_extract_chunks(item, target_path)
+
+    with open(target_path, "rb") as f:
+        extracted = f.read()
+        assert extracted == test_data
+        assert extracted != modified_data
+
+    # Test case 3: File exists with all matching chunks
+    extractor.compare_and_extract_chunks(item, target_path)
+    with open(target_path, "rb") as f:
+        assert f.read() == test_data
+
+
+def test_compare_and_extract_chunks_size_mismatch(tmpdir):
+    """Test chunk comparison when file size doesn't match chunk size"""
+    repository = Mock()
+    key = PlaintextKey(repository)
+    manifest = Manifest(key, repository)
+
+    cache = MockCache()
+
+    # Create a smaller file than expected
+    test_data = b"block" * 64  # 320 bytes
+    expected_data = b"block" * 128  # 640 bytes
+
+    original_file = tmpdir.join("test.txt")
+    original_file.write_binary(test_data)
+
+    # Create mock item with chunks expecting larger size
+    chunks = []
+    for i in range(0, len(expected_data), 512):
+        chunk_data = expected_data[i : i + 512]
+        chunk_id = key.id_hash(chunk_data)
+        chunks.append(Mock(id=chunk_id, size=len(chunk_data)))
+        cache.objects[chunk_id] = chunk_data
+
+    item = Mock(chunks=chunks, size=len(expected_data))
+
+    # Test extraction
+    extractor = Archive(manifest=manifest, name="test", create=True)
+    extractor.pipeline = cache
+    extractor.key = key
+    extractor.cwd = str(tmpdir)
+
+    target_path = str(original_file)
+    extractor.compare_and_extract_chunks(item, target_path)
+
+    with open(target_path, "rb") as f:
+        assert f.read() == expected_data
+
+
+def test_compare_and_extract_chunks_partial_chunk(tmpdir):
+    """Test chunk comparison with a final partial chunk"""
+    repository = Mock()
+    key = PlaintextKey(repository)
+    manifest = Manifest(key, repository)
+
+    cache = MockCache()
+
+    # Create data that doesn't align with chunk boundaries
+    chunk_size = 512
+    test_data = b"block" * 130  # 650 bytes - will create 2 chunks, second one partial
+
+    original_file = tmpdir.join("test.txt")
+    original_file.write_binary(test_data)
+
+    # Create mock item with chunks
+    chunks = []
+    for i in range(0, len(test_data), chunk_size):
+        chunk_data = test_data[i : i + chunk_size]
+        chunk_id = key.id_hash(chunk_data)
+        chunks.append(Mock(id=chunk_id, size=len(chunk_data)))
+        cache.objects[chunk_id] = chunk_data
+
+    item = Mock(chunks=chunks, size=len(test_data))
+
+    # Test extraction
+    extractor = Archive(manifest=manifest, name="test", create=True)
+    extractor.pipeline = cache
+    extractor.key = key
+    extractor.cwd = str(tmpdir)
+
+    target_path = str(tmpdir.join("extracted.txt"))
+    extractor.compare_and_extract_chunks(item, target_path)
+
+    with open(target_path, "rb") as f:
+        assert f.read() == test_data
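For the partial-chunk case, the arithmetic works out as follows: 130 repetitions of the 5-byte b"block" give 650 bytes, which a 512-byte chunker splits into one full chunk and a 138-byte tail. A quick sanity check:

chunk_size = 512
test_data = b"block" * 130  # 650 bytes, as in the test above
sizes = [len(test_data[i : i + chunk_size]) for i in range(0, len(test_data), chunk_size)]
assert sizes == [512, 138]  # one full chunk plus a partial final chunk

Running python -m pytest src/borg/testsuite/archive_test.py -k compare_and_extract_chunks from a development checkout should exercise all three new tests.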
