Skip to content

Commit 05c86b2

Browse files
tests: add tests for stats consistency, #8898 #9003
add tests ensuring:
- borg info and borg create report the same "This archive" deduplicated size
- before/after borg recreate, it reports the same "This archive" deduplicated size
- this/all archive(s) stats are the same if 1 archive is in repo
- all archives stats are the same for borg create and borg info

Note that some stats differences are expected.
1 parent cec8d87 commit 05c86b2

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed

src/borg/testsuite/archiver.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,6 +1617,68 @@ def test_info(self):
16171617
info_archive = self.cmd('info', '--first', '1', self.repository_location)
16181618
assert 'Archive name: test\n' in info_archive
16191619

1620+
def test_info_matches_create_deduplicated_size(self):
    """`borg create --json` and `borg info --json` must agree on an archive's deduplicated size.

    Two input files with identical contents are archived; the duplicate data is
    intended to trigger deduplication within the archive, so that the
    deduplicated size differs visibly from the compressed size.
    """
    payload = b'X' * (1024 * 80)
    for name in ('file1', 'file2'):
        self.create_regular_file(name, contents=payload)
    self.cmd('init', '--encryption=repokey', self.repository_location)

    archive = self.repository_location + '::test'
    created = json.loads(self.cmd('create', '--json', archive, 'input'))
    reported = json.loads(self.cmd('info', '--json', archive))

    # "This archive" deduplicated size as seen by create ...
    size_from_create = created['archive']['stats']['deduplicated_size']
    # ... and as seen by info, which must list exactly the one archive we asked for.
    archives = reported['archives']
    assert len(archives) == 1
    size_from_info = archives[0]['stats']['deduplicated_size']

    assert size_from_create == size_from_info
1635+
1636+
def test_info_matches_create_all_archives_deduplicated_size(self):
    """The "All archives" deduplicated size must match between create and repository info.

    Compares `cache.stats.unique_size` from the JSON output of the second
    `borg create --json` with the same key from `borg info --json <repo>`.
    """
    repo = self.repository_location
    payload = b'Y' * (1024 * 80)
    for name in ('file1', 'file2'):
        self.create_regular_file(name, contents=payload)
    self.cmd('init', '--encryption=repokey', repo)

    # Two archives are created so the repository-wide numbers cover more than one archive;
    # only the JSON output of the second create is inspected.
    self.cmd('create', '--json', repo + '::a1', 'input')
    second_create = json.loads(self.cmd('create', '--json', repo + '::a2', 'input'))
    repo_info = json.loads(self.cmd('info', '--json', repo))

    # In the JSON output, the repository-wide deduplicated size is cache.stats.unique_size.
    unique_from_create = second_create['cache']['stats']['unique_size']
    unique_from_info = repo_info['cache']['stats']['unique_size']
    assert unique_from_create == unique_from_info
1652+
1653+
def test_single_archive_all_equals_this_info_and_create(self):
    """Check "This archive" vs. "All archives" deduplicated sizes with one archive in the repo.

    What is asserted:
    - `borg create --json` and `borg info --json` report the same "This archive"
      value (archive.stats.deduplicated_size / archives[0].stats.deduplicated_size);
    - both report the same "All archives" value (cache.stats.unique_size);
    - the "This archive" and "All archives" values differ from each other, because
      the "All archives" accounting includes metadata chunks while the
      "This archive" accounting does not.
    """
    payload = b'Z' * (1024 * 80)
    for name in ('file1', 'file2'):
        self.create_regular_file(name, contents=payload)
    repo = self.repository_location
    archive = repo + '::only'
    self.cmd('init', '--encryption=repokey', repo)

    # Numbers as reported by create --json.
    created = json.loads(self.cmd('create', '--json', archive, 'input'))
    this_create = created['archive']['stats']['deduplicated_size']
    all_create = created['cache']['stats']['unique_size']

    # Numbers as reported by info --json, first for the archive, then for the repository.
    archive_info = json.loads(self.cmd('info', '--json', archive))
    repo_info = json.loads(self.cmd('info', '--json', repo))
    listed = archive_info['archives']
    assert len(listed) == 1
    this_info = listed[0]['stats']['deduplicated_size']
    all_info = repo_info['cache']['stats']['unique_size']

    # create and info must give the same numbers ...
    assert this_create == this_info
    assert all_create == all_info
    # ... while "this archive" and "all archives" are expected to mismatch (metadata chunks).
    assert this_create != all_create
    assert this_info != all_info
1681+
16201682
def test_info_json(self):
16211683
self.create_regular_file('file1', size=1024 * 80)
16221684
self.cmd('init', '--encryption=repokey', self.repository_location)
@@ -4021,6 +4083,25 @@ def test_exit_codes(self):
40214083
self.cmd('create', self.repository_location + '::archive', 'input', fork=True,
40224084
exit_code=Repository.InvalidRepository.exit_mcode)
40234085

4086+
def test_original_size_stable_across_recreate(self):
    """The archive's original size must not change when the archive is recreated.

    Changes in archive metadata (such as the number of chunks) must not
    influence the reported original size; recreating with different chunker
    params is an attempt to reproduce #8898.
    """
    self.cmd('init', '--encryption=repokey', self.repository_location)

    file_sizes = {'file1': 12345, 'file2': 67890}
    for name, size in file_sizes.items():
        self.create_regular_file(name, size=size)
    expected = sum(file_sizes.values())
    archive = self.repository_location + '::archive'

    def reported_original_size():
        # Ask `borg info --json` for the archive and pick its original_size stat.
        info = json.loads(self.cmd('info', '--json', archive))
        return info['archives'][0]['stats']['original_size']

    self.cmd('create', '--compression=none', archive, 'input')
    assert reported_original_size() == expected

    # Rechunk the archive; the original size must still be the sum of the file sizes.
    self.cmd('recreate', '--chunker-params=10,12,11,63', archive)
    assert reported_original_size() == expected
4104+
40244105

40254106
@unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available')
40264107
class ArchiverTestCaseBinary(ArchiverTestCase):

0 commit comments

Comments
 (0)