@@ -1617,6 +1617,68 @@ def test_info(self):
16171617 info_archive = self .cmd ('info' , '--first' , '1' , self .repository_location )
16181618 assert 'Archive name: test\n ' in info_archive
16191619
def test_info_matches_create_deduplicated_size(self):
    # Two files with identical contents force deduplication inside the archive,
    # which makes the deduplicated size clearly differ from the compressed size.
    payload = b'X' * (1024 * 80)
    for name in ('file1', 'file2'):
        self.create_regular_file(name, contents=payload)
    self.cmd('init', '--encryption=repokey', self.repository_location)
    archive = self.repository_location + '::test'
    created = json.loads(self.cmd('create', '--json', archive, 'input'))
    shown = json.loads(self.cmd('info', '--json', archive))

    # The "This archive" deduplicated size must agree between create and info.
    size_from_create = created['archive']['stats']['deduplicated_size']
    listed = shown['archives']
    assert len(listed) == 1
    size_from_info = listed[0]['stats']['deduplicated_size']
    assert size_from_create == size_from_info
1635+
def test_info_matches_create_all_archives_deduplicated_size(self):
    # `borg create --json` and `borg info --json <repo>` must report the same
    # "All archives" deduplicated size.
    payload = b'Y' * (1024 * 80)
    for name in ('file1', 'file2'):
        self.create_regular_file(name, contents=payload)
    repo = self.repository_location
    self.cmd('init', '--encryption=repokey', repo)
    # Two archives make the "All archives" section meaningful and stable.
    self.cmd('create', '--json', repo + '::a1', 'input')
    second_create = json.loads(self.cmd('create', '--json', repo + '::a2', 'input'))
    repo_info = json.loads(self.cmd('info', '--json', repo))

    # In the JSON output the repository-wide deduplicated size is cache.stats.unique_size.
    unique_from_create = second_create['cache']['stats']['unique_size']
    unique_from_info = repo_info['cache']['stats']['unique_size']
    assert unique_from_create == unique_from_info
1652+
def test_single_archive_all_equals_this_info_and_create(self):
    # Repository with exactly one archive: compare the "This archive" and
    # "All archives" deduplicated sizes as reported by `borg create --json`
    # and by `borg info --json` (archive view and repository view).
    payload = b'Z' * (1024 * 80)
    for name in ('file1', 'file2'):
        self.create_regular_file(name, contents=payload)
    repo = self.repository_location
    archive = repo + '::only'
    self.cmd('init', '--encryption=repokey', repo)
    created = json.loads(self.cmd('create', '--json', archive, 'input'))

    # Numbers taken from create --json.
    this_from_create = created['archive']['stats']['deduplicated_size']
    all_from_create = created['cache']['stats']['unique_size']

    # Numbers taken from info --json, archive view first, then repository view.
    archive_view = json.loads(self.cmd('info', '--json', archive))
    repo_view = json.loads(self.cmd('info', '--json', repo))
    assert len(archive_view['archives']) == 1
    this_from_info = archive_view['archives'][0]['stats']['deduplicated_size']
    all_from_info = repo_view['cache']['stats']['unique_size']

    # create and info have to report identical values.
    assert this_from_create == this_from_info
    assert all_from_create == all_from_info
    # "All archives" accounting includes metadata chunks while "This archive"
    # does not, so the two figures are expected to differ.
    assert this_from_create != all_from_create
    assert this_from_info != all_from_info
1681+
16201682 def test_info_json (self ):
16211683 self .create_regular_file ('file1' , size = 1024 * 80 )
16221684 self .cmd ('init' , '--encryption=repokey' , self .repository_location )
@@ -4021,6 +4083,25 @@ def test_exit_codes(self):
40214083 self .cmd ('create' , self .repository_location + '::archive' , 'input' , fork = True ,
40224084 exit_code = Repository .InvalidRepository .exit_mcode )
40234085
def test_original_size_stable_across_recreate(self):
    # Changes in archive metadata (like the number of chunks) must not influence
    # the original size reported for the archive.
    self.cmd('init', '--encryption=repokey', self.repository_location)

    def original_size(archive_name):
        # Return stats.original_size of the named archive from `borg info --json`.
        # The location is built as <repo>::<name> with no surrounding whitespace,
        # the same way the create/recreate calls below address the archive.
        location = self.repository_location + '::' + archive_name
        info = json.loads(self.cmd('info', '--json', location))
        return info['archives'][0]['stats']['original_size']

    sizes = [12345, 67890]
    expected_size = sum(sizes)
    self.create_regular_file('file1', size=sizes[0])
    self.create_regular_file('file2', size=sizes[1])

    self.cmd('create', '--compression=none', self.repository_location + '::archive', 'input')
    assert original_size('archive') == expected_size

    # Recreate with different chunker params to try to reproduce #8898.
    self.cmd('recreate', '--chunker-params=10,12,11,63', self.repository_location + '::archive')
    assert original_size('archive') == expected_size
4104+
40244105
40254106@unittest .skipUnless ('binary' in BORG_EXES , 'no borg.exe available' )
40264107class ArchiverTestCaseBinary (ArchiverTestCase ):
0 commit comments