diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a8f94b1..0cdd478 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,7 +11,7 @@ exclude: &exclude_files >

 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.4.0
+  rev: v6.0.0
   hooks:
   - id: check-json
   - id: check-yaml
@@ -19,12 +19,12 @@ repos:
   - id: trailing-whitespace

 - repo: https://github.com/ikamensh/flynt/
-  rev: '1.0.1'
+  rev: '1.0.6'
   hooks:
   - id: flynt

 - repo: https://github.com/executablebooks/mdformat
-  rev: '0.7.17'
+  rev: '1.0.0'
   hooks:
   - id: mdformat
     additional_dependencies:
@@ -34,20 +34,20 @@ repos:
     files: (?x)^(README\.md|CHANGELOG\.md)$

 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.14.0
+  rev: v3.21.2
   hooks:
   - id: pyupgrade
     args: [--py37-plus]

 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.8.6
+  rev: v0.14.7
   hooks:
   - id: ruff-format
   - id: ruff
     args: [--fix, --exit-non-zero-on-fix, --show-fixes]

 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v1.5.1
+  rev: v1.19.0
   hooks:
   - id: mypy
     additional_dependencies:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index eb343f2..ed1da7f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,15 +2,15 @@

 ## v1.4.0 (6 October 2025)

-- Add `readable`, `writable`, and `closed` properties to stream classes for TextIOWrapper compatibility [\[1c73d64\]](https://github.com/aiidateam/disk-objectstore/commit/1c73d64137e1b093918337609cb6c8a6dece4a7b)
+- Add `readable`, `writable`, and `closed` properties to stream classes for TextIOWrapper compatibility [[1c73d64]](https://github.com/aiidateam/disk-objectstore/commit/1c73d64137e1b093918337609cb6c8a6dece4a7b)

 ## v1.3.0 (17 April 2025)

-- Change API of `database.get_session` to always raise an error [\[6686ad0\]](https://github.com/aiidateam/disk-objectstore/commit/6686ad0c3280bf90e1954b3b8052ec999e8532be)
+- Change API of `database.get_session` to always raise an error [[6686ad0]](https://github.com/aiidateam/disk-objectstore/commit/6686ad0c3280bf90e1954b3b8052ec999e8532be)

-- Add support for Python 3.13 [\[9b02a50\]](https://github.com/aiidateam/disk-objectstore/commit/9b02a50360749db1ea28ebe20661bf074d6c63a0)
+- Add support for Python 3.13 [[9b02a50]](https://github.com/aiidateam/disk-objectstore/commit/9b02a50360749db1ea28ebe20661bf074d6c63a0)

-- Properly close SQL connections that led to open file descriptors in Python3.13 [\[6686ad0\]](https://github.com/aiidateam/disk-objectstore/commit/6686ad0c3280bf90e1954b3b8052ec999e8532be) and [\[f5eed0f\]](https://github.com/aiidateam/disk-objectstore/commit/f5eed0f1afd1576f17e5d71d31df9717041fc9f3)
+- Properly close SQL connections that led to open file descriptors in Python3.13 [[6686ad0]](https://github.com/aiidateam/disk-objectstore/commit/6686ad0c3280bf90e1954b3b8052ec999e8532be) and [[f5eed0f]](https://github.com/aiidateam/disk-objectstore/commit/f5eed0f1afd1576f17e5d71d31df9717041fc9f3)

 ## v1.2.0 (26 September 2024)

@@ -18,25 +18,25 @@ This only enforces proper semantic versioning as the last release added a new fu

 ## v1.1.1 (19 September 2024)

-- Added progress bar functionality for repack and pack_all_loose [\[737f9c7\]](https://github.com/aiidateam/disk-objectstore/commit/737f9c71151bf7ac297c6431688b4a75eac91b7c)
+- Added progress bar functionality for repack and pack_all_loose [[737f9c7]](https://github.com/aiidateam/disk-objectstore/commit/737f9c71151bf7ac297c6431688b4a75eac91b7c)

 ## v1.1.0 (7 March 2024)

 ### Features

-- Add functionality to easily create a container backup [\[23c784a\]](https://github.com/aiidateam/disk-objectstore/commit/23c784a221954a1518a3e35affdec53681f809b7)
+- Add functionality to easily create a container backup [[23c784a]](https://github.com/aiidateam/disk-objectstore/commit/23c784a221954a1518a3e35affdec53681f809b7)

 ## v1.0.0 (September 2023)

 ### Features

-- Add support for `whence=2` in `PackedObjectReader.seek` [\[5515ab6\]](https://github.com/aiidateam/disk-objectstore/commit/5515ab6d75581b36ecb3e0b8ff37407e05abefda)
-- Add support for changing compression when repacking, and add auto compression heuristics [\[599e87c\]](https://github.com/aiidateam/disk-objectstore/commit/599e87c852427e02062f04f5f3d2276013410710)
-- Improve efficiency when accessing packed compressed objects [\[10edd63\]](https://github.com/aiidateam/disk-objectstore/commit/10edd6395455d7c59361e608396b672289d8de58)
+- Add support for `whence=2` in `PackedObjectReader.seek` [[5515ab6]](https://github.com/aiidateam/disk-objectstore/commit/5515ab6d75581b36ecb3e0b8ff37407e05abefda)
+- Add support for changing compression when repacking, and add auto compression heuristics [[599e87c]](https://github.com/aiidateam/disk-objectstore/commit/599e87c852427e02062f04f5f3d2276013410710)
+- Improve efficiency when accessing packed compressed objects [[10edd63]](https://github.com/aiidateam/disk-objectstore/commit/10edd6395455d7c59361e608396b672289d8de58)

 ### Changes

-- A number of API methods changed the return type from bare dictionaries to dataclass instances [\[7a63462\]](https://github.com/aiidateam/disk-objectstore/commit/7a634626ea3e5f35aa3cdd458daf9d8b825d759a)
+- A number of API methods changed the return type from bare dictionaries to dataclass instances [[7a63462]](https://github.com/aiidateam/disk-objectstore/commit/7a634626ea3e5f35aa3cdd458daf9d8b825d759a)

   - `Container.get_object_stream_and_meta -> ObjectMeta`
   - `Container.get_objects_meta -> ObjectMeta`
@@ -47,43 +47,43 @@ This only enforces proper semantic versioning as the last release added a new fu

   The dataclasses are importable from `disk_objectstore.dataclasses`.

-- A number of API methods replaced using `os.path` with `str` paths, for `pathlib.Path` [\[df96142\]](https://github.com/aiidateam/disk-objectstore/commit/df9614236b7d420fb610313d70ffae51e7aead75)
+- A number of API methods replaced using `os.path` with `str` paths, for `pathlib.Path` [[df96142]](https://github.com/aiidateam/disk-objectstore/commit/df9614236b7d420fb610313d70ffae51e7aead75)

   The following methods now return a `pathlib.Path` instance:

   - `Container.get_folder`
   - `LazyOpener.path`

-- Various improvements to docs and code [\[5ba9316\]](https://github.com/aiidateam/disk-objectstore/commit/5ba93162cd49d9b1ca7149c502349bfb06833255)
+- Various improvements to docs and code [[5ba9316]](https://github.com/aiidateam/disk-objectstore/commit/5ba93162cd49d9b1ca7149c502349bfb06833255)

 ### Devops

-- Moving documentation to `sphinx+myst` [\[2002f3c\]](https://github.com/aiidateam/disk-objectstore/commit/2002f3c3ec07f7ff46a04df293c8c9a7dff4db6a)
-- Adopt PEP 621 and move build spec to `pyproject.toml` [\[4bd0c4e\]](https://github.com/aiidateam/disk-objectstore/commit/4bd0c4e01eaf3c149d4e11921b7ff4d42a5d5da5)
-- Make types more permissive [\[c012056\]](https://github.com/aiidateam/disk-objectstore/commit/c0120568a992b41a55b325f3217d4902b5281070)
+- Moving documentation to `sphinx+myst` [[2002f3c]](https://github.com/aiidateam/disk-objectstore/commit/2002f3c3ec07f7ff46a04df293c8c9a7dff4db6a)
+- Adopt PEP 621 and move build spec to `pyproject.toml` [[4bd0c4e]](https://github.com/aiidateam/disk-objectstore/commit/4bd0c4e01eaf3c149d4e11921b7ff4d42a5d5da5)
+- Make types more permissive [[c012056]](https://github.com/aiidateam/disk-objectstore/commit/c0120568a992b41a55b325f3217d4902b5281070)

 ### Dependencies

-- Add Python 3.11 support [\[afdae26\]](https://github.com/aiidateam/disk-objectstore/commit/afdae261a5849e994b5920ca07665fc6a19f3852)
-- Unpin `sqlalchemy` adding support for `>=1.4.22` [\[a2a987f\]](https://github.com/aiidateam/disk-objectstore/commit/a2a987f02a128b7cc265982e102d210e6e17d6f6)
-- Removed uneeded `ablog` dependencies [\[8165f58\]](https://github.com/aiidateam/disk-objectstore/commit/8165f58fefdd40b55555eef9a2d40ee280593232)
+- Add Python 3.11 support [[afdae26]](https://github.com/aiidateam/disk-objectstore/commit/afdae261a5849e994b5920ca07665fc6a19f3852)
+- Unpin `sqlalchemy` adding support for `>=1.4.22` [[a2a987f]](https://github.com/aiidateam/disk-objectstore/commit/a2a987f02a128b7cc265982e102d210e6e17d6f6)
+- Removed uneeded `ablog` dependencies [[8165f58]](https://github.com/aiidateam/disk-objectstore/commit/8165f58fefdd40b55555eef9a2d40ee280593232)

 ## v0.6.0 (September 2021)

 - ⬆️ UPGRADE: Remove Python support for 3.5 and 3.6, and add support for 3.9.
-- ⬆️ UPGRADE: SQLAlchemy v1.4 (with v2 API) [\[#114\]](https://github.com/aiidateam/disk-objectstore/pull/114)
-- ✨ NEW: Add basic CLI [\[#117\]](https://github.com/aiidateam/disk-objectstore/pull/117) (see README.md for details)
-- 🔧 MAINTAIN: Add type annotations and mypy type checking [\[#113\]](https://github.com/aiidateam/disk-objectstore/pull/113)
+- ⬆️ UPGRADE: SQLAlchemy v1.4 (with v2 API) [[#114]](https://github.com/aiidateam/disk-objectstore/pull/114)
+- ✨ NEW: Add basic CLI [[#117]](https://github.com/aiidateam/disk-objectstore/pull/117) (see README.md for details)
+- 🔧 MAINTAIN: Add type annotations and mypy type checking [[#113]](https://github.com/aiidateam/disk-objectstore/pull/113)

 ## v0.5.0 (November 2020)

-- Various general (but very important) speed improvements [\[#96\]](https://github.com/aiidateam/disk-objectstore/pull/96) [\[#102\]](https://github.com/aiidateam/disk-objectstore/pull/102)
-- Add callbacks to a number of functions (e.g. export, add_objects_to_pack, ... to allow showing progress bars or similar indicators [\[#96\]](https://github.com/aiidateam/disk-objectstore/pull/96)
-- Implement repacking (at least when not changing hashing or compression) [\[#96\]](https://github.com/aiidateam/disk-objectstore/pull/96)
-- Remove `export` function, implement `import_objects` function instead, to be called on the other side (it's more efficient) [\[#96\]](https://github.com/aiidateam/disk-objectstore/pull/96)
-- Add support for VACUUMing operations on the SQLite database (very important for efficiency) [\[#96\]](https://github.com/aiidateam/disk-objectstore/pull/96)
-- Add support for multiple hashing algorithms [\[#96\]](https://github.com/aiidateam/disk-objectstore/pull/96)
-- Add concept of (unique) `container_id` [\[#97\]](https://github.com/aiidateam/disk-objectstore/pull/97)
-- Generalize the compression algorithm implementation, and multiple algorithms are supported now [\[#99\]](https://github.com/aiidateam/disk-objectstore/pull/99)
+- Various general (but very important) speed improvements [[#96]](https://github.com/aiidateam/disk-objectstore/pull/96) [[#102]](https://github.com/aiidateam/disk-objectstore/pull/102)
+- Add callbacks to a number of functions (e.g. export, add_objects_to_pack, ... to allow showing progress bars or similar indicators [[#96]](https://github.com/aiidateam/disk-objectstore/pull/96)
+- Implement repacking (at least when not changing hashing or compression) [[#96]](https://github.com/aiidateam/disk-objectstore/pull/96)
+- Remove `export` function, implement `import_objects` function instead, to be called on the other side (it's more efficient) [[#96]](https://github.com/aiidateam/disk-objectstore/pull/96)
+- Add support for VACUUMing operations on the SQLite database (very important for efficiency) [[#96]](https://github.com/aiidateam/disk-objectstore/pull/96)
+- Add support for multiple hashing algorithms [[#96]](https://github.com/aiidateam/disk-objectstore/pull/96)
+- Add concept of (unique) `container_id` [[#97]](https://github.com/aiidateam/disk-objectstore/pull/97)
+- Generalize the compression algorithm implementation, and multiple algorithms are supported now [[#99]](https://github.com/aiidateam/disk-objectstore/pull/99)

 ## v0.4.0 (20 July 2020)

diff --git a/disk_objectstore/cli.py b/disk_objectstore/cli.py
index d2421a6..db48919 100644
--- a/disk_objectstore/cli.py
+++ b/disk_objectstore/cli.py
@@ -167,12 +167,12 @@ def optimize(dostore: ContainerContext, non_interactive: bool, compress: bool, v
     if not non_interactive:
         click.confirm('Is this the only process accessing the container?', abort=True)
     size = sum(f.stat().st_size for f in dostore.path.glob('**/*') if f.is_file())
-    click.echo(f'Initial container size: {round(size/1000, 2)} Mb')
+    click.echo(f'Initial container size: {round(size / 1000, 2)} Mb')
     with dostore.container as container:
         container.pack_all_loose(compress=compress)
         container.clean_storage(vacuum=vacuum)
     size = sum(f.stat().st_size for f in dostore.path.glob('**/*') if f.is_file())
-    click.echo(f'Final container size: {round(size/1000, 2)} Mb')
+    click.echo(f'Final container size: {round(size / 1000, 2)} Mb')


 @main.command('backup')
diff --git a/disk_objectstore/container.py b/disk_objectstore/container.py
index f33a578..2df50fc 100644
--- a/disk_objectstore/container.py
+++ b/disk_objectstore/container.py
@@ -1548,9 +1548,9 @@ def add_streamed_objects_to_pack(  # pylint: disable=too-many-locals, too-many-b
             operations! (See e.g. the `import_files()` method).
         :return: a list of object hash keys
         """
-        assert isinstance(
-            compress, bool
-        ), 'Only True of False are valid `compress` modes when adding direclty to a pack'
+        assert isinstance(compress, bool), (
+            'Only True of False are valid `compress` modes when adding direclty to a pack'
+        )

         yield_per_size = 1000
         hashkeys: list[str] = []
@@ -1875,9 +1875,9 @@ def loosen_object(self, hashkey):

         # This always rewrites it as loose
         written_hashkey = self.add_streamed_object(stream)
-        assert (
-            written_hashkey == hashkey
-        ), 'Mismatch in the hashkey when rewriting an existing object as loose! {written_hashkey} vs {hashkey}'
+        assert written_hashkey == hashkey, (
+            'Mismatch in the hashkey when rewriting an existing object as loose! {written_hashkey} vs {hashkey}'
+        )
         return self._get_loose_path_from_hashkey(hashkey)

     def _vacuum(self) -> None:
@@ -2546,14 +2546,14 @@ def repack_pack(  # pylint: disable=too-many-branches,too-many-statements,too-ma
             In case of "close", the value is None.
             return value of the callback function is ignored.
         """
-        assert (
-            pack_id != self._REPACK_PACK_ID
-        ), f"The specified pack_id '{pack_id}' is invalid, it is the one used for repacking"
+        assert pack_id != self._REPACK_PACK_ID, (
+            f"The specified pack_id '{pack_id}' is invalid, it is the one used for repacking"
+        )

         # Check that it does not exist
-        assert not self._get_pack_path_from_pack_id(
-            self._REPACK_PACK_ID, allow_repack_pack=True
-        ).exists(), f"The repack pack '{self._REPACK_PACK_ID}' already exists, probably a previous repacking aborted?"
+        assert not self._get_pack_path_from_pack_id(self._REPACK_PACK_ID, allow_repack_pack=True).exists(), (
+            f"The repack pack '{self._REPACK_PACK_ID}' already exists, probably a previous repacking aborted?"
+        )

         session = self._get_operation_session()

diff --git a/disk_objectstore/examples/example_objectstore.py b/disk_objectstore/examples/example_objectstore.py
index 41d13b2..9785760 100755
--- a/disk_objectstore/examples/example_objectstore.py
+++ b/disk_objectstore/examples/example_objectstore.py
@@ -71,7 +71,7 @@ def main(
     files = {}

     start_counts = container.count_objects()
-    print(f"Currently known objects: {start_counts['packed']} packed, {start_counts['loose']} loose")
+    print(f'Currently known objects: {start_counts["packed"]} packed, {start_counts["loose"]} loose')
     print('Pack objects on disk:', start_counts['pack_files'])

     print(f'Generating {num_files} files in memory...')
@@ -95,9 +95,9 @@ def main(

     # Check that no loose files were created
     counts = container.count_objects()
-    assert (
-        counts['loose'] == start_counts['loose']
-    ), f"Mismatch (loose in packed case): {start_counts['loose']} != {counts['loose']}"
+    assert counts['loose'] == start_counts['loose'], (
+        f'Mismatch (loose in packed case): {start_counts["loose"]} != {counts["loose"]}'
+    )
     ## Cannot do this with the hash key implenentation - I might have stored the same object twice
     # assert counts['packed'
     # ] == start_counts['packed'] + num_files, 'Mismatch (packed in packed case): {} + {} != {}'.format(
@@ -156,7 +156,7 @@ def main(
     # Check that all loose files are gone
     counts = container.count_objects()
     loose_folder = container._get_loose_folder()  # pylint: disable=protected-access
-    assert not counts['loose'], 'loose objects left: ' f'{os.listdir(loose_folder)}'
+    assert not counts['loose'], f'loose objects left: {os.listdir(loose_folder)}'
     ## I cannot do this because I could have overlap if the object is identical and has the same hash key
     # assert counts['packed'] == start_counts['packed'] + start_counts[
     #    'loose'] + num_files, 'Mismatch (post-pack): {} + {} + {} != {}'.format(
diff --git a/disk_objectstore/examples/profile_zeros.py b/disk_objectstore/examples/profile_zeros.py
index e23219d..eacaa0a 100755
--- a/disk_objectstore/examples/profile_zeros.py
+++ b/disk_objectstore/examples/profile_zeros.py
@@ -32,7 +32,7 @@ def main_run(container, size_gb, compress_packs):
     size_bytes = size_gb * 1024 * 1024 * 1024

     start_counts = container.count_objects()
-    print(f"Currently known objects: {start_counts['packed']} packed, {start_counts['loose']} loose")
+    print(f'Currently known objects: {start_counts["packed"]} packed, {start_counts["loose"]} loose')
     print('Pack objects on disk:', start_counts['pack_files'])

     zero_stream = ZeroStream(length=size_bytes)
@@ -44,12 +44,12 @@ def main_run(container, size_gb, compress_packs):

     # Check that no loose files were created
     counts = container.count_objects()
-    assert (
-        counts['loose'] == start_counts['loose']
-    ), f"Mismatch (loose in packed case): {start_counts['loose']} != {counts['loose']}"
-    assert (
-        counts['packed'] == start_counts['packed'] + 1
-    ), f"Mismatch (packed in packed case): {start_counts['packed']} + 1 != {counts['packed']}"
+    assert counts['loose'] == start_counts['loose'], (
+        f'Mismatch (loose in packed case): {start_counts["loose"]} != {counts["loose"]}'
+    )
+    assert counts['packed'] == start_counts['packed'] + 1, (
+        f'Mismatch (packed in packed case): {start_counts["packed"]} + 1 != {counts["packed"]}'
+    )

     # print container size info
     size_info = container.get_total_size()
@@ -167,9 +167,9 @@ def main(size_gb, path, clear, check_memory_measurement, with_line_profiler, com
             interval=memory_check_interval,
         )
         # Check that it's not an empty list
-        assert (
-            memory_report
-        ), f'>> Process too fast for checking memory usage with interval {memory_check_interval} s!!!'
+        assert memory_report, (
+            f'>> Process too fast for checking memory usage with interval {memory_check_interval} s!!!'
+        )
         print(
             f'>> Max memory usage (check interval {memory_check_interval} s, '
             f'{len(memory_report)} checks performed): {max(memory_report):.3f} MB'
diff --git a/disk_objectstore/utils.py b/disk_objectstore/utils.py
index 98f5cc9..258c5d3 100644
--- a/disk_objectstore/utils.py
+++ b/disk_objectstore/utils.py
@@ -185,7 +185,7 @@ def seek(self, target: int, whence: int = 0) -> int:
         if self.closed:
             raise ValueError('I/O operation on closed file.')
         assert self._stream is not None, (
-            'LazyLooseStream has an open stream, but the stream is None! ' 'This should not happen'
+            'LazyLooseStream has an open stream, but the stream is None! This should not happen'
         )
         return self._stream.seek(target, whence)

@@ -195,7 +195,7 @@ def tell(self) -> int:
         if self.closed:
             raise ValueError('I/O operation on closed file.')
         assert self._stream is not None, (
-            'LazyLooseStream has an open stream, but the stream is None! ' 'This should not happen'
+            'LazyLooseStream has an open stream, but the stream is None! This should not happen'
         )
         return self._stream.tell()

@@ -212,7 +212,7 @@ def read(self, size: int = -1) -> bytes:
         if self.closed:
             raise ValueError('I/O operation on closed file.')
         assert self._stream is not None, (
-            'LazyLooseStream has an open stream, but the stream is None! ' 'This should not happen'
+            'LazyLooseStream has an open stream, but the stream is None! This should not happen'
         )
         return self._stream.read(size)

@@ -844,7 +844,7 @@ def read(self, size: int = -1) -> bytes:
         # don't use anymore the compressed one.
         if self._use_uncompressed_stream:
             assert self._lazy_uncompressed_stream is not None, (
-                'Using internally an uncompressed stream, but it is None! ' 'This should not happen'
+                'Using internally an uncompressed stream, but it is None! This should not happen'
             )
             return self._lazy_uncompressed_stream.read(size)
         return self._read_compressed(size)
@@ -951,7 +951,7 @@ def tell(self) -> int:
         """Return current position in file."""
         if self._use_uncompressed_stream:
             assert self._lazy_uncompressed_stream is not None, (
-                'Using internally an uncompressed stream, but it is None! ' 'This should not happen'
+                'Using internally an uncompressed stream, but it is None! This should not happen'
             )
             return self._lazy_uncompressed_stream.tell()
         return self._pos
@@ -1017,7 +1017,7 @@ def _seek_internal(self, target: int, whence: int = 0) -> int:
         # If we are using the uncompressed stream, I just proxy the request
         if self._use_uncompressed_stream:
             assert self._lazy_uncompressed_stream is not None, (
-                'Using internally an uncompressed stream, but it is None! ' 'This should not happen'
+                'Using internally an uncompressed stream, but it is None! This should not happen'
             )
             return self._lazy_uncompressed_stream.seek(target, whence)

diff --git a/tests/concurrent_tests/periodic_packer.py b/tests/concurrent_tests/periodic_packer.py
index 64d4bed..36c15a8 100755
--- a/tests/concurrent_tests/periodic_packer.py
+++ b/tests/concurrent_tests/periodic_packer.py
@@ -44,9 +44,9 @@ def main(path, repetitions, wait_time):
         end_counts = container.count_objects()

         print(
-            f"[PACKER {timestamp()}] Packed objects (was {start_counts['loose']} loose, "
-            f"{start_counts['packed']} packed; now: "
-            f"{end_counts['loose']} loose, {end_counts['packed']} packed)."
+            f'[PACKER {timestamp()}] Packed objects (was {start_counts["loose"]} loose, '
+            f'{start_counts["packed"]} packed; now: '
+            f'{end_counts["loose"]} loose, {end_counts["packed"]} packed).'
         )

diff --git a/tests/concurrent_tests/periodic_worker.py b/tests/concurrent_tests/periodic_worker.py
index 082bed8..a356bce 100755
--- a/tests/concurrent_tests/periodic_worker.py
+++ b/tests/concurrent_tests/periodic_worker.py
@@ -51,9 +51,7 @@ def timestamp():
     '-s',
     '--shared-folder',
    default='/tmp/test-container-shared',
-    help=(
-        'Test folder path, in which all locusts will write the checksums for others to read. ' 'It must already exist.'
-    ),
+    help=('Test folder path, in which all locusts will write the checksums for others to read. It must already exist.'),
 )
 @click.option(
     '-b',
@@ -86,10 +84,10 @@ def main(

     start_counts = container.count_objects()
     print(
-        f"[{proc_id} {timestamp()}] Currently known objects: "
-        f"{start_counts['packed']} packed, {start_counts['loose']} loose"
+        f'[{proc_id} {timestamp()}] Currently known objects: '
+        f'{start_counts["packed"]} packed, {start_counts["loose"]} loose'
     )
-    print(f"[{proc_id} {timestamp()}] Pack objects on disk: {start_counts['pack_files']}")
+    print(f'[{proc_id} {timestamp()}] Pack objects on disk: {start_counts["pack_files"]}')

     for iteration in range(repetitions):
         if iteration != 0:
@@ -232,9 +230,9 @@ def main(
         assert not only_right, f'objects only in all_checksums: {only_right}'
         assert not only_left, f'objects only in retrieved_checksums: {only_left}'
         for key, value in retrieved_checksums.items():
-            assert (
-                all_checksums[key] == value
-            ), f'Mismatch for {key}: {all_checksums[key]} vs. {value}; meta={metas[key]}'
+            assert all_checksums[key] == value, (
+                f'Mismatch for {key}: {all_checksums[key]} vs. {value}; meta={metas[key]}'
+            )
         del retrieved_content

         random.shuffle(all_hashkeys)
diff --git a/tests/conftest.py b/tests/conftest.py
index 979ffdd..c8cf19e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -50,9 +50,9 @@ def callback(self, action, value):
         """Check how the callback is called."""

         if action == 'init':
-            assert (
-                self.current_action is None
-            ), f"Starting a new action '{action}' without closing the old one {self.current_action}"
+            assert self.current_action is None, (
+                f"Starting a new action '{action}' without closing the old one {self.current_action}"
+            )
             self.current_action = {'start_value': value, 'value': 0}
         elif action == 'update':
             # Track the current position
diff --git a/tests/test_container.py b/tests/test_container.py
index 582085e..239dab0 100644
--- a/tests/test_container.py
+++ b/tests/test_container.py
@@ -40,15 +40,15 @@ def _assert_empty_repo(container):
     :param container: a Container.
     """
     counts = container.count_objects()
-    assert (
-        counts['packed'] == 0
-    ), f"The container should be empty at the beginning (but there are {counts['packed']} packed objects)"
-    assert (
-        counts['loose'] == 0
-    ), f"The container should be empty at the beginning (but there are {counts['loose']} loose objects)"
-    assert (
-        counts['pack_files'] == 0
-    ), f"The container should be empty at the beginning (but there are {counts['pack_files']} pack files)"
+    assert counts['packed'] == 0, (
+        f'The container should be empty at the beginning (but there are {counts["packed"]} packed objects)'
+    )
+    assert counts['loose'] == 0, (
+        f'The container should be empty at the beginning (but there are {counts["loose"]} loose objects)'
+    )
+    assert counts['pack_files'] == 0, (
+        f'The container should be empty at the beginning (but there are {counts["pack_files"]} pack files)'
+    )


 def _add_objects_loose_loop(container, data):
@@ -125,13 +125,13 @@ def test_add_get_loose(temp_container, generate_random_data, retrieve_bulk):
     obj_md5s = _add_objects_loose_loop(temp_container, data)

     counts = temp_container.count_objects()
-    assert (
-        counts['packed'] == 0
-    ), f"The container should have no packed objects (but there are {counts['packed']} instead)"
+    assert counts['packed'] == 0, (
+        f'The container should have no packed objects (but there are {counts["packed"]} instead)'
+    )
     # I check with the length of the set because I could have picked to random identical objects
-    assert counts['loose'] == len(
-        set(obj_md5s)
-    ), f"The container should have {len(set(obj_md5s))} loose objects (but there are {counts['loose']} instead)"
+    assert counts['loose'] == len(set(obj_md5s)), (
+        f'The container should have {len(set(obj_md5s))} loose objects (but there are {counts["loose"]} instead)'
+    )

     # Retrieve objects (loose), in random order
     random_keys = list(obj_md5s.keys())
@@ -147,9 +147,9 @@
     assert set(obj_md5s) == set(retrieved_md5s)
     # Check that the MD5 are correct
     for obj_hashkey in obj_md5s:  # pylint: disable=consider-using-dict-items
-        assert (
-            obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey]
-        ), f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        assert obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey], (
+            f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        )


 def test_add_loose_from_stream(temp_container):
@@ -189,20 +189,20 @@ def test_add_get_with_packing(temp_container, generate_random_data, use_compress
     temp_container.pack_all_loose(compress=use_compression)

     counts = temp_container.count_objects()
-    assert counts['packed'] == len(
-        set(obj_md5s)
-    ), f"The container should have {len(set(obj_md5s))} packed objects (but there are {counts['packed']} instead)"
+    assert counts['packed'] == len(set(obj_md5s)), (
+        f'The container should have {len(set(obj_md5s))} packed objects (but there are {counts["packed"]} instead)'
+    )
     # Loose objects are not immediately deleted
     assert counts['loose'] == len(set(obj_md5s)), (
-        f"The container should still have all {len(set(obj_md5s))} loose objects "
-        f"(but there are {counts['loose']} instead)"
+        f'The container should still have all {len(set(obj_md5s))} loose objects '
+        f'(but there are {counts["loose"]} instead)'
     )

     # Clean up and remove loose objects that are already packed
     temp_container.clean_storage()
     counts = temp_container.count_objects()
     # Now there shouldn't be any more loose objects
-    assert counts['loose'] == 0, f"The container should have 0 loose objects (but there are {counts['loose']} instead)"
+    assert counts['loose'] == 0, f'The container should have 0 loose objects (but there are {counts["loose"]} instead)'

     # Retrieve objects (loose), in random order
     random_keys = list(obj_md5s.keys())
@@ -218,9 +218,9 @@
     assert set(obj_md5s) == set(retrieved_md5s)
     # Check that the MD5 are correct
     for obj_hashkey in obj_md5s:  # pylint: disable=consider-using-dict-items
-        assert (
-            obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey]
-        ), f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        assert obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey], (
+            f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        )


 @pytest.mark.parametrize('use_compression', [True, False])
@@ -236,10 +236,10 @@ def test_directly_to_pack_content(temp_container, generate_random_data, use_comp
     obj_md5s = _add_objects_directly_to_pack(temp_container, data, compress=use_compression)

     counts = temp_container.count_objects()
-    assert counts['packed'] == len(
-        set(data)
-    ), f"The container should have {len(set(data))} packed objects (but there are {counts['packed']} instead)"
-    assert counts['loose'] == 0, f"The container should have 0 loose objects (but there are {counts['loose']} instead)"
+    assert counts['packed'] == len(set(data)), (
+        f'The container should have {len(set(data))} packed objects (but there are {counts["packed"]} instead)'
+    )
+    assert counts['loose'] == 0, f'The container should have 0 loose objects (but there are {counts["loose"]} instead)'

     # Retrieve objects (loose), in random order
     random_keys = list(obj_md5s.keys())
@@ -252,9 +252,9 @@
     assert set(obj_md5s) == set(retrieved_md5s)
     # Check that the MD5 are correct
     for obj_hashkey in obj_md5s:
-        assert (
-            obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey]
-        ), f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        assert obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey], (
+            f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        )


 @pytest.mark.parametrize(
@@ -494,10 +494,10 @@ def test_directly_to_pack_streamed(temp_dir, generate_random_data, use_compressi
     obj_md5s = dict(zip(obj_hashkeys, keys))

     counts = temp_container.count_objects()
-    assert counts['packed'] == len(
-        set(data)
-    ), f"The container should have {len(set(data))} packed objects (but there are {counts['packed']} instead)"
-    assert counts['loose'] == 0, f"The container should have 0 loose objects (but there are {counts['loose']} instead)"
+    assert counts['packed'] == len(set(data)), (
+        f'The container should have {len(set(data))} packed objects (but there are {counts["packed"]} instead)'
+    )
+    assert counts['loose'] == 0, f'The container should have 0 loose objects (but there are {counts["loose"]} instead)'

     # Retrieve objects (loose), in random order
     random_keys = list(obj_md5s.keys())
@@ -510,9 +510,9 @@
     assert set(obj_md5s) == set(retrieved_md5s)
     # Check that the MD5 are correct
     for obj_hashkey in obj_md5s:
-        assert (
-            obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey]
-        ), f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        assert obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey], (
+            f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        )

     # I close the container, as this is needed on Windows
     temp_container.close()
@@ -550,19 +550,19 @@ def test_prefix_lengths(temp_dir, generate_random_data, pack_size_target, loose_
     assert all(len(inode) == loose_prefix_len for inode in loose_firstlevel)

     counts = container.count_objects()
-    assert (
-        counts['packed'] == 0
-    ), f"The container should have 0 packed objects (but there are {counts['packed']} instead)"
-    assert counts['loose'] == len(
-        set(obj_md5s)
-    ), f"The container should have {len(set(obj_md5s))} loose objects (but there are {counts['loose']} instead)"
+    assert counts['packed'] == 0, (
+        f'The container should have 0 packed objects (but there are {counts["packed"]} instead)'
+    )
+    assert counts['loose'] == len(set(obj_md5s)), (
+        f'The container should have {len(set(obj_md5s))} loose objects (but there are {counts["loose"]} instead)'
+    )

     retrieved_md5s = _get_data_and_md5_bulk(container, obj_md5s.keys())
     # Check that the MD5 are correct
     for obj_hashkey in obj_md5s:  # pylint: disable=consider-using-dict-items
-        assert (
-            obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey]
-        ), f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        assert obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey], (
+            f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        )

     # Pack all loose objects
     container.pack_all_loose()
@@ -577,27 +577,27 @@
     )

     counts = container.count_objects()
-    assert counts['packed'] == len(
-        set(obj_md5s)
-    ), f"The container should have {len(set(obj_md5s))} packed objects (but there are {counts['packed']} instead)"
+    assert counts['packed'] == len(set(obj_md5s)), (
+        f'The container should have {len(set(obj_md5s))} packed objects (but there are {counts["packed"]} instead)'
+    )
     # Loose objects are not immediately deleted
     assert counts['loose'] == len(set(obj_md5s)), (
-        f"The container should still have all {len(set(obj_md5s))} loose objects "
-        f"(but there are {counts['loose']} instead)"
+        f'The container should still have all {len(set(obj_md5s))} loose objects '
+        f'(but there are {counts["loose"]} instead)'
     )

     # Clean up and remove loose objects that are already packed
     container.clean_storage()
     counts = container.count_objects()
     # Now there shouldn't be any more loose objects
-    assert counts['loose'] == 0, f"The container should have 0 loose objects (but there are {counts['loose']} instead)"
+    assert counts['loose'] == 0, f'The container should have 0 loose objects (but there are {counts["loose"]} instead)'

     retrieved_md5s = _get_data_and_md5_bulk(container, obj_md5s.keys())
     # Check that the MD5 are correct
     for obj_hashkey in obj_md5s:  # pylint: disable=consider-using-dict-items
-        assert (
-            obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey]
-        ), f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        assert obj_md5s[obj_hashkey] == retrieved_md5s[obj_hashkey], (
+            f"Object '{obj_hashkey}' has wrong MD5s ({obj_md5s[obj_hashkey]} vs {retrieved_md5s[obj_hashkey]})"
+        )

     # Test also the validation functions
     valid_pack_ids = ['0', '1', '2', '10', '100']
@@ -2284,9 +2284,9 @@ def test_validate_callback(temp_container, callback_instance):

     temp_container.validate(callback=callback_instance.callback)

-    assert (
-        callback_instance.current_action is None
-    ), "The 'validate' call did not perform a final callback with a 'close' event"
+    assert callback_instance.current_action is None, (
+        "The 'validate' call did not perform a final callback with a 'close' event"
+    )

     # I convert to dict because I the order of the actions can change
     performed_actions_dict = {
@@ -2323,9 +2323,9 @@ def test_add_streamed_object_to_pack_callback(  # pylint: disable=invalid-name

     assert temp_container.get_object_content(hashkey) == content

-    assert (
-        callback_instance.current_action is None
-    ), "The 'validate' call did not perform a final callback with a 'close' event"
+    assert callback_instance.current_action is None, (
+        "The 'validate' call did not perform a final callback with a 'close' event"
+    )

     assert callback_instance.performed_actions == [
         {
@@ -2365,9 +2365,9 @@ def test_add_streamed_objects_to_pack_callback(  # pylint: disable=invalid-name
         callback=callback_instance.callback,
     )

-    assert (
-        callback_instance.current_action is None
-    ), "The 'add_streamed_objects_to_pack' call did not perform a final callback with a 'close' event"
+    assert callback_instance.current_action is None, (
+        "The 'add_streamed_objects_to_pack' call did not perform a final callback with a 'close' event"
+    )

     expected_actions = []
     # First call
@@ -3093,18 +3093,18 @@ def test_packs_read_in_order(temp_dir):
             last_offset = 0
         elif meta['pack_id'] != last_pack:
             assert meta['pack_id'] not in seen_packs, (
-                f"Objects were already retrieved from pack {meta['pack_id']}, "
-                f"the last pack was {last_pack} "
-                f"and we are trying to retrieve again from pack {meta['pack_id']}"
+                f'Objects were already retrieved from pack {meta["pack_id"]}, '
+                f'the last pack was {last_pack} '
+                f'and we are trying to retrieve again from pack {meta["pack_id"]}'
             )
             last_pack = meta['pack_id']
             seen_packs.add(meta['pack_id'])
             last_offset = 0
         # We are still in the same pack
         assert last_offset <= meta['pack_offset'], (
-            f"in pack {meta['pack_id']} we are reading offset "
-            f"{meta['pack_offset']}, but before we were reading "
-            f"a later offset {last_offset}"
+            f'in pack {meta["pack_id"]} we are reading offset '
+            f'{meta["pack_offset"]}, but before we were reading '
+            f'a later offset {last_offset}'
         )
         last_offset = meta['pack_offset']

diff --git a/tests/test_utils.py b/tests/test_utils.py
index eebaf36..75a9d72 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -44,9 +44,9 @@ def test_lazy_opener_read():

     # This is not open yet
     lazy.tell()
-    assert (
-        len(current_process.open_files()) == start_open_files
-    ), 'The LazyOpener is not lazy, but axtually opened the file instead!'
+    assert len(current_process.open_files()) == start_open_files, (
+        'The LazyOpener is not lazy, but axtually opened the file instead!'
+    )
     with lazy as fhandle:
         # Shoul be opened at position zero at the beginnign
         assert lazy.tell() == 0