From 07668c2534c30c832ffb8ac8983320613392561c Mon Sep 17 00:00:00 2001 From: Jayamala Date: Tue, 21 Jan 2025 16:10:28 -0500 Subject: [PATCH 1/8] Changes related to File Provider --- dor/adapters/bag_adapter.py | 11 ++--- dor/config.py | 26 +++++++---- dor/providers/file_provider.py | 24 +++++++++++ dor/providers/file_system_file_provider.py | 21 ++++++++- dor/providers/models.py | 3 -- dor/providers/translocator.py | 9 ++-- dor/service_layer/handlers/unpack_package.py | 2 +- dor/service_layer/handlers/verify_package.py | 5 ++- features/steps/store_resource.py | 11 ++--- features/steps/test_store_resource.py | 25 +++++++---- tests/test_bag_adapter.py | 45 ++++++++++++++------ tests/test_ocfl_repository_gateway.py | 9 ++-- tests/test_translocator.py | 18 +++++--- 13 files changed, 148 insertions(+), 61 deletions(-) diff --git a/dor/adapters/bag_adapter.py b/dor/adapters/bag_adapter.py index 5993f06..d6fd32c 100644 --- a/dor/adapters/bag_adapter.py +++ b/dor/adapters/bag_adapter.py @@ -1,7 +1,7 @@ -import os from pathlib import Path -import bagit +import bagit +from dor.providers.file_provider import FileProvider class DorInfoMissingError(Exception): @@ -35,8 +35,9 @@ class BagAdapter: dor_info_file_name = "dor-info.txt" - def __init__(self, path: Path) -> None: + def __init__(self, path: Path, file_provider: FileProvider) -> None: self.bag = bagit.Bag(str(path)) + self.file_provider = file_provider def validate(self) -> None: try: @@ -52,9 +53,9 @@ def validate(self) -> None: @property def dor_info(self) -> dict[str, str]: - path = self.bag.path + path = Path(self.bag.path) try: - data = bagit._load_tag_file(os.path.join(path, self.dor_info_file_name)) + data = bagit._load_tag_file(self.file_provider.get_combined_path(path, self.dor_info_file_name)) except FileNotFoundError as e: raise DorInfoMissingError from e return data diff --git a/dor/config.py b/dor/config.py index 3add4cd..868134a 100644 --- a/dor/config.py +++ b/dor/config.py @@ -1,5 +1,5 @@ -import os - +from dor.providers.file_provider import FileProvider +from dor.providers.file_system_file_provider import FilesystemFileProvider import sqlalchemy from pydantic.dataclasses import dataclass @@ -18,14 +18,22 @@ class Config: database: DatabaseConfig @classmethod - def from_env(cls): + def from_env(cls, file_provider: FileProvider): return cls( database=DatabaseConfig( - user=os.getenv("POSTGRES_USER", "postgres"), - password=os.getenv("POSTGRES_PASSWORD", "postgres"), - host=os.getenv("POSTGRES_HOST", "db"), - database=os.getenv("POSTGRES_DATABSE", "dor_local"), - test_database=os.getenv("POSTGRES_TEST_DATABASE", "dor_test") + user=file_provider.get_environment_variable( + "POSTGRES_USER", "postgres" + ), + password=file_provider.get_environment_variable( + "POSTGRES_PASSWORD", "postgres" + ), + host=file_provider.get_environment_variable("POSTGRES_HOST", "db"), + database=file_provider.get_environment_variable( + "POSTGRES_DATABSE", "dor_local" + ), + test_database=file_provider.get_environment_variable( + "POSTGRES_TEST_DATABASE", "dor_test" + ), ) ) @@ -46,4 +54,4 @@ def get_test_database_engine_url(self): return self._make_database_engine_url(self.database.test_database) -config = Config.from_env() +config = Config.from_env(file_provider=FilesystemFileProvider()) diff --git a/dor/providers/file_provider.py b/dor/providers/file_provider.py index ad7903c..b049709 100644 --- a/dor/providers/file_provider.py +++ b/dor/providers/file_provider.py @@ -14,3 +14,27 @@ def get_descriptor_dir(self, file_path: Path) -> Path: @abstractmethod def get_norm_path(self, base_path: Path, path_to_apply: str) -> Path: pass + + @abstractmethod + def get_combined_path(self, base_path: Path, path_to_apply: str) -> Path: + pass + + @abstractmethod + def clone_directory_structure(self, source_path: Path, destination_path: Path): + pass + + @abstractmethod + def get_environment_variable(self, env_key: str, default_value: str) -> str: + pass + + @abstractmethod + def delete_dir_and_contents(self, path: Path): + pass + + @abstractmethod + def create_directory(self, path: Path): + pass + + @abstractmethod + def create_directories(self, path: Path): + pass \ No newline at end of file diff --git a/dor/providers/file_system_file_provider.py b/dor/providers/file_system_file_provider.py index c223877..375fb17 100644 --- a/dor/providers/file_system_file_provider.py +++ b/dor/providers/file_system_file_provider.py @@ -1,5 +1,6 @@ import os from pathlib import Path +import shutil from dor.providers.file_provider import FileProvider @@ -14,6 +15,24 @@ def get_descriptor_dir(self, file_path: Path) -> Path: return file_path.parent def get_norm_path(self, base_path: Path, path_to_apply: str) -> Path: - combined_path = os.path.join(base_path, path_to_apply) + combined_path = self.get_combined_path(base_path, path_to_apply) resolved_combined_path = Path(os.path.normpath(combined_path)) return resolved_combined_path + + def get_combined_path(self, base_path: Path, path_to_apply: str) -> Path: + return Path(os.path.join(base_path, path_to_apply)) + + def clone_directory_structure(self, source_path: Path, destination_path: Path): + shutil.copytree(source_path, destination_path) + + def get_environment_variable(self, env_key: str, default_value: str) -> str: + return os.getenv(env_key, default_value) + + def delete_dir_and_contents(self, path: Path): + shutil.rmtree(path, ignore_errors=True) + + def create_directory(self, path: Path): + os.mkdir(path) + + def create_directories(self, path: Path): + os.makedirs(path) diff --git a/dor/providers/models.py b/dor/providers/models.py index 83ecd53..d300ff5 100644 --- a/dor/providers/models.py +++ b/dor/providers/models.py @@ -1,8 +1,5 @@ -# from dataclasses import dataclass, field from dataclasses import field from pydantic.dataclasses import dataclass -# from pydantic import BaseModel -from typing import Optional import uuid from datetime import datetime diff --git a/dor/providers/translocator.py b/dor/providers/translocator.py index bf4c7f9..eeccd03 100644 --- a/dor/providers/translocator.py +++ b/dor/providers/translocator.py @@ -1,8 +1,8 @@ from dataclasses import dataclass from pathlib import Path -import shutil from typing import Callable +from dor.providers.file_provider import FileProvider from gateway.bundle import Bundle @@ -53,13 +53,16 @@ def create_workspace_for_package(self, package_identifier: str) -> FakeWorkspace class Translocator(): - def __init__(self, inbox_path: Path, workspaces_path: Path, minter: Callable[[], str]) -> None: + def __init__(self, inbox_path: Path, workspaces_path: Path, minter: Callable[[], str], file_provider: FileProvider) -> None: self.inbox_path = inbox_path self.workspaces_path = workspaces_path self.minter = minter + self.file_provider = file_provider def create_workspace_for_package(self, package_identifier: str) -> Workspace: workspace_id = self.minter() workspace_path = self.workspaces_path / workspace_id - shutil.copytree(self.inbox_path / package_identifier, workspace_path) + self.file_provider.clone_directory_structure( + self.inbox_path / package_identifier, workspace_path + ) return Workspace(str(workspace_path)) diff --git a/dor/service_layer/handlers/unpack_package.py b/dor/service_layer/handlers/unpack_package.py index 5e9b724..5820b1b 100644 --- a/dor/service_layer/handlers/unpack_package.py +++ b/dor/service_layer/handlers/unpack_package.py @@ -14,7 +14,7 @@ def unpack_package( file_provider: FileProvider, ) -> None: workspace = workspace_class(event.workspace_identifier) - bag_adapter = bag_adapter_class(workspace.package_directory()) + bag_adapter = bag_adapter_class(workspace.package_directory(), file_provider) info = bag_adapter.dor_info workspace.root_identifier = info["Root-Identifier"] diff --git a/dor/service_layer/handlers/verify_package.py b/dor/service_layer/handlers/verify_package.py index 52502cd..1baa008 100644 --- a/dor/service_layer/handlers/verify_package.py +++ b/dor/service_layer/handlers/verify_package.py @@ -1,14 +1,15 @@ from dor.adapters.bag_adapter import ValidationError from dor.domain.events import PackageNotVerified, PackageReceived, PackageVerified +from dor.providers.file_provider import FileProvider from dor.service_layer.unit_of_work import UnitOfWork def verify_package( - event: PackageReceived, uow: UnitOfWork, bag_adapter_class: type, workspace_class: type + event: PackageReceived, uow: UnitOfWork, bag_adapter_class: type, workspace_class: type, file_provider: FileProvider ) -> None: workspace = workspace_class(event.workspace_identifier) - bag_adapter = bag_adapter_class(workspace.package_directory()) + bag_adapter = bag_adapter_class(workspace.package_directory(), file_provider) try: bag_adapter.validate() diff --git a/features/steps/store_resource.py b/features/steps/store_resource.py index 9b750e2..76c31a8 100644 --- a/features/steps/store_resource.py +++ b/features/steps/store_resource.py @@ -1,5 +1,3 @@ -import os -import shutil import uuid from datetime import UTC, datetime from pathlib import Path @@ -207,12 +205,15 @@ def step_impl(context) -> None: inbox = Path("./features/fixtures/inbox") storage = Path("./features/scratch/storage") workspaces = Path("./features/scratch/workspaces") + context.file_provider =FilesystemFileProvider() value = "55ce2f63-c11a-4fac-b3a9-160305b1a0c4" - shutil.rmtree(path=f"./features/scratch/workspaces/{value}", ignore_errors=True) - shutil.rmtree(path=storage, ignore_errors=True) - os.mkdir(storage) + context.file_provider.delete_dir_and_contents( + Path(f"./features/scratch/workspaces/{value}") + ) + context.file_provider.delete_dir_and_contents(storage) + context.file_provider.create_directory(storage) gateway = OcflRepositoryGateway(storage_path=storage) gateway.create_repository() diff --git a/features/steps/test_store_resource.py b/features/steps/test_store_resource.py index cce25d4..ba08d7a 100644 --- a/features/steps/test_store_resource.py +++ b/features/steps/test_store_resource.py @@ -1,11 +1,9 @@ -import os -import shutil from dataclasses import dataclass -from datetime import UTC, datetime from pathlib import Path from typing import Callable, Type from functools import partial +from dor.providers.file_provider import FileProvider from dor.providers.file_system_file_provider import FilesystemFileProvider from pytest_bdd import scenario, given, when, then @@ -34,6 +32,7 @@ class Context: translocator: Translocator = None message_bus: MemoryMessageBus = None stored_event: PackageStored = None + file_provider: FileProvider = FilesystemFileProvider() scenario = partial(scenario, '../store_resource.feature') @@ -52,15 +51,17 @@ def _(): value = '55ce2f63-c11a-4fac-b3a9-160305b1a0c4' - shutil.rmtree(path = f"./features/scratch/workspaces/{value}", ignore_errors = True) - shutil.rmtree(path = storage, ignore_errors = True) - os.mkdir(storage) + context.file_provider.delete_dir_and_contents( + Path(f"./features/scratch/workspaces/{value}") + ) + context.file_provider.delete_dir_and_contents(storage) + context.file_provider.create_directory(storage) gateway = OcflRepositoryGateway(storage_path = storage) gateway.create_repository() context.uow = UnitOfWork(gateway=gateway) - context.translocator = Translocator(inbox_path = inbox, workspaces_path = workspaces, minter = lambda: value) + context.translocator = Translocator(inbox_path = inbox, workspaces_path = workspaces, minter = lambda: value, file_provider=context.file_provider) def stored_callback(event: PackageStored, uow: UnitOfWork) -> None: context.stored_event = event @@ -70,7 +71,13 @@ def stored_callback(event: PackageStored, uow: UnitOfWork) -> None: lambda event: receive_package(event, context.uow, context.translocator) ], PackageReceived: [ - lambda event: verify_package(event, context.uow, BagAdapter, Workspace) + lambda event: verify_package( + event, + context.uow, + BagAdapter, + Workspace, + context.file_provider + ) ], PackageVerified: [ lambda event: unpack_package( @@ -79,7 +86,7 @@ def stored_callback(event: PackageStored, uow: UnitOfWork) -> None: BagAdapter, PackageResourceProvider, Workspace, - FilesystemFileProvider(), + context.file_provider, ) ], PackageUnpacked: [lambda event: store_files(event, context.uow, Workspace)], diff --git a/tests/test_bag_adapter.py b/tests/test_bag_adapter.py index ccbbe52..3b5b2e5 100644 --- a/tests/test_bag_adapter.py +++ b/tests/test_bag_adapter.py @@ -1,22 +1,35 @@ from pathlib import Path +from typing import Callable +from dor.providers.file_system_file_provider import FilesystemFileProvider import pytest from dor.adapters.bag_adapter import BagAdapter, DorInfoMissingError, ValidationError +@pytest.fixture +def bag_adapter_instance() -> Callable[[Path], BagAdapter]: + def _create_instance(path: Path) -> BagAdapter: + return BagAdapter(path=path, file_provider=FilesystemFileProvider()) + return _create_instance + + @pytest.fixture def test_bags_path() -> Path: return Path("tests/fixtures/test_bags") -def test_passes_validation(test_bags_path: Path): + +def test_passes_validation( + test_bags_path: Path, bag_adapter_instance: Callable[[Path], BagAdapter]): path = test_bags_path / "test_bag_valid" - reader = BagAdapter(path) + reader = bag_adapter_instance(path) reader.validate() -def test_fails_validation_when_dor_info_missing(test_bags_path: Path): + +def test_fails_validation_when_dor_info_missing( + test_bags_path: Path, bag_adapter_instance: Callable[[Path], BagAdapter]): path = test_bags_path / "test_bag_missing_dor_info" - reader = BagAdapter(path) + reader = bag_adapter_instance(path) with pytest.raises(ValidationError) as excinfo: reader.validate() expected_message = ( @@ -25,9 +38,11 @@ def test_fails_validation_when_dor_info_missing(test_bags_path: Path): ) assert expected_message == excinfo.value.message -def test_fails_validation_when_file_has_been_modified(test_bags_path: Path): + +def test_fails_validation_when_file_has_been_modified( + test_bags_path: Path, bag_adapter_instance: Callable[[Path], BagAdapter]): path = test_bags_path / "test_bag_with_modified_file" - reader = BagAdapter(path) + reader = bag_adapter_instance(path) with pytest.raises(ValidationError) as excinfo: reader.validate() expected_message = ( @@ -36,21 +51,27 @@ def test_fails_validation_when_file_has_been_modified(test_bags_path: Path): ) assert expected_message == excinfo.value.message -def test_fails_validation_when_dor_info_not_in_tagmanifest(test_bags_path: Path): + +def test_fails_validation_when_dor_info_not_in_tagmanifest( + test_bags_path: Path, bag_adapter_instance: Callable[[Path], BagAdapter]): path = test_bags_path / "test_bag_with_dor_info_not_in_manifest" - reader = BagAdapter(path) + reader = bag_adapter_instance(path) with pytest.raises(ValidationError) as excinfo: reader.validate() expected_message = "dor-info.txt must be listed in the tagmanifest file." assert expected_message == excinfo.value.message -def test_read_dor_info(test_bags_path: Path): + +def test_read_dor_info( + test_bags_path: Path, bag_adapter_instance: Callable[[Path], BagAdapter]): path = test_bags_path / "test_bag_valid" - reader = BagAdapter(path) + reader = bag_adapter_instance(path) assert reader.dor_info == {"Deposit-Group-Identifier": "d752b492-eb0b-4150-bcf5-b4cb74bd4a7f"} -def test_read_dor_info_when_missing(test_bags_path: Path): + +def test_read_dor_info_when_missing( + test_bags_path: Path, bag_adapter_instance: Callable[[Path], BagAdapter]): path = test_bags_path / "test_bag_missing_dor_info" - reader = BagAdapter(path) + reader = bag_adapter_instance(path) with pytest.raises(DorInfoMissingError): reader.dor_info diff --git a/tests/test_ocfl_repository_gateway.py b/tests/test_ocfl_repository_gateway.py index d44b163..718ae54 100644 --- a/tests/test_ocfl_repository_gateway.py +++ b/tests/test_ocfl_repository_gateway.py @@ -1,11 +1,10 @@ import hashlib import json -import os -import shutil from pathlib import Path from typing import Any from unittest import TestCase +from dor.providers.file_system_file_provider import FilesystemFileProvider from gateway.bundle import Bundle from gateway.coordinator import Coordinator from gateway.exceptions import ( @@ -42,10 +41,10 @@ def setUp(self): self.storage_path = Path("tests/test_storage") self.pres_storage = self.storage_path / "test_preservation_storage" self.extensions_path = self.pres_storage / "extensions" / "rocfl-staging" - + self.file_provider = FilesystemFileProvider() if self.storage_path.exists(): - shutil.rmtree(self.storage_path) - os.makedirs(self.pres_storage) + self.file_provider.delete_dir_and_contents(self.storage_path) + self.file_provider.create_directories(self.pres_storage) return super().setUp() diff --git a/tests/test_translocator.py b/tests/test_translocator.py index ede04d6..6b98b94 100644 --- a/tests/test_translocator.py +++ b/tests/test_translocator.py @@ -1,6 +1,6 @@ from pathlib import Path -import shutil +from dor.providers.file_system_file_provider import FilesystemFileProvider import pytest from dor.providers.translocator import Translocator @@ -14,15 +14,21 @@ def workspaces_path() -> Path: def inbox_path() -> Path: return Path("tests/fixtures/test_inbox") -def test_create_translocator(inbox_path, workspaces_path) -> None: +def test_create_translocator(inbox_path: Path, workspaces_path: Path) -> None: Translocator( - inbox_path=inbox_path, workspaces_path=workspaces_path, minter=lambda: "some_id" + inbox_path=inbox_path, workspaces_path=workspaces_path, minter=lambda: "some_id", file_provider=FilesystemFileProvider() ) -def test_translocator_can_create_workspace(inbox_path, workspaces_path) -> None: - shutil.rmtree(workspaces_path / "some_id", ignore_errors=True) +def test_translocator_can_create_workspace(inbox_path: Path, workspaces_path: Path) -> None: + file_provider = FilesystemFileProvider() + file_provider.delete_dir_and_contents( + Path(workspaces_path / "some_id") + ) translocator = Translocator( - inbox_path=inbox_path, workspaces_path=workspaces_path, minter=lambda: "some_id" + inbox_path=inbox_path, + workspaces_path=workspaces_path, + minter=lambda: "some_id", + file_provider=file_provider ) workspace = translocator.create_workspace_for_package("xyzzy-0001-v1") From fc5da0255d4cfeb37a6cd5e96df2b33961f1f484 Mon Sep 17 00:00:00 2001 From: Jayamala Date: Wed, 22 Jan 2025 11:06:59 -0500 Subject: [PATCH 2/8] Reverted Environment variable change --- dor/config.py | 28 ++++++++-------------- dor/providers/file_provider.py | 4 ---- dor/providers/file_system_file_provider.py | 3 --- 3 files changed, 10 insertions(+), 25 deletions(-) diff --git a/dor/config.py b/dor/config.py index 868134a..b18d449 100644 --- a/dor/config.py +++ b/dor/config.py @@ -1,5 +1,5 @@ -from dor.providers.file_provider import FileProvider -from dor.providers.file_system_file_provider import FilesystemFileProvider +import os + import sqlalchemy from pydantic.dataclasses import dataclass @@ -18,22 +18,14 @@ class Config: database: DatabaseConfig @classmethod - def from_env(cls, file_provider: FileProvider): + def from_env(cls): return cls( database=DatabaseConfig( - user=file_provider.get_environment_variable( - "POSTGRES_USER", "postgres" - ), - password=file_provider.get_environment_variable( - "POSTGRES_PASSWORD", "postgres" - ), - host=file_provider.get_environment_variable("POSTGRES_HOST", "db"), - database=file_provider.get_environment_variable( - "POSTGRES_DATABSE", "dor_local" - ), - test_database=file_provider.get_environment_variable( - "POSTGRES_TEST_DATABASE", "dor_test" - ), + user=os.getenv("POSTGRES_USER", "postgres"), + password=os.getenv("POSTGRES_PASSWORD", "postgres"), + host=os.getenv("POSTGRES_HOST", "db"), + database=os.getenv("POSTGRES_DATABSE", "dor_local"), + test_database=os.getenv("POSTGRES_TEST_DATABASE", "dor_test"), ) ) @@ -43,7 +35,7 @@ def _make_database_engine_url(self, database: str): username=self.database.user, password=self.database.password, host=self.database.host, - database=database + database=database, ) return url @@ -54,4 +46,4 @@ def get_test_database_engine_url(self): return self._make_database_engine_url(self.database.test_database) -config = Config.from_env(file_provider=FilesystemFileProvider()) +config = Config.from_env() diff --git a/dor/providers/file_provider.py b/dor/providers/file_provider.py index b049709..bf6595e 100644 --- a/dor/providers/file_provider.py +++ b/dor/providers/file_provider.py @@ -23,10 +23,6 @@ def get_combined_path(self, base_path: Path, path_to_apply: str) -> Path: def clone_directory_structure(self, source_path: Path, destination_path: Path): pass - @abstractmethod - def get_environment_variable(self, env_key: str, default_value: str) -> str: - pass - @abstractmethod def delete_dir_and_contents(self, path: Path): pass diff --git a/dor/providers/file_system_file_provider.py b/dor/providers/file_system_file_provider.py index 375fb17..5e58879 100644 --- a/dor/providers/file_system_file_provider.py +++ b/dor/providers/file_system_file_provider.py @@ -25,9 +25,6 @@ def get_combined_path(self, base_path: Path, path_to_apply: str) -> Path: def clone_directory_structure(self, source_path: Path, destination_path: Path): shutil.copytree(source_path, destination_path) - def get_environment_variable(self, env_key: str, default_value: str) -> str: - return os.getenv(env_key, default_value) - def delete_dir_and_contents(self, path: Path): shutil.rmtree(path, ignore_errors=True) From 4d440cb0e64f860bee23857ce0b3ed66ffd2a876 Mon Sep 17 00:00:00 2001 From: Jayamala Date: Wed, 22 Jan 2025 14:17:22 -0500 Subject: [PATCH 3/8] format fix --- dor/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dor/config.py b/dor/config.py index b18d449..3add4cd 100644 --- a/dor/config.py +++ b/dor/config.py @@ -25,7 +25,7 @@ def from_env(cls): password=os.getenv("POSTGRES_PASSWORD", "postgres"), host=os.getenv("POSTGRES_HOST", "db"), database=os.getenv("POSTGRES_DATABSE", "dor_local"), - test_database=os.getenv("POSTGRES_TEST_DATABASE", "dor_test"), + test_database=os.getenv("POSTGRES_TEST_DATABASE", "dor_test") ) ) @@ -35,7 +35,7 @@ def _make_database_engine_url(self, database: str): username=self.database.user, password=self.database.password, host=self.database.host, - database=database, + database=database ) return url From 40d203670ebeccad2d31e7ed3df7b118c37ba2c3 Mon Sep 17 00:00:00 2001 From: Jayamala Date: Wed, 22 Jan 2025 16:57:09 -0500 Subject: [PATCH 4/8] Added Test cases to File Provider --- tests/test_file_provider.py | 80 +++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tests/test_file_provider.py b/tests/test_file_provider.py index ec4510f..837b878 100644 --- a/tests/test_file_provider.py +++ b/tests/test_file_provider.py @@ -1,4 +1,7 @@ +import os +import shutil from pathlib import Path +from typing import Any, Callable import pytest from dor.providers.file_system_file_provider import FilesystemFileProvider @@ -56,3 +59,80 @@ def test_apply_relative_path(base_path, path_to_apply, expected_path): def test_get_descriptor_dir(file_path, expected_dir): result = file_provider.get_descriptor_dir(file_path) assert result == expected_dir + +@pytest.mark.parametrize( + "base_path, path_to_apply, expected_result", + [ + ( + Path("/test/base/path"), + "test/path/to/apply", + Path("/test/base/path/test/path/to/apply"), + ) + ], +) +def test_get_norm_path(base_path, path_to_apply, expected_result): + assert file_provider.get_norm_path(base_path, path_to_apply) == expected_result + + +@pytest.mark.parametrize( + "base_path, path_to_apply, expected_result", + [ + ( + Path("/test/base/path"), + "test/path/to/apply", + Path("/test/base/path/test/path/to/apply"), + ) + ], +) +def test_get_combined_path(base_path, path_to_apply, expected_result): + assert file_provider.get_combined_path(base_path, path_to_apply) == expected_result + + +@pytest.fixture +def create_tmp_test_dir(): + tmp_path = Path("tests/test_dir") + tmp_path.mkdir() + yield tmp_path + + # Teardown + if tmp_path.exists(): + shutil.rmtree(tmp_path) + + +def test_delete_dir_and_contents(create_tmp_test_dir): + tmp_path = create_tmp_test_dir + + test_file = tmp_path / "test_file.txt" + test_file.write_text("Test file") + + file_provider.delete_dir_and_contents(tmp_path) + assert not test_file.exists() + + +def test_clone_directory_structure(create_tmp_test_dir): + tmp_path = create_tmp_test_dir + source_path = tmp_path / Path("source") + destination_path = tmp_path / Path("dest") + source_path.mkdir() + + test_file = source_path / "file1.txt" + test_file.write_text("Test file1") + + file_provider.clone_directory_structure(source_path, destination_path) + assert (destination_path / "file1.txt").exists() + + +def test_create_directory(create_tmp_test_dir): + tmp_path = create_tmp_test_dir + new_dir = tmp_path / "new_dir" + + file_provider.create_directory(new_dir) + assert new_dir.exists() + + +def test_create_directories(create_tmp_test_dir): + tmp_path = create_tmp_test_dir + new_dir = tmp_path / "new_dir" / "subdir" + + file_provider.create_directories(new_dir) + assert new_dir.exists() From 1d516224bec9ece9d101237f6b7d96f22fe44fbb Mon Sep 17 00:00:00 2001 From: Jayamala Date: Mon, 27 Jan 2025 14:27:36 -0500 Subject: [PATCH 5/8] Removed Store_resource --- features/steps/store_resource.py | 265 ------------------------------- 1 file changed, 265 deletions(-) delete mode 100644 features/steps/store_resource.py diff --git a/features/steps/store_resource.py b/features/steps/store_resource.py deleted file mode 100644 index c2440b6..0000000 --- a/features/steps/store_resource.py +++ /dev/null @@ -1,265 +0,0 @@ -import uuid -from datetime import UTC, datetime -from pathlib import Path -from typing import Callable, Type - -from behave import given, when, then - -from dor.adapters.bag_adapter import BagAdapter -from dor.domain.events import ( - Event, - PackageReceived, - PackageStored, - PackageSubmitted, - PackageVerified, - PackageUnpacked, -) -from dor.providers.file_system_file_provider import FilesystemFileProvider -from dor.providers.translocator import Translocator, Workspace -from dor.providers.package_resource_provider import PackageResourceProvider -from dor.service_layer.handlers.store_files import store_files -from dor.service_layer.message_bus.memory_message_bus import MemoryMessageBus -from dor.service_layer.unit_of_work import UnitOfWork -from gateway.ocfl_repository_gateway import OcflRepositoryGateway -from dor.service_layer.handlers.receive_package import receive_package -from dor.service_layer.handlers.verify_package import verify_package -from dor.service_layer.handlers.unpack_package import unpack_package -from dor.providers.models import ( - Agent, - FileMetadata, - FileReference, - PackageResource, - PreservationEvent, - AlternateIdentifier, - StructMap, - StructMapItem, - StructMapType, -) - - -class FakePackageResourceProvider: - - def __init__(self, path: Path): - self.path = path - - def get_resources(self): - return [ - PackageResource( - id=uuid.UUID("00000000-0000-0000-0000-000000000001"), - type="Monograph", - alternate_identifier=AlternateIdentifier( - id="xyzzy:00000001", type="DLXS" - ), - events=[ - PreservationEvent( - identifier="e01727d0-b4d9-47a5-925a-4018f9cac6b8", - type="ingest", - datetime=datetime(1983, 5, 17, 11, 9, 45, tzinfo=UTC), - detail="Girl voice lot another blue nearly.", - agent=Agent( - address="matthew24@example.net", role="collection manager" - ), - ) - ], - metadata_files=[ - FileMetadata( - id="_0193972b-e591-7e28-b8cb-1babed52f606", - use="DESCRIPTIVE/COMMON", - ref=FileReference( - locref="../metadata/00000000-0000-0000-0000-000000000001.common.json", - mdtype="DOR:SCHEMA", - mimetype="application/json", - ), - ), - FileMetadata( - id="_0193972b-e592-7647-8e51-10db514433f7", - use="DESCRIPTIVE", - ref=FileReference( - locref="../metadata/00000000-0000-0000-0000-000000000001.metadata.json", - mdtype="DOR:SCHEMA", - mimetype="application/json", - ), - ), - FileMetadata( - id="RIGHTS1", - use="RIGHTS", - ref=FileReference( - locref="https://creativecommons.org/publicdomain/zero/1.0/", - mdtype="OTHER", - ), - ), - ], - struct_maps=[ - StructMap( - id="SM1", - type=StructMapType.PHYSICAL, - items=[ - StructMapItem( - order=1, - type="page", - label="Page 1", - asset_id="urn:dor:00000000-0000-0000-0000-000000001001", - ), - StructMapItem( - order=2, - type="page", - label="Page 2", - asset_id="urn:dor:00000000-0000-0000-0000-000000001002", - ), - ], - ) - ], - ), - PackageResource( - id=uuid.UUID("00000000-0000-0000-0000-000000001001"), - type="Asset", - alternate_identifier=AlternateIdentifier( - id="xyzzy:00000001:00000001", type="DLXS" - ), - events=[ - PreservationEvent( - identifier="81388465-aefd-4a3d-ba99-a868d062b92e", - type="generate access derivative", - datetime=datetime(2005, 8, 22, 22, 54, 45, tzinfo=UTC), - detail="Method south agree until.", - agent=Agent( - address="rguzman@example.net", role="image processing" - ), - ), - PreservationEvent( - identifier="d53540b9-cd23-4e92-9dff-4b28bf050b26", - type="extract text", - datetime=datetime(2006, 8, 23, 16, 21, 57, tzinfo=UTC), - detail="Hear thus part probably that.", - agent=Agent( - address="kurt16@example.org", role="ocr processing" - ), - ), - ], - metadata_files=[ - FileMetadata( - id="_0193972b-e4a4-7985-abe2-f3f1259b78ec", - use="TECHNICAL", - ref=FileReference( - locref="../metadata/00000001.source.jpg.mix.xml", - mdtype="NISOIMG", - ), - ), - FileMetadata( - id="_0193972b-e4ae-73eb-848d-5f8893b68253", - use="TECHNICAL", - ref=FileReference( - locref="../metadata/00000001.access.jpg.mix.xml", - mdtype="NISOIMG", - ), - ), - FileMetadata( - id="_0193972b-e572-7107-b69c-e2f4c660a9aa", - use="TECHNICAL", - ref=FileReference( - locref="../metadata/00000001.plaintext.txt.textmd.xml", - mdtype="TEXTMD", - ), - ), - ], - data_files=[ - FileMetadata( - id="_be653ff450ae7f3520312a53e56c00bc", - mdid="_0193972b-e4a4-7985-abe2-f3f1259b78ec", - use="SOURCE", - ref=FileReference( - locref="../data/00000001.source.jpg", - mimetype="image/jpeg", - ), - ), - FileMetadata( - id="_7e923d9c33b3859e1327fa53a8e609a1", - groupid="_be653ff450ae7f3520312a53e56c00bc", - mdid="_0193972b-e4ae-73eb-848d-5f8893b68253", - use="ACCESS", - ref=FileReference( - locref="../data/00000001.access.jpg", - mimetype="image/jpeg", - ), - ), - FileMetadata( - id="_764ba9761fbc6cbf0462d28d19356148", - groupid="_be653ff450ae7f3520312a53e56c00bc", - mdid="_0193972b-e572-7107-b69c-e2f4c660a9aa", - use="SOURCE", - ref=FileReference( - locref="../data/00000001.plaintext.txt", - mimetype="text/plain", - ), - ), - ], - ), - ] - - -# Test - - -@given("a package containing the scanned pages, OCR, and metadata") -def step_impl(context) -> None: - inbox = Path("./features/fixtures/inbox") - storage = Path("./features/scratch/storage") - workspaces = Path("./features/scratch/workspaces") - context.file_provider =FilesystemFileProvider() - - value = "55ce2f63-c11a-4fac-b3a9-160305b1a0c4" - - context.file_provider.delete_dir_and_contents( - Path(f"./features/scratch/workspaces/{value}") - ) - context.file_provider.delete_dir_and_contents(storage) - context.file_provider.create_directory(storage) - - gateway = OcflRepositoryGateway(storage_path=storage) - gateway.create_repository() - context.uow = UnitOfWork(gateway=gateway) - - context.translocator = Translocator( - inbox_path=inbox, workspaces_path=workspaces, minter=lambda: value - ) - - def stored_callback(event: PackageStored, uow: UnitOfWork) -> None: - context.stored_event = event - - handlers: dict[Type[Event], list[Callable]] = { - PackageSubmitted: [ - lambda event: receive_package(event, context.uow, context.translocator) - ], - PackageReceived: [ - lambda event: verify_package(event, context.uow, BagAdapter, Workspace) - ], - PackageVerified: [ - lambda event: unpack_package( - event, - context.uow, - BagAdapter, - PackageResourceProvider, - Workspace, - FilesystemFileProvider(), - ) - ], - PackageUnpacked: [lambda event: store_files(event, context.uow, Workspace)], - PackageStored: [lambda event: stored_callback(event, context.uow)], - } - context.message_bus = MemoryMessageBus(handlers) - - -@when("the Collection Manager places the packaged resource in the incoming location") -def step_impl(context): - submission_id = "xyzzy-0001-v1" - - event = PackageSubmitted(package_identifier=submission_id) - context.message_bus.handle(event, context.uow) - - -@then("the Collection Manager can see that it was preserved.") -def step_impl(context): - event = context.stored_event - assert event.identifier == "00000000-0000-0000-0000-000000000001" - assert context.uow.gateway.has_object(event.identifier) - From 0462dd28c92856f912ee0403a4b031e66a8b4be4 Mon Sep 17 00:00:00 2001 From: Jayamala Date: Mon, 27 Jan 2025 14:55:30 -0500 Subject: [PATCH 6/8] test has been modified to use file provider --- features/steps/test_store_resource.py | 12 ++++--- tests/test_ocfl_validate.py | 46 ++++++++++++++------------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/features/steps/test_store_resource.py b/features/steps/test_store_resource.py index 6a89736..8ba9b9d 100644 --- a/features/steps/test_store_resource.py +++ b/features/steps/test_store_resource.py @@ -70,7 +70,8 @@ def message_bus(path_data: PathData, unit_of_work: AbstractUnitOfWork) -> Memory translocator = Translocator( inbox_path=path_data.inbox, workspaces_path=path_data.workspaces, - minter = lambda: value + minter = lambda: value, + file_provider=FilesystemFileProvider() ) handlers: dict[Type[Event], list[Callable]] = { @@ -107,10 +108,11 @@ def test_store_resource(): @given(u'a package containing the scanned pages, OCR, and metadata') def _(path_data: PathData, unit_of_work: AbstractUnitOfWork): - shutil.rmtree(path=path_data.scratch, ignore_errors = True) - os.mkdir(path_data.scratch) - os.mkdir(path_data.storage) - os.mkdir(path_data.workspaces) + file_provider = FilesystemFileProvider() + file_provider.delete_dir_and_contents(path=path_data.scratch) + file_provider.create_directory(path_data.scratch) + file_provider.create_directory(path_data.storage) + file_provider.create_directory(path_data.workspaces) unit_of_work.gateway.create_repository() diff --git a/tests/test_ocfl_validate.py b/tests/test_ocfl_validate.py index 6985b11..fe88f2d 100644 --- a/tests/test_ocfl_validate.py +++ b/tests/test_ocfl_validate.py @@ -4,6 +4,7 @@ import unittest from unittest.mock import patch, MagicMock, mock_open +from dor.providers.file_system_file_provider import FilesystemFileProvider from gateway.validate import FixityValidator, RocflOCFLFixityValidator class TestRocflOCFLFixityValidator(unittest.TestCase): @@ -12,14 +13,15 @@ def setUp(self): self.fixture_path = Path("tests/fixtures/test_ocfl_repo") self.repository_path = str(self.fixture_path) self.validator = RocflOCFLFixityValidator(repository_path=self.fixture_path) + self.file_provider = FilesystemFileProvider() # Common patches - self.mock_makedirs = patch('os.makedirs').start() + self.mock_makedirs = patch.object(self.file_provider, "create_directories").start() self.mock_open = patch('builtins.open', mock_open(read_data='{"content": [{"sha1": "some_sha1_hash_value", "sha512": "some_sha512_hash_value"}]}')).start() - self.mock_rmtree = patch('shutil.rmtree').start() + self.mock_rmtree = patch.object(self.file_provider, "delete_dir_and_contents").start() self.mock_run = patch('subprocess.run').start() - - #calling protected method + + # calling protected method self.build_command = getattr(self.validator, '_build_command') def tearDown(self): @@ -29,7 +31,7 @@ def tearDown(self): def test_validate_repository_success(self): # Mock the return value for subprocess.run self.mock_run.return_value = MagicMock(stdout="Valid repository.\n", returncode=0) - + # Run the validation result = self.validator.validate_repository() @@ -49,7 +51,7 @@ def test_validate_repository_with_flags(self): def test_validate_object_success(self): self.mock_run.return_value = MagicMock(stdout="Object urn:example:rocfl:object-1 is valid.\n", returncode=0) result = self.validator.validate_objects(["urn:example:rocfl:object-1"]) - + self.mock_run.assert_called_once_with(['rocfl', '-r', self.repository_path, 'validate', 'urn:example:rocfl:object-1'], capture_output=True, text=True, check=True) self.assertEqual(result, "Object urn:example:rocfl:object-1 is valid.\n") @@ -83,7 +85,7 @@ def test_validate_object_with_flags(self): def test_validate_multiple_objects_success(self): self.mock_run.return_value = MagicMock(stdout="Valid object object-1/\nValid object object-2/\n", returncode=0) result = self.validator.validate_multiple_objects_by_path(["object-1/", "object-2/"]) - + self.mock_run.assert_called_once_with([ 'rocfl', '-r', self.repository_path, 'validate', '-p', 'object-1/', 'object-2/' ], capture_output=True, text=True, check=True) @@ -91,7 +93,7 @@ def test_validate_multiple_objects_success(self): def test_validate_multiple_objects_with_flags(self): self.mock_run.return_value = MagicMock(stdout="Valid object object-1/\nValid object object-2/\n", returncode=0) - + # Simulate the flags and call the function result = self.validator.validate_multiple_objects_by_path( ["object-1/", "object-2/"], no_fixity=True, log_level="Error", suppress_warning="ObjectMissing" @@ -108,20 +110,20 @@ def test_validate_multiple_objects_with_flags(self): def test_validate_invalid_object(self): self.mock_run.return_value = MagicMock(stdout="Error: Object urn:example:rocfl:object-3 not found.\n", returncode=1) result = self.validator.validate_objects(["urn:example:rocfl:object-3"]) - + self.mock_run.assert_called_once_with(['rocfl', '-r', self.repository_path, 'validate', 'urn:example:rocfl:object-3'], capture_output=True, text=True, check=True) self.assertEqual(result, "Error: Object urn:example:rocfl:object-3 not found.\n") def test_fixity_check_failure(self): # Simulate a fixity check failure by changing the content and hashing new_sha512 = hashlib.sha512(b"modified content").hexdigest() - + self.mock_run.return_value = MagicMock( stdout=f"Error: Content fixity check failed for urn:example:rocfl:object-1 v1.\nExpected SHA-512 hash: dummy_sha512_hash_value_here\nActual SHA-512 hash: {new_sha512}\n", returncode=1 ) result = self.validator.validate_objects(["urn:example:rocfl:object-1"]) - + self.mock_run.assert_called_once_with(['rocfl', '-r', self.repository_path, 'validate', 'urn:example:rocfl:object-1'], capture_output=True, text=True, check=True) self.assertIn("Error: Content fixity check failed", result) self.assertIn("Expected SHA-512 hash", result) @@ -133,7 +135,7 @@ def test_validate_repo_trigger_warning_W004(self): stdout="Valid repository.\nWarning W004: 'For content-addressing, OCFL Objects SHOULD use sha512.'\n", returncode=0 ) - + result = self.validator.validate_repository() self.mock_run.assert_called_once_with([ @@ -173,7 +175,7 @@ def test_validate_object_trigger_warning_W004(self): self.assertIn("Warning W004:", result) self.assertIn("For content-addressing, OCFL Objects SHOULD use sha512.", result) - + def test_validate_object_suppress_warning_W004(self): # Simulate the suppression of warning W004 self.mock_run.return_value = MagicMock( @@ -182,7 +184,7 @@ def test_validate_object_suppress_warning_W004(self): ) result = self.validator.validate_objects(object_ids=["urn:example:rocfl:object-1"], suppress_warning="W004") - # Assert + # Assert self.mock_run.assert_called_once_with([ 'rocfl', '-r', self.repository_path, 'validate', 'urn:example:rocfl:object-1', '-w', 'W004' ], capture_output=True, text=True, check=True) @@ -194,39 +196,39 @@ def test_validate_object_suppress_warning_W004(self): def test_build_command_no_flags(self): base_command = ['rocfl', '-r', self.repository_path, 'validate'] result = self.build_command(base_command, no_fixity=False, log_level=None, suppress_warning=None) - + # Assert that no flags are added self.assertEqual(result, ['rocfl', '-r', self.repository_path, 'validate']) def test_build_command_with_no_fixity(self): base_command = ['rocfl', '-r', self.repository_path, 'validate'] result = self.build_command(base_command, no_fixity=True, log_level=None, suppress_warning=None) - + # Assert that '-n' flag is added self.assertEqual(result, ['rocfl', '-r', self.repository_path, 'validate', '-n']) def test_build_command_with_log_level(self): base_command = ['rocfl', '-r', self.repository_path, 'validate'] result = self.build_command(base_command, no_fixity=False, log_level='Error', suppress_warning=None) - + # Assert that '-l Error' is added self.assertEqual(result, ['rocfl', '-r', self.repository_path, 'validate', '-l', 'Error']) def test_build_command_with_suppress_warning(self): base_command = ['rocfl', '-r', self.repository_path, 'validate'] result = self.build_command(base_command, no_fixity=False, log_level=None, suppress_warning='Warning123') - + # Assert that '-w Warning123' is added self.assertEqual(result, ['rocfl', '-r', self.repository_path, 'validate', '-w', 'Warning123']) def test_build_command_with_all_flags(self): base_command = ['rocfl', '-r', self.repository_path, 'validate'] result = self.build_command(base_command, no_fixity=True, log_level='Error', suppress_warning='Warning123') - + # Assert that all flags are correctly added self.assertEqual(result, ['rocfl', '-r', self.repository_path, 'validate', '-n', '-l', 'Error', '-w', 'Warning123']) - - + + class TestFixityValidator(unittest.TestCase): def test_validator_checks_object_fixity(self): self.mockrocflvalidator = MagicMock(spec = RocflOCFLFixityValidator) @@ -237,7 +239,7 @@ def test_validator_checks_object_fixity(self): self.mockrocflvalidator.validate_objects.assert_called_once_with(["object-1"]) self.assertTrue(result.is_valid) self.assertEqual("Object object-1 is valid", result.message) - + class RocflOCFLFixityValidatorIntegrationTest(unittest.TestCase): def setUp(self): # Link to fixtures -> https://github.com/OCFL/fixtures From cd7f1c3b76a90dbb5135a3d8dcfe26091191ec72 Mon Sep 17 00:00:00 2001 From: Jayamala Date: Tue, 28 Jan 2025 15:15:37 -0500 Subject: [PATCH 7/8] Refactor get_combined_path to use Path operator --- dor/providers/file_system_file_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dor/providers/file_system_file_provider.py b/dor/providers/file_system_file_provider.py index 5e58879..0670a97 100644 --- a/dor/providers/file_system_file_provider.py +++ b/dor/providers/file_system_file_provider.py @@ -20,7 +20,7 @@ def get_norm_path(self, base_path: Path, path_to_apply: str) -> Path: return resolved_combined_path def get_combined_path(self, base_path: Path, path_to_apply: str) -> Path: - return Path(os.path.join(base_path, path_to_apply)) + return base_path / path_to_apply def clone_directory_structure(self, source_path: Path, destination_path: Path): shutil.copytree(source_path, destination_path) From d00ef3b9be3e93fc19e01ecc77625585a0a5c23c Mon Sep 17 00:00:00 2001 From: Jayamala Date: Fri, 31 Jan 2025 15:15:14 -0500 Subject: [PATCH 8/8] Update PackageReceived handler to include file_provider in verification process --- dor/cli/repo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dor/cli/repo.py b/dor/cli/repo.py index 2a92df6..fb182db 100644 --- a/dor/cli/repo.py +++ b/dor/cli/repo.py @@ -51,7 +51,7 @@ def bootstrap() -> Tuple[MemoryMessageBus, SqlalchemyUnitOfWork]: handlers: dict[Type[Event], list[Callable]] = { PackageSubmitted: [lambda event: receive_package(event, uow, translocator)], - PackageReceived: [lambda event: verify_package(event, uow, BagAdapter, Workspace)], + PackageReceived: [lambda event: verify_package(event, uow, BagAdapter, Workspace, file_provider)], PackageVerified: [ lambda event: unpack_package( event, uow, BagAdapter, PackageResourceProvider, Workspace, file_provider