Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes related to File Provider #30

Merged
merged 9 commits into from
Jan 31, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'main' into dor-file-provider-change
  • Loading branch information
jayamala17 committed Jan 27, 2025
commit 0c148e4ad3261159a3c0d936ba595b13d8c43aa0
21 changes: 21 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
name: Continuous Integration

on:
workflow_dispatch:
push:
pull_request:
branches:
- 'main'

jobs:
ci:
if: ${{ github.event_name == 'push' || github.event.pull_request.merged == true }}

runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Run tests
run: |
./init.sh
docker compose run --rm app poetry run pytest
19 changes: 19 additions & 0 deletions .github/workflows/scratch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#Scratch workflow placeholder to create new workflows from a branch other then main
name: Scratch

on:
workflow_dispatch:
inputs:
tag:
description: tag
required: true

jobs:
get_short_tag:
name: get-short-tag
runs-on: ubuntu-latest
steps:
- name: save short tag to environment
run: echo "short_tag=$(echo ${{ github.event.inputs.tag }} | head -c 8 )" >> $GITHUB_ENV
- name: echo env var
run: echo "short_tag ${short_tag}"
5 changes: 1 addition & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -19,8 +19,5 @@ output/
tests/test_storage
tests/test_workspaces

features/scratch/*

features/scratch/storage/*
!features/scratch/storage/.keep
features/scratch/workspaces/*
!features/scratch/workspaces/.keep
21 changes: 19 additions & 2 deletions dor/adapters/catalog.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# from dor.domain.models import Bin
from abc import ABC, abstractmethod
from dor.domain import models

import uuid
@@ -46,7 +47,23 @@ class Bin(Base):
# DateTime(timezone=True), server_default=func.now()
# )

class MemoryCatalog:

class Catalog(ABC):

@abstractmethod
def add(self, bin: models.Bin):
raise NotImplementedError

@abstractmethod
def get(self, identifier: str):
raise NotImplementedError

@abstractmethod
def get_by_alternate_identifier(self, identifier: str):
raise NotImplementedError


class MemoryCatalog(Catalog):
def __init__(self):
self.bins = []

@@ -66,7 +83,7 @@ def get_by_alternate_identifier(self, identifier):
return None


class SqlalchemyCatalog:
class SqlalchemyCatalog(Catalog):

def __init__(self, session):
self.session = session
2 changes: 2 additions & 0 deletions dor/cli/main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import typer
import dor.cli.samples as samples
import dor.cli.repo as repo

app = typer.Typer()
app.add_typer(samples.app, name="samples")
app.add_typer(repo.app, name="repo")


if __name__ == "__main__": # pragma: no cover
85 changes: 85 additions & 0 deletions dor/cli/repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import uuid
from typing import Callable, Type, Tuple

import typer
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from dor.adapters.bag_adapter import BagAdapter
from dor.adapters.catalog import Base, _custom_json_serializer
from dor.config import config
from dor.domain.events import (
Event,
BinCataloged,
PackageReceived,
PackageStored,
PackageSubmitted,
PackageUnpacked,
PackageVerified,
)
from dor.providers.file_system_file_provider import FilesystemFileProvider
from dor.providers.package_resource_provider import PackageResourceProvider
from dor.providers.translocator import Translocator, Workspace
from dor.service_layer.handlers.catalog_bin import catalog_bin
from dor.service_layer.handlers.receive_package import receive_package
from dor.service_layer.handlers.store_files import store_files
from dor.service_layer.handlers.unpack_package import unpack_package
from dor.service_layer.handlers.verify_package import verify_package
from dor.service_layer.message_bus.memory_message_bus import MemoryMessageBus
from dor.service_layer.unit_of_work import SqlalchemyUnitOfWork
from gateway.ocfl_repository_gateway import OcflRepositoryGateway


app = typer.Typer()


def bootstrap() -> Tuple[MemoryMessageBus, SqlalchemyUnitOfWork]:
gateway = OcflRepositoryGateway(storage_path=config.storage_path)

engine = create_engine(
config.get_database_engine_url(), json_serializer=_custom_json_serializer
)
session_factory = sessionmaker(bind=engine)
uow = SqlalchemyUnitOfWork(gateway=gateway, session_factory=session_factory)

translocator = Translocator(
inbox_path=config.inbox_path,
workspaces_path=config.workspaces_path,
minter = lambda: str(uuid.uuid4())
)
file_provider = FilesystemFileProvider()

handlers: dict[Type[Event], list[Callable]] = {
PackageSubmitted: [lambda event: receive_package(event, uow, translocator)],
PackageReceived: [lambda event: verify_package(event, uow, BagAdapter, Workspace)],
PackageVerified: [
lambda event: unpack_package(
event, uow, BagAdapter, PackageResourceProvider, Workspace, file_provider
)
],
PackageUnpacked: [lambda event: store_files(event, uow, Workspace)],
PackageStored: [lambda event: catalog_bin(event, uow)],
BinCataloged: []
}
message_bus = MemoryMessageBus(handlers)
return (message_bus, uow)


@app.command()
def initialize():
gateway = OcflRepositoryGateway(storage_path=config.storage_path)
gateway.create_repository()

engine = create_engine(
config.get_database_engine_url(), json_serializer=_custom_json_serializer
)
Base.metadata.create_all(engine)


@app.command()
def store(
package_identifier: str = typer.Option(help="Name of the package directory"),
):
message_bus, uow = bootstrap()
event = PackageSubmitted(package_identifier=package_identifier)
message_bus.handle(event, uow)
7 changes: 7 additions & 0 deletions dor/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from pathlib import Path

import sqlalchemy
from pydantic.dataclasses import dataclass
@@ -15,11 +16,17 @@ class DatabaseConfig:

@dataclass
class Config:
storage_path: Path
inbox_path: Path
workspaces_path: Path
database: DatabaseConfig

@classmethod
def from_env(cls):
return cls(
storage_path=Path(os.getenv("STORAGE_PATH", "")),
inbox_path=Path(os.getenv("INBOX_PATH", "")),
workspaces_path=Path(os.getenv("WORKSPACES_PATH", "")),
database=DatabaseConfig(
user=os.getenv("POSTGRES_USER", "postgres"),
password=os.getenv("POSTGRES_PASSWORD", "postgres"),
9 changes: 8 additions & 1 deletion dor/domain/events.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@
from typing import Any

from dor.domain.models import VersionInfo
from dor.providers.models import PackageResource


@dataclass
@@ -38,7 +39,7 @@ class PackageNotVerified(Event):
@dataclass
class PackageUnpacked(Event):
identifier: str
resources: list[Any]
resources: list[PackageResource]
tracking_identifier: str
version_info: VersionInfo
workspace_identifier: str
@@ -49,3 +50,9 @@ class PackageUnpacked(Event):
class PackageStored(Event):
identifier: str
tracking_identifier: str
resources: list[PackageResource]

@dataclass
class BinCataloged(Event):
identifier: str
tracking_identifier: str
32 changes: 32 additions & 0 deletions dor/service_layer/handlers/catalog_bin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import json
from pathlib import Path

from dor.domain.events import PackageStored, BinCataloged
from dor.domain.models import Bin
from dor.service_layer.unit_of_work import AbstractUnitOfWork

def catalog_bin(event: PackageStored, uow: AbstractUnitOfWork) -> None:
root_resource = [resource for resource in event.resources if resource.type == 'Monograph'][0]
common_metadata_file = [
metadata_file for metadata_file in root_resource.metadata_files if "common" in metadata_file.ref.locref
][0]
common_metadata_file_path = Path(common_metadata_file.ref.locref)
object_files = uow.gateway.get_object_files(event.identifier)
matching_object_file = [
object_file for object_file in object_files if common_metadata_file_path == object_file.logical_path
][0]
literal_common_metadata_path = matching_object_file.literal_path
common_metadata = json.loads(literal_common_metadata_path.read_text())

bin = Bin(
identifier=event.identifier,
alternate_identifiers=[root_resource.alternate_identifier.id],
common_metadata=common_metadata,
package_resources=event.resources
)
with uow:
uow.catalog.add(bin)
uow.commit()

uow.add_event(BinCataloged(identifier=event.identifier, tracking_identifier=event.tracking_identifier))

4 changes: 2 additions & 2 deletions dor/service_layer/handlers/receive_package.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Any

from dor.domain.events import PackageSubmitted, PackageReceived
from dor.service_layer.unit_of_work import UnitOfWork
from dor.service_layer.unit_of_work import AbstractUnitOfWork

def receive_package(event: PackageSubmitted, uow: UnitOfWork, translocator: Any) -> None:
def receive_package(event: PackageSubmitted, uow: AbstractUnitOfWork, translocator: Any) -> None:
workspace = translocator.create_workspace_for_package(event.package_identifier)

received_event = PackageReceived(
8 changes: 5 additions & 3 deletions dor/service_layer/handlers/store_files.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathlib import Path
from dor.domain.events import PackageStored, PackageUnpacked
from dor.service_layer.unit_of_work import UnitOfWork
from dor.service_layer.unit_of_work import AbstractUnitOfWork


def store_files(event: PackageUnpacked, uow: UnitOfWork, workspace_class: type) -> None:
def store_files(event: PackageUnpacked, uow: AbstractUnitOfWork, workspace_class: type) -> None:
workspace = workspace_class(event.workspace_identifier, event.identifier)

entries: list[Path] = []
@@ -24,6 +24,8 @@ def store_files(event: PackageUnpacked, uow: UnitOfWork, workspace_class: type)
)

stored_event = PackageStored(
identifier=event.identifier, tracking_identifier=event.tracking_identifier
identifier=event.identifier,
tracking_identifier=event.tracking_identifier,
resources=event.resources
)
uow.add_event(stored_event)
4 changes: 2 additions & 2 deletions dor/service_layer/handlers/unpack_package.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from dor.domain.events import PackageUnpacked, PackageVerified
from dor.domain.models import VersionInfo
from dor.providers.file_provider import FileProvider
from dor.service_layer.unit_of_work import UnitOfWork
from dor.service_layer.unit_of_work import AbstractUnitOfWork
from gateway.coordinator import Coordinator


def unpack_package(
event: PackageVerified,
uow: UnitOfWork,
uow: AbstractUnitOfWork,
bag_adapter_class: type,
package_resource_provider_class: type,
workspace_class: type,
5 changes: 2 additions & 3 deletions dor/service_layer/handlers/verify_package.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from dor.adapters.bag_adapter import ValidationError
from dor.domain.events import PackageNotVerified, PackageReceived, PackageVerified
from dor.providers.file_provider import FileProvider
from dor.service_layer.unit_of_work import UnitOfWork

from dor.service_layer.unit_of_work import AbstractUnitOfWork

def verify_package(
event: PackageReceived, uow: UnitOfWork, bag_adapter_class: type, workspace_class: type, file_provider: FileProvider
event: PackageReceived, uow: AbstractUnitOfWork, bag_adapter_class: type, workspace_class: type, file_provider: FileProvider
) -> None:
workspace = workspace_class(event.workspace_identifier)

6 changes: 3 additions & 3 deletions dor/service_layer/message_bus/memory_message_bus.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Callable, Type
from dor.domain.events import Event
from dor.service_layer.unit_of_work import UnitOfWork
from dor.service_layer.unit_of_work import AbstractUnitOfWork


class MemoryMessageBus:
@@ -13,14 +13,14 @@ def register_event_handler(self, event_type: Type[Event], handler: Callable):
self.event_handlers[event_type] = []
self.event_handlers[event_type].append(handler)

def handle(self, message, uow: UnitOfWork):
def handle(self, message, uow: AbstractUnitOfWork):
# Handles a message, which must be an event.
if isinstance(message, Event):
self._handle_event(message, uow)
else:
raise ValueError(f"Message of type {type(message)} is not a valid Event")

def _handle_event(self, event: Event, uow: UnitOfWork):
def _handle_event(self, event: Event, uow: AbstractUnitOfWork):
# Handles an event by executing its registered handlers.
if event.__class__ not in self.event_handlers:
raise NoHandlerForEventError(f"No handler found for event type {type(event)}")
4 changes: 3 additions & 1 deletion dor/service_layer/unit_of_work.py
Original file line number Diff line number Diff line change
@@ -3,13 +3,15 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from dor.adapters.catalog import MemoryCatalog, SqlalchemyCatalog, _custom_json_serializer
from dor.adapters.catalog import Catalog, MemoryCatalog, SqlalchemyCatalog, _custom_json_serializer
from dor.config import config
from dor.domain.events import Event
from gateway.repository_gateway import RepositoryGateway


class AbstractUnitOfWork(ABC):
catalog: Catalog
gateway: RepositoryGateway

@abstractmethod
def __enter__(self):
4 changes: 4 additions & 0 deletions env.example
Original file line number Diff line number Diff line change
@@ -9,3 +9,7 @@ POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DATABASE=dor_local
POSTGRES_HOST=db

STORAGE_PATH=
INBOX_PATH=
WORKSPACES_PATH=
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.