Skip to content

Commit

Permalink
Merge branch 'main' into dor-file-provider-change
Browse files Browse the repository at this point in the history
  • Loading branch information
jayamala17 committed Jan 27, 2025
2 parents 40d2036 + e58c466 commit 0c148e4
Show file tree
Hide file tree
Showing 23 changed files with 359 additions and 136 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
name: Continuous Integration

on:
workflow_dispatch:
push:
pull_request:
branches:
- 'main'

jobs:
ci:
if: ${{ github.event_name == 'push' || github.event.pull_request.merged == true }}

runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Run tests
run: |
./init.sh
docker compose run --rm app poetry run pytest
19 changes: 19 additions & 0 deletions .github/workflows/scratch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#Scratch workflow placeholder to create new workflows from a branch other then main
name: Scratch

on:
workflow_dispatch:
inputs:
tag:
description: tag
required: true

jobs:
get_short_tag:
name: get-short-tag
runs-on: ubuntu-latest
steps:
- name: save short tag to environment
run: echo "short_tag=$(echo ${{ github.event.inputs.tag }} | head -c 8 )" >> $GITHUB_ENV
- name: echo env var
run: echo "short_tag ${short_tag}"
5 changes: 1 addition & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,5 @@ output/
tests/test_storage
tests/test_workspaces

features/scratch/*

features/scratch/storage/*
!features/scratch/storage/.keep
features/scratch/workspaces/*
!features/scratch/workspaces/.keep
21 changes: 19 additions & 2 deletions dor/adapters/catalog.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# from dor.domain.models import Bin
from abc import ABC, abstractmethod
from dor.domain import models

import uuid
Expand Down Expand Up @@ -46,7 +47,23 @@ class Bin(Base):
# DateTime(timezone=True), server_default=func.now()
# )

class MemoryCatalog:

class Catalog(ABC):

@abstractmethod
def add(self, bin: models.Bin):
raise NotImplementedError

@abstractmethod
def get(self, identifier: str):
raise NotImplementedError

@abstractmethod
def get_by_alternate_identifier(self, identifier: str):
raise NotImplementedError


class MemoryCatalog(Catalog):
def __init__(self):
self.bins = []

Expand All @@ -66,7 +83,7 @@ def get_by_alternate_identifier(self, identifier):
return None


class SqlalchemyCatalog:
class SqlalchemyCatalog(Catalog):

def __init__(self, session):
self.session = session
Expand Down
2 changes: 2 additions & 0 deletions dor/cli/main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import typer
import dor.cli.samples as samples
import dor.cli.repo as repo

app = typer.Typer()
app.add_typer(samples.app, name="samples")
app.add_typer(repo.app, name="repo")


if __name__ == "__main__": # pragma: no cover
Expand Down
85 changes: 85 additions & 0 deletions dor/cli/repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import uuid
from typing import Callable, Type, Tuple

import typer
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from dor.adapters.bag_adapter import BagAdapter
from dor.adapters.catalog import Base, _custom_json_serializer
from dor.config import config
from dor.domain.events import (
Event,
BinCataloged,
PackageReceived,
PackageStored,
PackageSubmitted,
PackageUnpacked,
PackageVerified,
)
from dor.providers.file_system_file_provider import FilesystemFileProvider
from dor.providers.package_resource_provider import PackageResourceProvider
from dor.providers.translocator import Translocator, Workspace
from dor.service_layer.handlers.catalog_bin import catalog_bin
from dor.service_layer.handlers.receive_package import receive_package
from dor.service_layer.handlers.store_files import store_files
from dor.service_layer.handlers.unpack_package import unpack_package
from dor.service_layer.handlers.verify_package import verify_package
from dor.service_layer.message_bus.memory_message_bus import MemoryMessageBus
from dor.service_layer.unit_of_work import SqlalchemyUnitOfWork
from gateway.ocfl_repository_gateway import OcflRepositoryGateway


app = typer.Typer()


def bootstrap() -> Tuple[MemoryMessageBus, SqlalchemyUnitOfWork]:
gateway = OcflRepositoryGateway(storage_path=config.storage_path)

engine = create_engine(
config.get_database_engine_url(), json_serializer=_custom_json_serializer
)
session_factory = sessionmaker(bind=engine)
uow = SqlalchemyUnitOfWork(gateway=gateway, session_factory=session_factory)

translocator = Translocator(
inbox_path=config.inbox_path,
workspaces_path=config.workspaces_path,
minter = lambda: str(uuid.uuid4())
)
file_provider = FilesystemFileProvider()

handlers: dict[Type[Event], list[Callable]] = {
PackageSubmitted: [lambda event: receive_package(event, uow, translocator)],
PackageReceived: [lambda event: verify_package(event, uow, BagAdapter, Workspace)],
PackageVerified: [
lambda event: unpack_package(
event, uow, BagAdapter, PackageResourceProvider, Workspace, file_provider
)
],
PackageUnpacked: [lambda event: store_files(event, uow, Workspace)],
PackageStored: [lambda event: catalog_bin(event, uow)],
BinCataloged: []
}
message_bus = MemoryMessageBus(handlers)
return (message_bus, uow)


@app.command()
def initialize():
gateway = OcflRepositoryGateway(storage_path=config.storage_path)
gateway.create_repository()

engine = create_engine(
config.get_database_engine_url(), json_serializer=_custom_json_serializer
)
Base.metadata.create_all(engine)


@app.command()
def store(
package_identifier: str = typer.Option(help="Name of the package directory"),
):
message_bus, uow = bootstrap()
event = PackageSubmitted(package_identifier=package_identifier)
message_bus.handle(event, uow)
7 changes: 7 additions & 0 deletions dor/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from pathlib import Path

import sqlalchemy
from pydantic.dataclasses import dataclass
Expand All @@ -15,11 +16,17 @@ class DatabaseConfig:

@dataclass
class Config:
storage_path: Path
inbox_path: Path
workspaces_path: Path
database: DatabaseConfig

@classmethod
def from_env(cls):
return cls(
storage_path=Path(os.getenv("STORAGE_PATH", "")),
inbox_path=Path(os.getenv("INBOX_PATH", "")),
workspaces_path=Path(os.getenv("WORKSPACES_PATH", "")),
database=DatabaseConfig(
user=os.getenv("POSTGRES_USER", "postgres"),
password=os.getenv("POSTGRES_PASSWORD", "postgres"),
Expand Down
9 changes: 8 additions & 1 deletion dor/domain/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Any

from dor.domain.models import VersionInfo
from dor.providers.models import PackageResource


@dataclass
Expand Down Expand Up @@ -38,7 +39,7 @@ class PackageNotVerified(Event):
@dataclass
class PackageUnpacked(Event):
identifier: str
resources: list[Any]
resources: list[PackageResource]
tracking_identifier: str
version_info: VersionInfo
workspace_identifier: str
Expand All @@ -49,3 +50,9 @@ class PackageUnpacked(Event):
class PackageStored(Event):
identifier: str
tracking_identifier: str
resources: list[PackageResource]

@dataclass
class BinCataloged(Event):
identifier: str
tracking_identifier: str
32 changes: 32 additions & 0 deletions dor/service_layer/handlers/catalog_bin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import json
from pathlib import Path

from dor.domain.events import PackageStored, BinCataloged
from dor.domain.models import Bin
from dor.service_layer.unit_of_work import AbstractUnitOfWork

def catalog_bin(event: PackageStored, uow: AbstractUnitOfWork) -> None:
root_resource = [resource for resource in event.resources if resource.type == 'Monograph'][0]
common_metadata_file = [
metadata_file for metadata_file in root_resource.metadata_files if "common" in metadata_file.ref.locref
][0]
common_metadata_file_path = Path(common_metadata_file.ref.locref)
object_files = uow.gateway.get_object_files(event.identifier)
matching_object_file = [
object_file for object_file in object_files if common_metadata_file_path == object_file.logical_path
][0]
literal_common_metadata_path = matching_object_file.literal_path
common_metadata = json.loads(literal_common_metadata_path.read_text())

bin = Bin(
identifier=event.identifier,
alternate_identifiers=[root_resource.alternate_identifier.id],
common_metadata=common_metadata,
package_resources=event.resources
)
with uow:
uow.catalog.add(bin)
uow.commit()

uow.add_event(BinCataloged(identifier=event.identifier, tracking_identifier=event.tracking_identifier))

4 changes: 2 additions & 2 deletions dor/service_layer/handlers/receive_package.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Any

from dor.domain.events import PackageSubmitted, PackageReceived
from dor.service_layer.unit_of_work import UnitOfWork
from dor.service_layer.unit_of_work import AbstractUnitOfWork

def receive_package(event: PackageSubmitted, uow: UnitOfWork, translocator: Any) -> None:
def receive_package(event: PackageSubmitted, uow: AbstractUnitOfWork, translocator: Any) -> None:
workspace = translocator.create_workspace_for_package(event.package_identifier)

received_event = PackageReceived(
Expand Down
8 changes: 5 additions & 3 deletions dor/service_layer/handlers/store_files.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathlib import Path
from dor.domain.events import PackageStored, PackageUnpacked
from dor.service_layer.unit_of_work import UnitOfWork
from dor.service_layer.unit_of_work import AbstractUnitOfWork


def store_files(event: PackageUnpacked, uow: UnitOfWork, workspace_class: type) -> None:
def store_files(event: PackageUnpacked, uow: AbstractUnitOfWork, workspace_class: type) -> None:
workspace = workspace_class(event.workspace_identifier, event.identifier)

entries: list[Path] = []
Expand All @@ -24,6 +24,8 @@ def store_files(event: PackageUnpacked, uow: UnitOfWork, workspace_class: type)
)

stored_event = PackageStored(
identifier=event.identifier, tracking_identifier=event.tracking_identifier
identifier=event.identifier,
tracking_identifier=event.tracking_identifier,
resources=event.resources
)
uow.add_event(stored_event)
4 changes: 2 additions & 2 deletions dor/service_layer/handlers/unpack_package.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from dor.domain.events import PackageUnpacked, PackageVerified
from dor.domain.models import VersionInfo
from dor.providers.file_provider import FileProvider
from dor.service_layer.unit_of_work import UnitOfWork
from dor.service_layer.unit_of_work import AbstractUnitOfWork
from gateway.coordinator import Coordinator


def unpack_package(
event: PackageVerified,
uow: UnitOfWork,
uow: AbstractUnitOfWork,
bag_adapter_class: type,
package_resource_provider_class: type,
workspace_class: type,
Expand Down
5 changes: 2 additions & 3 deletions dor/service_layer/handlers/verify_package.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from dor.adapters.bag_adapter import ValidationError
from dor.domain.events import PackageNotVerified, PackageReceived, PackageVerified
from dor.providers.file_provider import FileProvider
from dor.service_layer.unit_of_work import UnitOfWork

from dor.service_layer.unit_of_work import AbstractUnitOfWork

def verify_package(
event: PackageReceived, uow: UnitOfWork, bag_adapter_class: type, workspace_class: type, file_provider: FileProvider
event: PackageReceived, uow: AbstractUnitOfWork, bag_adapter_class: type, workspace_class: type, file_provider: FileProvider
) -> None:
workspace = workspace_class(event.workspace_identifier)

Expand Down
6 changes: 3 additions & 3 deletions dor/service_layer/message_bus/memory_message_bus.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Callable, Type
from dor.domain.events import Event
from dor.service_layer.unit_of_work import UnitOfWork
from dor.service_layer.unit_of_work import AbstractUnitOfWork


class MemoryMessageBus:
Expand All @@ -13,14 +13,14 @@ def register_event_handler(self, event_type: Type[Event], handler: Callable):
self.event_handlers[event_type] = []
self.event_handlers[event_type].append(handler)

def handle(self, message, uow: UnitOfWork):
def handle(self, message, uow: AbstractUnitOfWork):
# Handles a message, which must be an event.
if isinstance(message, Event):
self._handle_event(message, uow)
else:
raise ValueError(f"Message of type {type(message)} is not a valid Event")

def _handle_event(self, event: Event, uow: UnitOfWork):
def _handle_event(self, event: Event, uow: AbstractUnitOfWork):
# Handles an event by executing its registered handlers.
if event.__class__ not in self.event_handlers:
raise NoHandlerForEventError(f"No handler found for event type {type(event)}")
Expand Down
4 changes: 3 additions & 1 deletion dor/service_layer/unit_of_work.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from dor.adapters.catalog import MemoryCatalog, SqlalchemyCatalog, _custom_json_serializer
from dor.adapters.catalog import Catalog, MemoryCatalog, SqlalchemyCatalog, _custom_json_serializer
from dor.config import config
from dor.domain.events import Event
from gateway.repository_gateway import RepositoryGateway


class AbstractUnitOfWork(ABC):
catalog: Catalog
gateway: RepositoryGateway

@abstractmethod
def __enter__(self):
Expand Down
4 changes: 4 additions & 0 deletions env.example
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DATABASE=dor_local
POSTGRES_HOST=db

STORAGE_PATH=
INBOX_PATH=
WORKSPACES_PATH=
Loading

0 comments on commit 0c148e4

Please sign in to comment.