Merge pull request #13861 from jmchilton/model_store_typing
Improved typing of Galaxy model store related stuff.
jmchilton authored May 5, 2022
2 parents e54147b + b84aec9 commit d12f7c8
Showing 5 changed files with 121 additions and 47 deletions.
4 changes: 2 additions & 2 deletions lib/galaxy/job_execution/output_collect.py
@@ -443,7 +443,7 @@ def get_hdca(self, object_id):
for collection_dataset in hdca.dataset_instances:
include_files = True
self.export_store.add_dataset(collection_dataset, include_files=include_files)
self.export_store.collection_datasets[collection_dataset.id] = True
self.export_store.collection_datasets.add(collection_dataset.id)

return hdca

@@ -452,7 +452,7 @@ def add_dataset_collection(self, collection):
for collection_dataset in collection.dataset_instances:
include_files = True
self.export_store.add_dataset(collection_dataset, include_files=include_files)
self.export_store.collection_datasets[collection_dataset.id] = True
self.export_store.collection_datasets.add(collection_dataset.id)

def add_output_dataset_association(self, name, dataset_instance):
self.export_store.add_job_output_dataset_associations(self.get_job_id(), name, dataset_instance)
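Both hunks in this file make the same change: a dict whose values were always `True` becomes a real `Set[int]`, so the annotation states directly that only membership matters. A minimal, self-contained sketch of the before/after idiom:

    from typing import Dict, Set

    # Before: a dict used purely for membership tests; values were always True.
    seen_as_dict: Dict[int, bool] = {}
    seen_as_dict[42] = True

    # After: a set expresses the same bookkeeping, and mypy can check it.
    seen: Set[int] = set()
    seen.add(42)

    assert 42 in seen_as_dict and 42 in seen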
12 changes: 11 additions & 1 deletion lib/galaxy/model/__init__.py
@@ -102,6 +102,7 @@
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.orm.decl_api import DeclarativeMeta
from sqlalchemy.sql import exists
from typing_extensions import Protocol

import galaxy.exceptions
import galaxy.model.metadata
@@ -279,9 +280,18 @@ def auto_propagated_tags(self):
return [t for t in self.tags if t.user_tname in AUTO_PROPAGATED_TAGS]


class SerializeFilesHandler(Protocol):
def serialize_files(self, dataset: "DatasetInstance", as_dict: Dict[str, Any]) -> None:
pass


class SerializationOptions:
def __init__(
self, for_edit, serialize_dataset_objects=None, serialize_files_handler=None, strip_metadata_files=None
self,
for_edit,
serialize_dataset_objects=None,
serialize_files_handler: Optional[SerializeFilesHandler] = None,
strip_metadata_files=None,
):
self.for_edit = for_edit
if serialize_dataset_objects is None:
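`SerializeFilesHandler` is a `Protocol`, so any object with a compatible `serialize_files` method satisfies the new annotation structurally; no inheritance is required. A minimal sketch of that mechanic, using stand-in classes rather than Galaxy's real `DatasetInstance` and `SerializationOptions`:

    from typing import Any, Dict
    from typing_extensions import Protocol  # typing.Protocol on Python >= 3.8


    class Dataset:  # stand-in for galaxy.model.DatasetInstance
        file_name = "/data/example.dat"


    class SerializeFilesHandler(Protocol):
        def serialize_files(self, dataset: Dataset, as_dict: Dict[str, Any]) -> None:
            ...


    class DirectoryHandler:
        """Never subclasses the protocol, yet type-checks as one."""

        def serialize_files(self, dataset: Dataset, as_dict: Dict[str, Any]) -> None:
            as_dict["file_name"] = dataset.file_name


    def serialize(handler: SerializeFilesHandler, dataset: Dataset) -> Dict[str, Any]:
        as_dict: Dict[str, Any] = {}
        handler.serialize_files(dataset, as_dict)
        return as_dict


    print(serialize(DirectoryHandler(), Dataset()))  # {'file_name': '/data/example.dat'}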
112 changes: 81 additions & 31 deletions lib/galaxy/model/store/__init__.py
@@ -13,28 +13,35 @@
)
from typing import (
Any,
Callable,
cast,
Dict,
List,
Optional,
Set,
Tuple,
Union,
)

from bdbag import bdbag_api as bdb
from boltons.iterutils import remap
from sqlalchemy.orm import joinedload
from sqlalchemy.sql import expression
from typing_extensions import Protocol

from galaxy.datatypes.registry import Registry
from galaxy.exceptions import (
MalformedContents,
ObjectNotFound,
)
from galaxy.model.mapping import GalaxyModelMapping
from galaxy.model.metadata import MetadataCollection
from galaxy.model.orm.util import (
add_object_to_object_session,
add_object_to_session,
get_object_session,
)
from galaxy.objectstore import ObjectStore
from galaxy.security.idencoding import IdEncodingHelper
from galaxy.util import (
FILENAME_VALID_CHARS,
@@ -58,10 +65,23 @@
ATTRS_FILENAME_COLLECTIONS = "collections_attrs.txt"
ATTRS_FILENAME_EXPORT = "export_attrs.txt"
ATTRS_FILENAME_LIBRARIES = "libraries_attrs.txt"
ATTRS_FILENAME_LIBRARY_FOLDERS = "library_folders_attrs.txt"
TRACEBACK = "traceback.txt"
GALAXY_EXPORT_VERSION = "2"


JsonDictT = Dict[str, Any]


class StoreAppProtocol(Protocol):
"""Define the parts of a Galaxy-like app consumed by model store."""

datatypes_registry: Registry
object_store: ObjectStore
security: IdEncodingHelper
model: GalaxyModelMapping


class ImportOptions:
def __init__(self, allow_edit=False, allow_library_creation=False, allow_dataset_object_edit=None):
self.allow_edit = allow_edit
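`StoreAppProtocol`, introduced above, documents exactly which attributes of the full Galaxy application the model store consumes, which also makes lightweight test doubles possible. A simplified, self-contained analogue; the attribute types below are stand-ins, not Galaxy's `Registry`, `ObjectStore`, or `IdEncodingHelper`:

    from typing_extensions import Protocol


    class ObjectStoreLike(Protocol):
        def exists(self, path: str) -> bool:
            ...


    class StoreAppLike(Protocol):
        """Protocols can declare required attributes, not just methods."""

        object_store: ObjectStoreLike
        security_salt: str


    class _MemoryStore:
        def exists(self, path: str) -> bool:
            return False


    class MinimalTestApp:
        """Satisfies StoreAppLike by shape alone."""

        security_salt = "not-a-secret"
        object_store = _MemoryStore()


    def dataset_exists(app: StoreAppLike, path: str) -> bool:
        return app.object_store.exists(path)


    print(dataset_exists(MinimalTestApp(), "datasets/1.dat"))  # False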
@@ -104,7 +124,14 @@ def remap_objects(p, k, obj):


class ModelImportStore(metaclass=abc.ABCMeta):
def __init__(self, import_options=None, app=None, user=None, object_store=None, tag_handler=None):
def __init__(
self,
import_options=None,
app: Optional[StoreAppProtocol] = None,
user=None,
object_store=None,
tag_handler=None,
):
if object_store is None:
if app is not None:
object_store = app.object_store
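The constructor keeps its flexible call pattern: pass an `object_store` explicitly, or let it be derived from the now protocol-typed `app`. A stripped-down sketch of that defaulting logic, with stand-in classes:

    from typing import Optional


    class ObjectStore:
        pass


    class App:
        def __init__(self) -> None:
            self.object_store = ObjectStore()


    class ImportStore:
        def __init__(self, app: Optional[App] = None, object_store: Optional[ObjectStore] = None) -> None:
            # Fall back to the app's object store when none is supplied directly.
            if object_store is None and app is not None:
                object_store = app.object_store
            self.app = app
            self.object_store = object_store


    assert ImportStore(app=App()).object_store is not None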
@@ -949,6 +976,11 @@ def library_properties(self):
libraries_attrs = load(open(libraries_attrs_file_name))
else:
libraries_attrs = []

library_folder_attrs_file_name = os.path.join(self.archive_dir, ATTRS_FILENAME_LIBRARY_FOLDERS)
if os.path.exists(library_folder_attrs_file_name):
libraries_attrs += load(open(library_folder_attrs_file_name))

return libraries_attrs

def jobs_properties(self):
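`library_properties` now also folds in the new `library_folders_attrs.txt` when it exists, so archives created before this commit, which lack the file, still import cleanly. The same optional-file pattern, sketched with context-managed handles and a hypothetical helper name:

    import json
    import os
    from typing import Any, List


    def load_optional_attrs(archive_dir: str, *filenames: str) -> List[Any]:
        """Concatenate attribute lists from whichever attrs files are present."""
        attrs: List[Any] = []
        for filename in filenames:
            path = os.path.join(archive_dir, filename)
            if os.path.exists(path):
                with open(path) as f:
                    attrs += json.load(f)
        return attrs


    # e.g. load_optional_attrs(archive_dir, "libraries_attrs.txt", "library_folders_attrs.txt")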
@@ -1179,15 +1211,17 @@ def __exit__(self, exc_type, exc_val, exc_tb):


class DirectoryModelExportStore(ModelExportStore):
app: Optional[StoreAppProtocol]

def __init__(
self,
export_directory,
app=None,
for_edit=False,
export_directory: str,
app: Optional[StoreAppProtocol] = None,
for_edit: bool = False,
serialize_dataset_objects=None,
export_files=None,
strip_metadata_files=True,
serialize_jobs=True,
export_files: Optional[str] = None,
strip_metadata_files: bool = True,
serialize_jobs: bool = True,
):
"""
:param export_directory: path to export directory. Will be created if it does not exist.
@@ -1222,20 +1256,22 @@ def __init__(
serialize_files_handler=self,
)
self.export_files = export_files
self.included_datasets = {}
self.included_collections = []
self.included_libraries = []
self.included_library_folders = []
self.collection_datasets = {}
self.collections_attrs = []
self.dataset_id_to_path = {}
self.included_datasets: Dict[model.DatasetInstance, Tuple[model.DatasetInstance, bool]] = {}
self.included_collections: List[Union[model.DatasetCollection, model.HistoryDatasetCollectionAssociation]] = []
self.included_libraries: List[model.Library] = []
self.included_library_folders: List[model.LibraryFolder] = []
self.collection_datasets: Set[int] = set()
self.collections_attrs: List[Union[model.DatasetCollection, model.HistoryDatasetCollectionAssociation]] = []
self.dataset_id_to_path: Dict[int, Tuple[Optional[str], Optional[str]]] = {}

self.job_output_dataset_associations = {}
self.job_output_dataset_associations: Dict[int, Dict[str, model.DatasetInstance]] = {}

def serialize_files(self, dataset, as_dict):
def serialize_files(self, dataset: model.DatasetInstance, as_dict: JsonDictT) -> None:
if self.export_files is None:
return None
elif self.export_files == "symlink":

add: Callable[[str, str], None]
if self.export_files == "symlink":
add = os.symlink
elif self.export_files == "copy":

@@ -1435,8 +1471,9 @@ def export_jobs(self, jobs: List[model.Job], jobs_attrs=None, include_job_data=T
jobs_attrs_out.write(json_encoder.encode(jobs_attrs))
return jobs_attrs

def export_history(self, history, include_hidden=False, include_deleted=False):
def export_history(self, history: model.History, include_hidden: bool = False, include_deleted: bool = False):
app = self.app
assert app, "exporting histories requires being bound to a session and Galaxy app object"
export_directory = self.export_directory

history_attrs = history.serialize(app.security, self.serialization_options)
Expand Down Expand Up @@ -1471,14 +1508,14 @@ def export_history(self, history, include_hidden=False, include_deleted=False):
include_files = True

self.add_dataset(collection_dataset, include_files=include_files)
self.collection_datasets[collection_dataset.id] = True
self.collection_datasets.add(collection_dataset.id)

# Write datasets' attributes to file.
query = (
sa_session.query(model.HistoryDatasetAssociation)
.filter(model.HistoryDatasetAssociation.history == history)
.join(model.Dataset)
.options(joinedload(model.HistoryDatasetAssociation.dataset).joinedload(model.Dataset.actions))
.options(joinedload(model.HistoryDatasetAssociation.dataset).joinedload(model.Dataset.actions)) # type: ignore[attr-defined]
.order_by(model.HistoryDatasetAssociation.hid)
.filter(model.Dataset.purged == expression.false())
)
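The new `assert app, ...` in `export_history` does double duty: it fails fast with a clear message when the store has no bound app, and it narrows `Optional[StoreAppProtocol]` to a non-None type for the rest of the method. The narrowing in miniature, with stand-in classes:

    from typing import Optional


    class App:
        secret = "s3cr3t"


    class Store:
        app: Optional[App] = None

        def export(self) -> str:
            app = self.app
            assert app, "exporting requires a bound app object"
            # From here on, mypy treats `app` as App rather than Optional[App].
            return app.secret


    store = Store()
    store.app = App()
    print(store.export())  # s3cr3t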
@@ -1492,21 +1529,30 @@ def export_history(self, history, include_hidden=False, include_deleted=False):
if dataset not in self.included_datasets:
self.add_dataset(dataset, include_files=add_dataset)

def export_library(self, library, include_hidden=False, include_deleted=False):
def export_library(self, library: model.Library, include_hidden=False, include_deleted=False):
self.included_libraries.append(library)
root_folder = getattr(library, "root_folder", library)
self.included_library_folders.append(root_folder)
self.export_library_folder(root_folder, include_hidden=include_hidden, include_deleted=include_deleted)
root_folder = library.root_folder
self.export_library_folder_contents(root_folder, include_hidden=include_hidden, include_deleted=include_deleted)

def export_library_folder(self, library_folder: model.LibraryFolder, include_hidden=False, include_deleted=False):
self.included_library_folders.append(library_folder)
self.export_library_folder_contents(
library_folder, include_hidden=include_hidden, include_deleted=include_deleted
)

def export_library_folder(self, library_folder, include_hidden=False, include_deleted=False):
def export_library_folder_contents(
self, library_folder: model.LibraryFolder, include_hidden=False, include_deleted=False
):
for library_dataset in library_folder.datasets:
ldda = library_dataset.library_dataset_dataset_association
add_dataset = (not ldda.visible or not include_hidden) and (not ldda.deleted or include_deleted)
self.add_dataset(ldda, add_dataset)
for folder in library_folder.folders:
self.export_library_folder(folder, include_hidden=include_hidden, include_deleted=include_deleted)
self.export_library_folder_contents(folder, include_hidden=include_hidden, include_deleted=include_deleted)

def add_job_output_dataset_associations(self, job_id, name, dataset_instance):
def add_job_output_dataset_associations(
self, job_id: int, name: str, dataset_instance: model.DatasetInstance
) -> None:
job_output_dataset_associations = self.job_output_dataset_associations
if job_id not in job_output_dataset_associations:
job_output_dataset_associations[job_id] = {}
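`job_output_dataset_associations` is now fully typed as `Dict[int, Dict[str, model.DatasetInstance]]`. The diff keeps the explicit membership check; `dict.setdefault` would be the equivalent one-liner, sketched here with `str` standing in for `DatasetInstance`:

    from typing import Dict

    # job id -> output name -> dataset (str stands in for model.DatasetInstance)
    job_outputs: Dict[int, Dict[str, str]] = {}


    def add_job_output(job_id: int, name: str, dataset: str) -> None:
        # Equivalent to: if job_id not in job_outputs: job_outputs[job_id] = {}
        job_outputs.setdefault(job_id, {})[name] = dataset


    add_job_output(7, "out_file1", "hda-1")
    print(job_outputs)  # {7: {'out_file1': 'hda-1'}}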
Expand All @@ -1516,7 +1562,7 @@ def export_collection(
self,
collection: Union[model.DatasetCollection, model.HistoryDatasetCollectionAssociation],
include_deleted: bool = False,
):
) -> None:
self.add_dataset_collection(collection)

# export datasets for this collection
Expand All @@ -1530,15 +1576,15 @@ def export_collection(
include_files = True

self.add_dataset(collection_dataset, include_files=include_files)
self.collection_datasets[collection_dataset.id] = True
self.collection_datasets.add(collection_dataset.id)

def add_dataset_collection(
self, collection: Union[model.DatasetCollection, model.HistoryDatasetCollectionAssociation]
):
) -> None:
self.collections_attrs.append(collection)
self.included_collections.append(collection)

def add_dataset(self, dataset: model.DatasetInstance, include_files: bool = True):
def add_dataset(self, dataset: model.DatasetInstance, include_files: bool = True) -> None:
self.included_datasets[dataset] = (dataset, include_files)

def _finalize(self):
@@ -1566,6 +1612,10 @@ def to_json(attributes):
with open(libraries_attrs_filename, "w") as libraries_attrs_out:
libraries_attrs_out.write(to_json(self.included_libraries))

library_folders_attrs_filename = os.path.join(export_directory, ATTRS_FILENAME_LIBRARY_FOLDERS)
with open(library_folders_attrs_filename, "w") as library_folder_attrs_out:
library_folder_attrs_out.write(to_json(self.included_library_folders))

collections_attrs_filename = os.path.join(export_directory, ATTRS_FILENAME_COLLECTIONS)
with open(collections_attrs_filename, "w") as collections_attrs_out:
collections_attrs_out.write(to_json(self.collections_attrs))
15 changes: 4 additions & 11 deletions lib/galaxy/model/store/discover.py
@@ -15,7 +15,6 @@
List,
NamedTuple,
Optional,
TYPE_CHECKING,
Union,
)

@@ -33,13 +32,6 @@
)
from galaxy.util.hash_util import HASH_NAME_MAP

if TYPE_CHECKING:
from galaxy.job_execution.output_collect import (
JobContext,
SessionlessJobContext,
)


log = logging.getLogger(__name__)

UNSET = object()
@@ -60,6 +52,7 @@ class ModelPersistenceContext(metaclass=abc.ABCMeta):
required for datasets and other potential model objects.
"""

job_working_directory: str # TODO: rename
max_discovered_files = float("inf")
discovered_file_count: int
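`job_working_directory: str` is a bare class-level annotation on the ABC: no value is assigned, it simply declares an attribute every concrete persistence context is expected to provide. In miniature:

    import abc


    class PersistenceContext(metaclass=abc.ABCMeta):
        # Declared but not assigned: concrete subclasses must supply it.
        job_working_directory: str


    class DiskContext(PersistenceContext):
        def __init__(self, job_working_directory: str) -> None:
            self.job_working_directory = job_working_directory


    print(DiskContext("/tmp/job_1").job_working_directory)  # /tmp/job_1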

@@ -602,7 +595,7 @@ def create_library_folder(self, parent_folder, name, description):
return nested_folder

def persist_library_folder(self, library_folder):
self.export_store.export_library(library_folder)
self.export_store.export_library_folder(library_folder)

def add_datasets_to_history(self, datasets, for_output_dataset=None):
# Consider copying these datasets to for_output_dataset copied histories
@@ -690,7 +683,7 @@ def persist_target_to_export_store(target_dict, export_store, object_store, work


def persist_elements_to_hdca(
model_persistence_context: Union["JobContext", "SessionlessJobContext", SessionlessModelPersistenceContext],
model_persistence_context: ModelPersistenceContext,
elements,
hdca,
collector=None,
@@ -900,7 +893,7 @@ class DiscoveredResultState(NamedTuple):

def discovered_file_for_element(
dataset,
model_persistence_context: Union["JobContext", "SessionlessJobContext", SessionlessModelPersistenceContext],
model_persistence_context: ModelPersistenceContext,
parent_identifiers=None,
collector=None,
) -> DiscoveredResult:
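Both signatures in this file trade a `Union` of concrete context classes for the shared `ModelPersistenceContext` base, which is why the `TYPE_CHECKING` imports of `JobContext` and `SessionlessJobContext` could be deleted above. A simplified sketch of the same move, with stand-in class names:

    import abc
    from typing import List, Union


    class Context(metaclass=abc.ABCMeta):
        @abc.abstractmethod
        def persist(self, item: str) -> None:
            ...


    class SessionContext(Context):
        def persist(self, item: str) -> None:
            print(f"session: {item}")


    class SessionlessContext(Context):
        def persist(self, item: str) -> None:
            print(f"sessionless: {item}")


    # Before: the helper had to import and name every implementation.
    def persist_all_before(ctx: Union[SessionContext, SessionlessContext], items: List[str]) -> None:
        for item in items:
            ctx.persist(item)


    # After: one abstract type covers current and future implementations.
    def persist_all(ctx: Context, items: List[str]) -> None:
        for item in items:
            ctx.persist(item)


    persist_all(SessionlessContext(), ["a", "b"])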
(Diff for the fifth changed file not shown in this capture.)
