From 40c7be64d97da02ec6b0077a0f5953adeca4f29c Mon Sep 17 00:00:00 2001 From: Nikolay Akhmetov Date: Mon, 22 Jul 2024 10:24:38 -0400 Subject: [PATCH] NickAkhmetov/CAT-775 - EPIC builders boilerplate, migrate API client (#91) --- VERSION.txt | 2 +- setup.cfg | 5 +- src/portal_visualization/builder_factory.py | 57 +- .../builders/epic_builders.py | 59 ++ .../builders/imaging_builders.py | 1 + src/portal_visualization/client.py | 610 ++++++++++++++++++ src/portal_visualization/epic_factory.py | 7 + src/portal_visualization/mock_client.py | 59 ++ src/portal_visualization/utils.py | 23 + test/test_builders.py | 191 +++--- test/test_client.py | 360 +++++++++++ test/test_epic_builders.py | 12 + 12 files changed, 1287 insertions(+), 99 deletions(-) create mode 100644 src/portal_visualization/builders/epic_builders.py create mode 100644 src/portal_visualization/client.py create mode 100644 src/portal_visualization/epic_factory.py create mode 100644 src/portal_visualization/mock_client.py create mode 100644 test/test_client.py create mode 100644 test/test_epic_builders.py diff --git a/VERSION.txt b/VERSION.txt index 3a4036f..53a75d6 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -0.2.5 +0.2.6 diff --git a/setup.cfg b/setup.cfg index 984c539..dbc1a3e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,13 +20,14 @@ package_dir = packages = find: python_requires = >=3.7 install_requires = - vitessce>=3.2.6 - hubmap-commons>=2.0.12 + vitessce>=3.2.8 + hubmap-commons>=2.0.15 requests>=2.27.1 nbformat==5.1.3 zarr>=2.17.2 aiohttp>=3.8.1 fsspec>=2022.1.0 + python-datauri>=2.2.0 [options.packages.find] where = src diff --git a/src/portal_visualization/builder_factory.py b/src/portal_visualization/builder_factory.py index 40098f7..f67788b 100644 --- a/src/portal_visualization/builder_factory.py +++ b/src/portal_visualization/builder_factory.py @@ -1,29 +1,23 @@ from .builders.base_builders import NullViewConfBuilder from .builders.sprm_builders import ( - StitchedCytokitSPRMViewConfBuilder, TiledSPRMViewConfBuilder, - MultiImageSPRMAnndataViewConfBuilder + StitchedCytokitSPRMViewConfBuilder, + TiledSPRMViewConfBuilder, + MultiImageSPRMAnndataViewConfBuilder, ) from .builders.imaging_builders import ( SeqFISHViewConfBuilder, IMSViewConfBuilder, ImagePyramidViewConfBuilder, - NanoDESIViewConfBuilder + NanoDESIViewConfBuilder, ) from .builders.anndata_builders import ( MultiomicAnndataZarrViewConfBuilder, SpatialRNASeqAnnDataZarrViewConfBuilder, RNASeqAnnDataZarrViewConfBuilder, - SpatialMultiomicAnnDataZarrViewConfBuilder -) -from .builders.scatterplot_builders import ( - RNASeqViewConfBuilder, ATACSeqViewConfBuilder -) -from .assays import ( - SEQFISH, - MALDI_IMS, - NANODESI, - SALMON_RNASSEQ_SLIDE + SpatialMultiomicAnnDataZarrViewConfBuilder, ) +from .builders.scatterplot_builders import RNASeqViewConfBuilder, ATACSeqViewConfBuilder +from .assays import SEQFISH, MALDI_IMS, NANODESI, SALMON_RNASSEQ_SLIDE # This function processes the hints and returns a tuple of booleans @@ -40,7 +34,17 @@ def process_hints(hints): is_spatial = "spatial" in hints is_support = "is_support" in hints - return is_image, is_rna, is_atac, is_sprm, is_codex, is_anndata, is_json, is_spatial, is_support + return ( + is_image, + is_rna, + is_atac, + is_sprm, + is_codex, + is_anndata, + is_json, + is_spatial, + is_support, + ) # This function is the main entrypoint for the builder factory. 
@@ -50,18 +54,27 @@ def process_hints(hints):
 # `get_assaytype` is a function which takes an entity UUID and returns
 # a dict containing the assaytype and vitessce-hints for that entity.
 def get_view_config_builder(entity, get_assaytype, parent=None):
-    if (entity.get('uuid') is None):
+    if entity.get("uuid") is None:
         raise ValueError("Provided entity does not have a uuid")
     assay = get_assaytype(entity)
-    assay_name = assay.get('assaytype')
-    hints = assay.get('vitessce-hints', [])
-    is_image, is_rna, is_atac, is_sprm, is_codex, is_anndata, is_json, is_spatial, is_support = process_hints(
-        hints)
+    assay_name = assay.get("assaytype")
+    hints = assay.get("vitessce-hints", [])
+    (
+        is_image,
+        is_rna,
+        is_atac,
+        is_sprm,
+        is_codex,
+        is_anndata,
+        is_json,
+        is_spatial,
+        is_support,
+    ) = process_hints(hints)
     # vis-lifted image pyramids
-    if (parent is not None):
-        if (is_support and is_image):
-            ancestor_assaytype = get_assaytype(parent).get('assaytype')
+    if parent is not None:
+        if is_support and is_image:
+            ancestor_assaytype = get_assaytype(parent).get("assaytype")
             if SEQFISH == ancestor_assaytype:
                 # e.g. parent = c6a254b2dc2ed46b002500ade163a7cc
                 # e.g. support = 9db61adfc017670a196ea9b3ca1852a0
diff --git a/src/portal_visualization/builders/epic_builders.py b/src/portal_visualization/builders/epic_builders.py
new file mode 100644
index 0000000..b86ab75
--- /dev/null
+++ b/src/portal_visualization/builders/epic_builders.py
@@ -0,0 +1,59 @@
+from abc import ABC, abstractmethod
+from vitessce import VitessceConfig
+from .base_builders import ConfCells
+from ..utils import get_conf_cells
+
+
+# EPIC builders take in a vitessce conf output by a previous builder and modify it
+# accordingly to add the EPIC-specific configuration.
+class EPICConfBuilder(ABC):
+    def __init__(self, base_conf: ConfCells, epic_uuid) -> None:
+        conf, cells = base_conf
+
+        if conf is None:
+            raise ValueError("ConfCells object must have a conf attribute")
+
+        self._is_plural = isinstance(conf, list)
+
+        if self._is_plural:
+            self._base_conf = [VitessceConfig.from_dict(c) for c in conf]
+        else:
+            self._base_conf: VitessceConfig = VitessceConfig.from_dict(conf)
+
+        self._epic_uuid = epic_uuid
+
+    def get_conf_cells(self):
+        self.apply()
+        if self._is_plural:
+            return get_conf_cells([conf.to_dict() for conf in self._base_conf])
+        return get_conf_cells(self._base_conf)
+
+    def apply(self):
+        if self._is_plural:
+            for conf in self._base_conf:
+                self._apply(conf)
+        else:
+            self._apply(self._base_conf)
+
+    @abstractmethod
+    def _apply(self, conf):  # pragma: no cover
+        pass
+
+
+class SegmentationMaskBuilder(EPICConfBuilder):
+    def _apply(self, conf):
+        datasets = conf.get_datasets()
+        # Proof of concept using one of the kaggle segmentation masks for now
+        # segmentations = ObsSegmentationsOmeTiffWrapper(
+        #     img_url='https://assets.hubmapconsortium.org/c9d9ab5c9ee9642b60dd351024968627/ometiff-pyramids/VAN0042-RK-3-18-registered-PAS-to-postAF-registered.ome_mask.ome.tif?token=AgndN7NVbn83wwDXjpnY1Y0lDoJj2j7zOGmn1WN6qr9pqdkjKmt9C1XYm4KrlWrOXE9rVJvpnEKrPjIXrlKd1hmDGjV',
+        #     # offsets_path=f'./{name}/{name}/offsets/{name}.segmentations.offsets.json',
+        #     obs_types_from_channel_names=True,
+        # )
+        # for dataset in datasets:
+        #     dataset.add_object(segmentations)
+        pass
diff --git a/src/portal_visualization/builders/imaging_builders.py b/src/portal_visualization/builders/imaging_builders.py
index 1aa5bac..e54c7d8 100644
--- 
a/src/portal_visualization/builders/imaging_builders.py +++ b/src/portal_visualization/builders/imaging_builders.py @@ -76,6 +76,7 @@ def get_conf_cells(self, **kwargs): ) if 'separate/' not in path # Exclude separate/* in MALDI-IMS ] + found_images = sorted(found_images) if len(found_images) == 0: message = f"Image pyramid assay with uuid {self._uuid} has no matching files" raise FileNotFoundError(message) diff --git a/src/portal_visualization/client.py b/src/portal_visualization/client.py new file mode 100644 index 0000000..f74ca99 --- /dev/null +++ b/src/portal_visualization/client.py @@ -0,0 +1,610 @@ +from collections import namedtuple +import traceback +from dataclasses import dataclass + +# Flask is safe to import since hubmap_commons is a dependency +from flask import abort, current_app +import requests +import json +from werkzeug.exceptions import HTTPException + + +from .utils import files_from_response +from .builder_factory import get_view_config_builder +from .epic_factory import get_epic_builder +from .builders.base_builders import ConfCells + +Entity = namedtuple("Entity", ["uuid", "type", "name"], defaults=["TODO: name"]) + + +@dataclass +class VitessceConfLiftedUUID: + vitessce_conf: dict + vis_lifted_uuid: str + + +@dataclass +class PublicationJSONLiftedUUID: + publication_json: dict + vis_lifted_uuid: str + + +def _get_hits(response_json): + """ + The repeated key makes error messages ambiguous. + Split it into separate calls so we can tell which fails. + """ + outer_hits = response_json["hits"] + inner_hits = outer_hits["hits"] + return inner_hits + + +def _handle_request(url, headers=None, body_json=None): + try: + response = ( + requests.post(url, headers=headers, json=body_json) + if body_json + else requests.get(url, headers=headers) + ) + except requests.exceptions.ConnectTimeout as error: # pragma: no cover + current_app.logger.error(error) + abort(504) + try: + response.raise_for_status() + except requests.exceptions.HTTPError as error: # pragma: no cover + current_app.logger.error(error.response.text) + status = error.response.status_code + if status in [400, 404]: + # The same 404 page will be returned, + # whether it's a missing route in portal-ui, + # or a missing entity in the API. + abort(status) + if status in [401]: + # I believe we have 401 errors when the globus credentials + # have expired, but are still in the flask session. 
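+            # Aborting with the 401 (rather than re-raising) lets the UI
+            # prompt for a fresh login instead of surfacing a 500.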
+            abort(status)
+        raise
+    return response
+
+
+class ApiClient:
+    def __init__(
+        self,
+        groups_token=None,
+        elasticsearch_endpoint=None,
+        portal_index_path=None,
+        ubkg_endpoint=None,
+        assets_endpoint=None,
+        soft_assay_endpoint=None,
+        soft_assay_endpoint_path=None,
+        entity_api_endpoint=None,
+    ):
+        self.groups_token = groups_token
+        self.ubkg_endpoint = ubkg_endpoint
+        self.assets_endpoint = assets_endpoint
+        self.entity_api_endpoint = entity_api_endpoint
+
+        self._elasticsearch_endpoint = elasticsearch_endpoint
+        self._portal_index_path = portal_index_path
+
+        self._soft_assay_endpoint = soft_assay_endpoint
+        self._soft_assay_endpoint_path = soft_assay_endpoint_path
+
+        self.elasticsearch_url = f"{elasticsearch_endpoint}{portal_index_path}"
+        self.soft_assay_url = f"{soft_assay_endpoint}/{soft_assay_endpoint_path}"
+
+    def _get_headers(self):
+        headers = (
+            {"Authorization": "Bearer " + self.groups_token}
+            if self.groups_token
+            else {}
+        )
+        return headers
+
+    def _clean_headers(self, headers):
+        if "Authorization" in headers:
+            headers["Authorization"] = "REDACTED"
+        return headers
+
+    def _request(self, url, body_json=None):
+        """
+        Makes a request to HuBMAP APIs behind the API Gateway (Search, Entity, UUID).
+        """
+        headers = self._get_headers()
+        response = _handle_request(url, headers, body_json)
+        status = response.status_code
+        # HuBMAP APIs will redirect to s3 if the response payload is over 10 MB.
+        if status in [303]:
+            s3_resp = _handle_request(response.content).content
+            return json.loads(s3_resp)
+        return response.json()
+
+    def get_all_dataset_uuids(self):
+        size = 10000  # Default ES limit
+        query = {
+            "size": size,
+            "post_filter": {"term": {"entity_type.keyword": "Dataset"}},
+            "_source": ["empty-returns-everything"],
+        }
+        response_json = self._request(
+            self.elasticsearch_url,
+            body_json=query,
+        )
+        uuids = [hit["_id"] for hit in _get_hits(response_json)]
+        if len(uuids) == size:
+            raise Exception("At least 10k datasets: need to make multiple requests")
+        return uuids
+
+    def get_entities(
+        self,
+        plural_lc_entity_type=None,
+        non_metadata_fields=[],
+        constraints={},
+        uuids=[],
+        query_override=None,
+    ):
+        entity_type = plural_lc_entity_type[:-1].capitalize()
+        query = {
+            "size": 10000,  # Default ES limit
+            "post_filter": {"term": {"entity_type.keyword": entity_type}},
+            "query": query_override or _make_query(constraints, uuids),
+            "_source": {
+                "include": [*non_metadata_fields, "mapped_metadata", "metadata"],
+                "exclude": ["*.files"],
+            },
+        }
+        response_json = self._request(self.elasticsearch_url, body_json=query)
+        sources = [hit["_source"] for hit in _get_hits(response_json)]
+        total_hits = response_json["hits"]["total"]["value"]
+        if len(sources) < total_hits:
+            raise Exception("Incomplete results: need to make multiple requests")
+        flat_sources = _flatten_sources(sources, non_metadata_fields)
+        filled_flat_sources = _fill_sources(flat_sources)
+        return filled_flat_sources
+
+    def get_entity(self, uuid=None, hbm_id=None):
+        if uuid is not None and hbm_id is not None:
+            raise Exception("Only UUID or HBM ID should be provided, not both")
+        query = {
+            "query":
+            # ES guarantees that _id is unique, so this is best:
+            (
+                {"ids": {"values": [uuid]}}
+                if uuid
+                else {"match": {"hubmap_id.keyword": hbm_id}}
+            )
+            # With default mapping, without ".keyword", it splits into tokens,
+            # and we get multiple substring matches, instead of unique match.
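+            # e.g. a bare "hubmap_id" match for "HBM123.ABCD.456" could also
+            # hit "HBM999.ABCD.456", since the "ABCD" and "456" tokens are shared.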
+        }
+
+        response_json = self._request(self.elasticsearch_url, body_json=query)
+
+        hits = _get_hits(response_json)
+        return _get_entity_from_hits(
+            hits, has_token=self.groups_token, uuid=uuid, hbm_id=hbm_id
+        )
+
+    def get_latest_entity_uuid(self, uuid, type):
+        lowercase_type = type.lower()
+        route = f"/{lowercase_type}s/{uuid}/revisions"
+        response_json = self._request(self.entity_api_endpoint + route)
+        return _get_latest_uuid(response_json)
+
+    def get_files(self, uuids):
+        query = {
+            "size": 10000,
+            "query": {"bool": {"must": [{"ids": {"values": uuids}}]}},
+            "_source": ["files.rel_path"],
+        }
+        response_json = self._request(self.elasticsearch_url, body_json=query)
+        return files_from_response(response_json)
+
+    def get_vitessce_conf_cells_and_lifted_uuid(
+        self, entity, marker=None, wrap_error=True, parent=None, epic_uuid=None
+    ):
+        """
+        Returns a dataclass with vitessce_conf and vis_lifted_uuid.
+        """
+        vis_lifted_uuid = None  # default, only gets set if there is a vis-lifted entity
+        image_pyramid_descendants = self.get_descendant_to_lift(entity["uuid"])
+
+        # First, try "vis-lifting": Display image pyramids on their parent entity pages.
+        # Historical context: the visualization requires pyramidal ome tiff images, which
+        # are generated via an additional pipeline. Since we are displaying the visualization
+        # on the primary dataset page, we need to "lift" the visualization to the parent entity.
+        if image_pyramid_descendants:
+            derived_entity = image_pyramid_descendants
+            # TODO: Entity structure will change in the future to be consistent
+            # about "files". Bill confirms that when the new structure comes in
+            # there will be a period of backward compatibility to allow us to migrate.
+
+            metadata = derived_entity.get("metadata", {})
+
+            if metadata.get(
+                "files"
+            ):  # pragma: no cover # We have separate tests for the builder logic
+                derived_entity["files"] = metadata.get("files", [])
+                vitessce_conf = self.get_vitessce_conf_cells_and_lifted_uuid(
+                    derived_entity, marker=marker, wrap_error=wrap_error, parent=entity
+                ).vitessce_conf
+                vis_lifted_uuid = derived_entity["uuid"]
+            else:  # no files
+                error = (
+                    f'Related image entity {derived_entity["uuid"]} '
+                    + 'is missing file information (no "files" key found in its metadata).'
+                )
+                current_app.logger.info(
+                    f'Missing metadata error encountered in dataset {entity["uuid"]}: {error}'
+                )
+                vitessce_conf = _create_vitessce_error(error)
+        # If the current entity does not have files and was not determined to have a
+        # visualization during search API indexing, stop here and return an empty conf.
+        elif not entity.get("files") and not entity.get("visualization"):
+            vitessce_conf = ConfCells(None, None)
+
+        # Otherwise, just try to visualize the data for the entity itself:
+        else:  # pragma: no cover # We have separate tests for the builder logic
+            try:
+                Builder = get_view_config_builder(entity, self._get_assaytype(), parent)
+                builder = Builder(entity, self.groups_token, self.assets_endpoint)
+                vitessce_conf = builder.get_conf_cells(marker=marker)
+            except Exception as e:
+                if not wrap_error:
+                    raise e
+                current_app.logger.error(
+                    f"Building vitessce conf threw error: {traceback.format_exc()}"
+                )
+                vitessce_conf = _create_vitessce_error(str(e))
+
+        if (
+            epic_uuid is not None and vitessce_conf.conf is not None
+        ):  # pragma: no cover # TODO
+            EPICBuilder = get_epic_builder(epic_uuid)
+            vitessce_conf = EPICBuilder(vitessce_conf, epic_uuid).get_conf_cells()
+
+        return VitessceConfLiftedUUID(
+            vitessce_conf=vitessce_conf, vis_lifted_uuid=vis_lifted_uuid
+        )
+
+    # Helper to create a function that fetches assaytype from the API with current headers
+    def _get_assaytype(self):  # pragma: no cover
+        def get_assaytype(entity):
+            uuid = entity.get("uuid")
+
+            url = f"{self.soft_assay_url}/{uuid}"
+            headers = self._get_headers()
+            response = None
+            try:
+                response = requests.get(url, headers=headers)
+                return response.json()
+            except Exception as e:
+                # Redact Authorization header from logs
+                cleaned_headers = self._clean_headers(headers)
+                if response is not None:
+                    status = response.status_code
+                else:
+                    status = None
+                current_app.logger.error(
+                    {
+                        "source": "get_assaytype",
+                        "url": url,
+                        "headers": cleaned_headers,
+                        "status": status,
+                        "error": str(e),
+                    }
+                )
+                current_app.logger.error(
+                    f"Fetching assaytype threw error: {traceback.format_exc()}"
+                )
+                raise e
+
+        return get_assaytype
+
+    def _file_request(self, url):
+        headers = (
+            {"Authorization": "Bearer " + self.groups_token}
+            if self.groups_token
+            else {}
+        )
+
+        if self.groups_token:
+            url += f"?token={self.groups_token}"
+
+        return _handle_request(url, headers).text
+
+    def get_descendant_to_lift(self, uuid, is_publication=False):
+        """
+        Given an entity uuid (and whether we want publication support
+        rather than an image pyramid), returns the doc of the most recent
+        descendant that is in QA or Published status.
+        """
+
+        hints = [{"term": {"vitessce-hints": "is_support"}}]
+        if not is_publication:
+            hints.append({"term": {"vitessce-hints": "is_image"}})
+
+        query = {
+            "query": {
+                "bool": {
+                    "must": [
+                        *hints,
+                        {"term": {"ancestor_ids": uuid}},
+                        {"terms": {"mapped_status.keyword": ["QA", "Published"]}},
+                    ]
+                }
+            },
+            "sort": [{"last_modified_timestamp": {"order": "desc"}}],
+            "size": 1,
+        }
+        response_json = self._request(
+            self.elasticsearch_url,
+            body_json=query,
+        )
+
+        try:
+            hits = _get_hits(response_json)
+            source = hits[0]["_source"]
+        except IndexError:
+            source = None
+        return source
+
+    # Helper function for HuBMAP publications
+    # Returns the publication ancillary json and the vis-lifted uuid
+    # from the publication support entity
+    def get_publication_ancillary_json(self, entity):
+        """
+        Returns a dataclass with publication_json and vis_lifted_uuid.
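+        publication_json defaults to {} if no publication support entity is
+        found or its ancillary JSON cannot be fetched.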
+        """
+        publication_json = {}
+        publication_ancillary_uuid = None
+        publication_ancillary_descendant = self.get_descendant_to_lift(
+            entity["uuid"], is_publication=True
+        )
+        if publication_ancillary_descendant:
+            publication_ancillary_uuid = publication_ancillary_descendant["uuid"]
+            publication_json_path = (
+                f"{self.assets_endpoint}/"
+                f"{publication_ancillary_uuid}/publication_ancillary.json"
+            )
+            try:
+                publication_resp = self._file_request(publication_json_path)
+                publication_json = json.loads(publication_resp)
+            except HTTPException:  # pragma: no cover
+                current_app.logger.error(
+                    f"Fetching publication ancillary json threw error: {traceback.format_exc()}"
+                )
+
+        return PublicationJSONLiftedUUID(
+            publication_json=publication_json,
+            vis_lifted_uuid=publication_ancillary_uuid,
+        )
+
+    # UBKG API methods
+
+    # Helper for making requests to the UBKG API
+    def _get_ubkg(self, path):
+        return self._request(f"{self.ubkg_endpoint}/{path}")
+
+    # Retrieves field descriptions from the UBKG API
+    def get_metadata_descriptions(self):
+        return self._get_ubkg("field-descriptions")
+
+
+def _make_query(constraints, uuids):
+    """
+    Given a constraints dict of lists,
+    return an ES query that handles all structural variations.
+    Repeated values for a single key are OR;
+    Separate keys are AND.
+
+    >>> constraints = {'color': ['red', 'green'], 'number': ['42']}
+    >>> uuids = ['abc', '123']
+    >>> query = _make_query(constraints, uuids)
+    >>> from pprint import pp
+    >>> pp(query['bool'])
+    {'must': [{'bool': {'should': [{'term': {'metadata.metadata.color.keyword': 'red'}},
+                                   {'term': {'mapped_metadata.color.keyword': 'red'}},
+                                   {'term': {'metadata.metadata.color.keyword': 'green'}},
+                                   {'term': {'mapped_metadata.color.keyword': 'green'}}]}},
+              {'bool': {'should': [{'term': {'metadata.metadata.number.keyword': '42'}},
+                                   {'term': {'mapped_metadata.number.keyword': '42'}}]}},
+              {'ids': {'values': ['abc', '123']}}]}
+    """
+    shoulds = [
+        [
+            {"term": {f"{root}.{k}.keyword": v}}
+            for v in v_list
+            for root in ["metadata.metadata", "mapped_metadata"]
+        ]
+        for k, v_list in constraints.items()
+    ]
+    musts = [{"bool": {"should": should}} for should in shoulds]
+    if uuids:
+        musts.append({"ids": {"values": uuids}})
+    query = {"bool": {"must": musts}}
+
+    return query
+
+
+def _get_nested(path, nested):
+    """
+    >>> path = 'a.b.c'
+    >>> nested = {'a': {'b': {'c': 123}}}
+
+    >>> _get_nested(path, {}) is None
+    True
+    >>> _get_nested(path, nested)
+    123
+    """
+    tokens = path.split(".")
+    for t in tokens:
+        nested = nested.get(t, {})
+    return nested or None
+
+
+def _flatten_sources(sources, non_metadata_fields):
+    """
+    >>> from pprint import pp
+    >>> donor_sources = [
+    ...     {'uuid': 'abcd1234', 'name': 'Ann',
+    ...      'other': 'skipped',
+    ...      'mapped_metadata': {'age': [40], 'weight': [150]}
+    ...     },
+    ...     {'uuid': 'wxyz1234', 'name': 'Bob',
+    ...      'donor': {'hubmap_id': 'HBM1234.ABCD.7890'},
+    ...      'mapped_metadata': {'age': [50], 'multi': ['A', 'B', 'C']}
+    ...     }]
+    >>> pp(_flatten_sources(donor_sources, ['uuid', 'name', 'donor.hubmap_id']))
+    [{'uuid': 'abcd1234',
+      'name': 'Ann',
+      'donor.hubmap_id': None,
+      'age': '40',
+      'weight': '150'},
+     {'uuid': 'wxyz1234',
+      'name': 'Bob',
+      'donor.hubmap_id': 'HBM1234.ABCD.7890',
+      'age': '50',
+      'multi': 'A, B, C'}]
+
+    >>> sample_sources = [
+    ...     {'uuid': 'abcd1234',
+    ...      'metadata': {'organ': 'belly button',
+    ...                   'organ_donor_data': {'example': 'Should remove!'},
+    ...                   'metadata': {'example': 'Should remove!'}}
+    ... 
}] + >>> pp(_flatten_sources(sample_sources, ['uuid', 'name'])) + [{'uuid': 'abcd1234', 'name': None, 'organ': 'belly button'}] + """ + flat_sources = [ + { + **{field: _get_nested(field, source) for field in non_metadata_fields}, + # This gets sample and donor metadata. + **source.get("metadata", {}), + # This gets donor metadata, and concatenates nested lists. + **{ + k: ", ".join(str(s) for s in v) + for (k, v) in source.get("mapped_metadata", {}).items() + }, + } + for source in sources + ] + for source in flat_sources: + if "assay_type" in source.get("metadata", {}): + # For donors, this is the metadata in EAV form, + # for samples, this is a placeholder for dev-search, + # but for datasets, we want to move it up a level. + source.update(source["metadata"]) # pragma: no cover + + for field in [ + "metadata", + # From datasets JSON: + "dag_provenance_list", + "extra_metadata", + "files_info_alt_path", + # Dataset TSV columns to hide: + "antibodies_path", + "contributors_path", + "version", + # From samples: + "organ_donor_data", + "living_donor_data", + ]: + source.pop(field, None) # pragma: no cover + return flat_sources + + +def _fill_sources(sources): + """ + Lineup infers columns from first row. + Just to be safe, fill in all keys for all rows. + + >>> sources = [{'a': 1}, {'b': 2}, {}] + >>> from pprint import pp + >>> pp(_fill_sources(sources), width=30, sort_dicts=True) + [{'a': 1, 'b': ''}, + {'a': '', 'b': 2}, + {'a': '', 'b': ''}] + """ + all_keys = set().union(*(source.keys() for source in sources)) + for source in sources: + for missing_key in all_keys - source.keys(): + source[missing_key] = "" + return sources + + +def _get_entity_from_hits(hits, has_token=None, uuid=None, hbm_id=None): + """ + >>> _get_entity_from_hits(['fake-hit-1', 'fake-hit-2']) + Traceback (most recent call last): + ... + Exception: ID not unique; got 2 matches + + >>> def error(f): + ... try: f() + ... except Exception as e: print(type(e).__name__) + + >>> error(lambda: _get_entity_from_hits([], hbm_id='HBM123.XYZ.456')) + Forbidden + + >>> error(lambda: _get_entity_from_hits([], uuid='0123456789abcdef0123456789abcdef')) + Forbidden + + >>> error(lambda: _get_entity_from_hits([], uuid='0123456789abcdef0123456789abcdef', + ... has_token=True)) + NotFound + + >>> error(lambda: _get_entity_from_hits([], uuid='too-short')) + NotFound + + >>> _get_entity_from_hits([{'_source': 'fake-entity'}]) + 'fake-entity' + + """ + if len(hits) == 0: + if (uuid and len(uuid) == 32 or hbm_id) and not has_token: + # Assume that the UUID is not yet published: + # UI will suggest logging in. 
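+            # 403: the entity may exist but requires auth;
+            # 404 below: no match even for an authenticated request.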
+            abort(403)
+        abort(404)
+    if len(hits) > 1:
+        raise Exception(f"ID not unique; got {len(hits)} matches")
+    entity = hits[0]["_source"]
+    return entity
+
+
+def _get_latest_uuid(revisions):
+    """
+    >>> revisions = [{'a_uuid': 'x', 'revision_number': 1}, {'a_uuid': 'z', 'revision_number': 10}]
+    >>> _get_latest_uuid(revisions)
+    'z'
+    """
+    clean_revisions = [
+        {("uuid" if k.endswith("_uuid") else k): v for k, v in revision.items()}
+        for revision in revisions
+    ]
+    return max(clean_revisions, key=lambda revision: revision["revision_number"])[
+        "uuid"
+    ]
+
+
+def _create_vitessce_error(error):
+    return ConfCells(
+        {
+            "name": "Error",
+            "version": "1.0.4",
+            "datasets": [],
+            "initStrategy": "none",
+            "layout": [
+                {
+                    "component": "description",
+                    "props": {
+                        "description": f"Error while generating the Vitessce configuration: {error}"
+                    },
+                    "x": 0,
+                    "y": 0,
+                    "w": 12,
+                    "h": 1,
+                }
+            ],
+        },
+        None,
+    )
diff --git a/src/portal_visualization/epic_factory.py b/src/portal_visualization/epic_factory.py
new file mode 100644
index 0000000..d3f339d
--- /dev/null
+++ b/src/portal_visualization/epic_factory.py
@@ -0,0 +1,7 @@
+from .builders.epic_builders import SegmentationMaskBuilder
+
+
+# This function determines which builder to use for the given entity.
+# Since we only have one builder for EPICs right now, we can just return it.
+def get_epic_builder(epic_uuid):
+    return SegmentationMaskBuilder
diff --git a/src/portal_visualization/mock_client.py b/src/portal_visualization/mock_client.py
new file mode 100644
index 0000000..e52e812
--- /dev/null
+++ b/src/portal_visualization/mock_client.py
@@ -0,0 +1,59 @@
+import json
+
+from datauri import DataURI
+
+from .builders.base_builders import ConfCells
+from .client import ApiClient
+
+
+class MockApiClient(ApiClient):  # pragma: no cover
+    def get_entity(self, uuid=None, hbm_id=None):
+        return {
+            'created': '2020-01-01 00:00:00',
+            'modified': '2020-01-01 00:00:00',
+            'provenance_user_displayname': 'Chuck McCallum',
+            'provenance_user_email': 'mccalluc@example.com',
+            'provenance_group_name': 'Mock Group',
+            'hubmap_id': 'abcd-1234',
+            'description': 'Mock Entity'
+        }
+
+    def get_vitessce_conf_cells(self, entity):
+        return ConfCells(_get_mock_vitessce_conf(), None)
+
+
+def _get_mock_vitessce_conf():  # pragma: no cover
+    cellsData = json.dumps({'cell-id-1': {'mappings': {'t-SNE': [1, 1]}}})
+    cellsUri = DataURI.make(
+        'text/plain', charset='us-ascii', base64=True, data=cellsData
+    )
+    token = 'fake-token'
+    return {
+        'description': 'DEMO',
+        'layers': [
+            {
+                'name': 'cells',
+                'type': 'CELLS',
+                'url': cellsUri,
+                'requestInit': {
+                    'headers': {
+                        'Authorization': 'Bearer ' + token
+                    }
+                }
+            },
+        ],
+        'name': 'Linnarsson',
+        'staticLayout': [
+            {
+                'component': 'scatterplot',
+                'props': {
+                    'mapping': 'UMAP',
+                    'view': {
+                        'zoom': 4,
+                        'target': [0, 0, 0]
+                    }
+                },
+                'x': 0, 'y': 0, 'w': 12, 'h': 2
+            },
+        ]
+    }
diff --git a/src/portal_visualization/utils.py b/src/portal_visualization/utils.py
index a8261e9..2b62eed 100644
--- a/src/portal_visualization/utils.py
+++ b/src/portal_visualization/utils.py
@@ -70,3 +70,26 @@ def _get_cells_from_obj(vc_obj):
         nbformat.v4.new_code_cell(f'from vitessce import {", ".join(imports)}'),
         nbformat.v4.new_code_cell(f'conf = {conf_expression}\nconf.widget()'),
     ]
+
+
+def files_from_response(response_json):
+    '''
+    >>> response_json = {'hits': {'hits': [
+    ...     {
+    ...         '_id': '1234',
+    ...         '_source': {
+    ...             'files': [{
+    ...                 'rel_path': 'abc.txt'
+    ...             }]
+    ...         }
+    ...     }
+    ... 
]}} + >>> files_from_response(response_json) + {'1234': ['abc.txt']} + ''' + hits = response_json['hits']['hits'] + return { + hit['_id']: [ + file['rel_path'] for file in hit['_source'].get('files', []) + ] for hit in hits + } diff --git a/test/test_builders.py b/test/test_builders.py index fff4cca..0842b8c 100644 --- a/test/test_builders.py +++ b/test/test_builders.py @@ -9,15 +9,19 @@ import pytest import zarr -from src.portal_visualization.builder_factory \ - import get_view_config_builder, has_visualization +from src.portal_visualization.epic_factory import get_epic_builder +from src.portal_visualization.builders.base_builders import ConfCells +from src.portal_visualization.builder_factory import ( + get_view_config_builder, + has_visualization, +) def str_presenter(dumper, data): # From https://stackoverflow.com/a/33300001 if len(data.splitlines()) > 1: # check for multiline string - return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|') - return dumper.represent_scalar('tag:yaml.org,2002:str', data) + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + return dumper.represent_scalar("tag:yaml.org,2002:str", data) yaml.add_representer(str, str_presenter) @@ -28,142 +32,180 @@ class MockResponse: content: str -good_entity_paths = list((Path(__file__).parent / 'good-fixtures').glob("*/*-entity.json")) +good_entity_paths = list( + (Path(__file__).parent / "good-fixtures").glob("*/*-entity.json") +) assert len(good_entity_paths) > 0 -image_pyramids = ["IMSViewConfBuilder", "SeqFISHViewConfBuilder", "NanoDESIViewConfBuilder"] -image_pyramid_paths = [path for path in good_entity_paths if path.parent.name in image_pyramids] +image_pyramids = [ + "IMSViewConfBuilder", + "SeqFISHViewConfBuilder", + "NanoDESIViewConfBuilder", +] +image_pyramid_paths = [ + path for path in good_entity_paths if path.parent.name in image_pyramids +] assert len(image_pyramid_paths) > 0 -bad_entity_paths = list((Path(__file__).parent / 'bad-fixtures').glob("*-entity.json")) +bad_entity_paths = list((Path(__file__).parent / "bad-fixtures").glob("*-entity.json")) assert len(bad_entity_paths) > 0 -assaytypes_path = Path(__file__).parent / 'assaytype-fixtures' +assaytypes_path = Path(__file__).parent / "assaytype-fixtures" assert assaytypes_path.is_dir() -defaults = json.load((Path(__file__).parent.parent / 'src/defaults.json').open()) +defaults = json.load((Path(__file__).parent.parent / "src/defaults.json").open()) default_assaytype = { - 'assaytype': 'Null', - 'vitessce-hints': [], + "assaytype": "Null", + "vitessce-hints": [], } def get_assaytype(entity): - uuid = entity.get('uuid') + uuid = entity.get("uuid") if uuid is None: # pragma: no cover return default_assaytype - assay = json.loads(assaytypes_path.joinpath(f'{uuid}.json').read_text()) + assay = json.loads(assaytypes_path.joinpath(f"{uuid}.json").read_text()) return assay @pytest.mark.parametrize( "has_vis_entity", [ - (False, {'uuid': "2c2179ea741d3bbb47772172a316a2bf"}), - (True, json.loads(Path.read_text(good_entity_paths[0]))) + (False, {"uuid": "2c2179ea741d3bbb47772172a316a2bf"}), + (True, json.loads(Path.read_text(good_entity_paths[0]))), # If the first fixture returns a Null builder this would break. 
], - ids=lambda has_vis_entity: f'has_visualization={has_vis_entity[0]}') + ids=lambda has_vis_entity: f"has_visualization={has_vis_entity[0]}", +) def test_has_visualization(has_vis_entity): has_vis, entity = has_vis_entity - assert has_vis == has_visualization(entity, get_assaytype) + parent = entity.get("parent") or None # Only used for image pyramids + assert has_vis == has_visualization(entity, get_assaytype, parent) def mock_zarr_store(entity_path, mocker): # Need to mock zarr.open to yield correct values for different scenarios z = zarr.open() - gene_array = zarr.array(['ENSG00000139618', 'ENSG00000139619', 'ENSG00000139620']) - is_annotated = 'is-annotated' in entity_path.name - is_multiome = 'multiome' in entity_path.name + gene_array = zarr.array(["ENSG00000139618", "ENSG00000139619", "ENSG00000139620"]) + is_annotated = "is-annotated" in entity_path.name + is_multiome = "multiome" in entity_path.name if is_multiome: - obs = z.create_group('mod/rna/obs') - var = z.create_group('mod/rna/var') - group_names = ['leiden_wnn', 'leiden_rna', 'cluster_cbg', 'cluster_cbb'] + obs = z.create_group("mod/rna/obs") + var = z.create_group("mod/rna/var") + group_names = ["leiden_wnn", "leiden_rna", "cluster_cbg", "cluster_cbb"] if is_annotated: - group_names.append('predicted_label') + group_names.append("predicted_label") groups = obs.create_groups(*group_names) for group in groups: - group['categories'] = zarr.array(['0', '1', '2']) + group["categories"] = zarr.array(["0", "1", "2"]) - obs = z.create_group('obs') - obs['marker_gene_0'] = gene_array + obs = z.create_group("obs") + obs["marker_gene_0"] = gene_array if is_annotated: - path = f'{"mod/rna/" if is_multiome else ""}uns/annotation_metadata/is_annotated' + path = ( + f'{"mod/rna/" if is_multiome else ""}uns/annotation_metadata/is_annotated' + ) z[path] = True - if 'asct' in entity_path.name: - z['obs/predicted.ASCT.celltype'] = True # only checked for membership in zarr group - elif 'predicted-label' in entity_path.name: - z['obs/predicted_label'] = True # only checked for membership in zarr group - z['obs/predicted_CLID'] = True - if 'marker' in entity_path.name: - obs.attrs['encoding-version'] = '0.1.0' - var = z.create_group('var') - var.attrs['_index'] = 'index' - var['index'] = gene_array - var['hugo_symbol'] = zarr.array([0, 1, 2]) - var['hugo_symbol'].attrs['categories'] = 'hugo_categories' - var['hugo_categories'] = zarr.array(['gene123', 'gene456', 'gene789']) - if 'visium' in entity_path.name: - z['uns/spatial/visium/scalefactors/spot_diameter_micrometers'] = 200.0 - mocker.patch('zarr.open', return_value=z) + if "asct" in entity_path.name: + z["obs/predicted.ASCT.celltype"] = ( + True # only checked for membership in zarr group + ) + elif "predicted-label" in entity_path.name: + z["obs/predicted_label"] = True # only checked for membership in zarr group + z["obs/predicted_CLID"] = True + if "marker" in entity_path.name: + obs.attrs["encoding-version"] = "0.1.0" + var = z.create_group("var") + var.attrs["_index"] = "index" + var["index"] = gene_array + var["hugo_symbol"] = zarr.array([0, 1, 2]) + var["hugo_symbol"].attrs["categories"] = "hugo_categories" + var["hugo_categories"] = zarr.array(["gene123", "gene456", "gene789"]) + if "visium" in entity_path.name: + z["uns/spatial/visium/scalefactors/spot_diameter_micrometers"] = 200.0 + mocker.patch("zarr.open", return_value=z) @pytest.mark.parametrize( - "entity_path", good_entity_paths, ids=lambda path: f'{path.parent.name}/{path.name}') + "entity_path", good_entity_paths, 
ids=lambda path: f"{path.parent.name}/{path.name}" +) def test_entity_to_vitessce_conf(entity_path, mocker): mock_zarr_store(entity_path, mocker) - possible_marker = entity_path.name.split('-')[-2] + possible_marker = entity_path.name.split("-")[-2] marker = ( - possible_marker.split('=')[1] - if possible_marker.startswith('marker=') - else None) + possible_marker.split("=")[1] if possible_marker.startswith("marker=") else None + ) entity = json.loads(entity_path.read_text()) - parent = entity.get('parent') or None # Only used for image pyramids + parent = entity.get("parent") or None # Only used for image pyramids Builder = get_view_config_builder(entity, get_assaytype, parent) assert Builder.__name__ == entity_path.parent.name # Envvars should not be set during normal test runs, # but to test the end-to-end integration, they are useful. - groups_token = environ.get('GROUPS_TOKEN', 'groups_token') - assets_url = environ.get('ASSETS_URL', 'https://example.com') + groups_token = environ.get("GROUPS_TOKEN", "groups_token") + assets_url = environ.get("ASSETS_URL", "https://example.com") builder = Builder(entity, groups_token, assets_url) conf, cells = builder.get_conf_cells(marker=marker) - expected_conf_path = entity_path.parent / entity_path.name.replace('-entity', '-conf') + expected_conf_path = entity_path.parent / entity_path.name.replace( + "-entity", "-conf" + ) expected_conf = json.loads(expected_conf_path.read_text()) # Compare normalized JSON strings so the diff is easier to read, # and there are fewer false positives. - assert json.dumps(conf, indent=2, sort_keys=True) \ - == json.dumps(expected_conf, indent=2, sort_keys=True) + assert json.dumps(conf, indent=2, sort_keys=True) == json.dumps( + expected_conf, indent=2, sort_keys=True + ) - expected_cells_path = ( - entity_path.parent / entity_path.name.replace('-entity.json', '-cells.yaml')) + expected_cells_path = entity_path.parent / entity_path.name.replace( + "-entity.json", "-cells.yaml" + ) if expected_cells_path.is_file(): expected_cells = yaml.safe_load(expected_cells_path.read_text()) # Compare as YAML to match fixture. assert yaml.dump(clean_cells(cells)) == yaml.dump(expected_cells) + # TODO: This is a stub for now, real tests for the EPIC builders + # will be added in a future PR. 
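+    # For now, just assert that the EPIC builder accepts the conf
+    # and passes it through unchanged.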
-@pytest.mark.parametrize( - "entity_path", bad_entity_paths, ids=lambda path: path.name) + epic_builder = get_epic_builder(entity["uuid"]) + assert epic_builder is not None + + if conf is None: + with pytest.raises(ValueError): + epic_builder(ConfCells(conf, cells), entity["uuid"]).get_conf_cells() + return + + built_epic_conf, _ = epic_builder( + ConfCells(conf, cells), entity["uuid"] + ).get_conf_cells() + + assert built_epic_conf is not None + assert json.dumps(built_epic_conf, indent=2, sort_keys=True) == json.dumps( + conf, indent=2, sort_keys=True + ) + + +@pytest.mark.parametrize("entity_path", bad_entity_paths, ids=lambda path: path.name) def test_entity_to_error(entity_path, mocker): mock_zarr_store(entity_path, mocker) entity = json.loads(entity_path.read_text()) with pytest.raises(Exception) as error_info: - parent = entity.get('parent') or None # Only used for image pyramids + parent = entity.get("parent") or None # Only used for image pyramids Builder = get_view_config_builder(entity, get_assaytype, parent=parent) - builder = Builder(entity, 'groups_token', 'https://example.com/') + builder = Builder(entity, "groups_token", "https://example.com/") builder.get_conf_cells() - actual_error = f'{error_info.type.__name__}: {error_info.value.args[0]}' + actual_error = f"{error_info.type.__name__}: {error_info.value.args[0]}" - error_expected_path = ( - entity_path.parent / entity_path.name.replace('-entity.json', '-error.txt')) + error_expected_path = entity_path.parent / entity_path.name.replace( + "-entity.json", "-error.txt" + ) expected_error = error_expected_path.read_text().strip() assert actual_error == expected_error @@ -171,21 +213,22 @@ def test_entity_to_error(entity_path, mocker): def clean_cells(cells): return [ { - k: v for k, v in dict(c).items() - if k not in {'metadata', 'id', 'execution_count', 'outputs'} - } for c in cells + k: v + for k, v in dict(c).items() + if k not in {"metadata", "id", "execution_count", "outputs"} + } + for c in cells ] -if __name__ == '__main__': # pragma: no cover - parser = argparse.ArgumentParser(description='Generate fixtures') - parser.add_argument( - '--input', required=True, type=Path, help='Input JSON path') +if __name__ == "__main__": # pragma: no cover + parser = argparse.ArgumentParser(description="Generate fixtures") + parser.add_argument("--input", required=True, type=Path, help="Input JSON path") args = parser.parse_args() entity = json.loads(args.input.read_text()) Builder = get_view_config_builder(entity, get_assaytype) - builder = Builder(entity, 'groups_token', 'https://example.com/') + builder = Builder(entity, "groups_token", "https://example.com/") conf, cells = builder.get_conf_cells() - print(yaml.dump(clean_cells(cells), default_style='|')) + print(yaml.dump(clean_cells(cells), default_style="|")) diff --git a/test/test_client.py b/test/test_client.py new file mode 100644 index 0000000..64093b8 --- /dev/null +++ b/test/test_client.py @@ -0,0 +1,360 @@ +import json +from flask import Flask +import pytest + +from portal_visualization.builders.base_builders import ConfCells +from src.portal_visualization.client import ApiClient, _create_vitessce_error + +mock_hit_source = { + "uuid": "ABC123", + "hubmap_id": "HMB123.XYZ", + "mapped_metadata": {"age_unit": ["eons"], "age_value": ["42"]}, +} + +flattened_hit_source = { + "age_unit": "eons", + "age_value": "42", +} + +mock_es = { + "hits": { + "total": {"value": 1}, + "hits": [{"_id": "ABC123", "_source": mock_hit_source}], + } +} + + +@pytest.fixture() +def app(): + app 
= Flask("test") + app.config.update( + { + "TESTING": True, + "ELASTICSEARCH_ENDPOINT": "search-api-url", + "PORTAL_INDEX_PATH": "/", + } + ) + yield app + + +def mock_post_303(path, **kwargs): + class MockResponse: + def __init__(self): + self.status_code = 303 + self.content = "s3-bucket-url" + + def raise_for_status(self): + pass + + return MockResponse() + + +def mock_get_s3_json_file(path, **kwargs): + class MockResponse: + def __init__(self): + self.status_code = 200 + self.content = json.dumps(mock_es) + self.text = json.dumps(mock_es) + + def raise_for_status(self): + pass + + def json(self): + return mock_es + + return MockResponse() + + +def test_s3_redirect(mocker): + mocker.patch("requests.post", side_effect=mock_post_303) + mocker.patch("requests.get", side_effect=mock_get_s3_json_file) + api_client = ApiClient() + response = api_client._request("search-api-url", body_json={"query": {}}) + assert response == mock_es + + +def mock_es_post(path, **kwargs): + class MockResponse: + def __init__(self): + self.status_code = 200 + self.text = "Logger call requires this" + + def json(self): + return mock_es + + def raise_for_status(self): + pass + + return MockResponse() + + +def test_get_descendant_to_lift(app, mocker): + mocker.patch("requests.post", side_effect=mock_es_post) + with app.app_context(): + api_client = ApiClient() + descendant = api_client.get_descendant_to_lift("uuid123") + assert descendant == mock_hit_source + + +def mock_es_post_no_hits(path, **kwargs): + class MockResponse: + def __init__(self): + self.status_code = 200 + self.text = "Logger call requires this" + + def json(self): + return {"hits": {"total": {"value": 0}, "hits": []}} + + def raise_for_status(self): + pass + + return MockResponse() + + +def test_get_descendant_to_lift_error(app, mocker): + mocker.patch("requests.post", side_effect=mock_es_post_no_hits) + with app.app_context(): + api_client = ApiClient() + descendant = api_client.get_descendant_to_lift("uuid123") + assert descendant is None + + +def test_clean_headers(app): + test_headers = { + "Authorization": "Bearer token", + "Content-Type": "application/json", + "X-Test": "test", + } + with app.app_context(): + api_client = ApiClient() + cleaned_headers = api_client._clean_headers(test_headers) + assert cleaned_headers == { + "Authorization": "REDACTED", + "Content-Type": "application/json", + "X-Test": "test", + } + + +def test_get_all_dataset_uuids(app, mocker): + mocker.patch("requests.post", side_effect=mock_es_post) + with app.app_context(): + api_client = ApiClient() + uuids = api_client.get_all_dataset_uuids() + assert uuids == ["ABC123"] + + +mock_es_more_than_10k = { + "hits": { + "total": {"value": 10001}, + "hits": [{"_id": f"ABC{i}", "_source": mock_hit_source} for i in range(10000)], + } +} + + +def mock_es_post_more_than_10k(path, **kwargs): + class MockResponse: + def __init__(self): + self.status_code = 200 + self.text = "Logger call requires this" + + def json(self): + return mock_es_more_than_10k + + def raise_for_status(self): + pass + + return MockResponse() + + +def test_get_dataset_uuids_more_than_10k(app, mocker): + mocker.patch("requests.post", side_effect=mock_es_post_more_than_10k) + with app.app_context(): + api_client = ApiClient() + with pytest.raises(Exception) as error_info: + api_client.get_all_dataset_uuids() + assert error_info.match("At least 10k datasets") + + +@pytest.mark.parametrize("plural_lc_entity_type", ("datasets", "samples", "donors")) +def test_get_entities(app, mocker, plural_lc_entity_type): + 
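+    # get_entities turns the plural, lowercase route segment ("datasets")
+    # into the ES entity_type ("Dataset").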
mocker.patch("requests.post", side_effect=mock_es_post) + with app.app_context(): + api_client = ApiClient() + entities = api_client.get_entities(plural_lc_entity_type) + assert json.dumps(entities, indent=2) == json.dumps( + [flattened_hit_source], indent=2 + ) + pass + + +def test_get_entities_more_than_10k(app, mocker): + mocker.patch("requests.post", side_effect=mock_es_post_more_than_10k) + with app.app_context(): + api_client = ApiClient() + with pytest.raises(Exception) as error_info: + api_client.get_entities("datasets") + assert error_info.match("At least 10k datasets") + + +@pytest.mark.parametrize("params", ({"uuid": "uuid"}, {"hbm_id": "hubmap_id"})) +def test_get_entity(app, mocker, params): + mocker.patch("requests.post", side_effect=mock_es_post) + with app.app_context(): + api_client = ApiClient() + entity = api_client.get_entity(**params) + assert json.dumps(entity, indent=2) == json.dumps(mock_hit_source, indent=2) + + +def test_get_entity_two_ids(app, mocker): + with app.app_context(): + api_client = ApiClient() + with pytest.raises(Exception) as error_info: + api_client.get_entity(uuid="uuid", hbm_id="hubmap_id") + assert error_info.match("Only UUID or HBM ID should be provided") + + +def mock_get_revisions(path, **kwargs): + + mock_revisions = [ + {"uuid": "ABC123", "revision_number": 10}, + {"uuid": "DEF456", "revision_number": 11}, + ] + + class MockResponse: + def __init__(self): + self.status_code = 200 + self.text = "Logger call requires this" + self.content = json.dumps(mock_revisions) + + def json(self): + return mock_revisions + + def raise_for_status(self): + pass + + return MockResponse() + + +@pytest.mark.parametrize( + "params", + ( + {"uuid": "uuid", "type": "dataset"}, + {"uuid": "uuid", "type": "sample"}, + {"uuid": "uuid", "type": "donor"}, + ), +) +def test_get_latest_entity_uuid(app, mocker, params): + mocker.patch("requests.get", side_effect=mock_get_revisions) + with app.app_context(): + api_client = ApiClient(entity_api_endpoint="entity-api-url") + entity_uuid = api_client.get_latest_entity_uuid(**params) + assert entity_uuid == "DEF456" + + +def mock_files_response(path, **kwargs): + + mock_file_response = { + "hits": { + "hits": [ + {"_id": "1234", "_source": {"files": [{"rel_path": "abc.txt"}]}}, + {"_id": "5678", "_source": {"files": [{"rel_path": "def.txt"}]}}, + ] + } + } + + class MockResponse: + def __init__(self): + self.status_code = 200 + self.text = "Logger call requires this" + self.content = json.dumps(mock_file_response) + + def json(self): + return mock_file_response + + def raise_for_status(self): + pass + + return MockResponse() + + +def test_get_files(app, mocker): + mocker.patch("requests.post", side_effect=mock_files_response) + with app.app_context(): + api_client = ApiClient() + files = api_client.get_files(["1234", "5678"]) + assert files == {"1234": ["abc.txt"], "5678": ["def.txt"]} + + +related_entity_no_files_error = _create_vitessce_error( + "Related image entity ABC123 is missing file information " + + '(no "files" key found in its metadata).' +) + + +@pytest.mark.parametrize( + "entity, patched_function, side_effect, expected_conf, expected_vis_lifted_uuid", + [ + ( + # No metadata in descendant + {"uuid": "12345"}, + "requests.post", + mock_es_post, + related_entity_no_files_error, + None, + ), + ( + # No descendants, not marked as having a visualization, no files + {"uuid": "12345"}, + "requests.post", + mock_es_post_no_hits, + ConfCells(None, None), + None, + ), + # TODO? 
Add more test cases for happy scenarios
+    ],
+)
+def test_get_vitessce_conf_cells_and_lifted_uuid(
+    app,
+    mocker,
+    entity,
+    patched_function,
+    side_effect,
+    expected_conf,
+    expected_vis_lifted_uuid,
+):
+    mocker.patch(patched_function, side_effect=side_effect)
+    with app.app_context():
+        api_client = ApiClient(
+            groups_token="token",
+            elasticsearch_endpoint="http://example.com",
+            portal_index_path="/",
+            ubkg_endpoint="http://example.com",
+            entity_api_endpoint="http://example.com",
+            soft_assay_endpoint="http://example.com",
+            soft_assay_endpoint_path="/",
+        )
+        vitessce_conf = api_client.get_vitessce_conf_cells_and_lifted_uuid(entity)
+        assert vitessce_conf.vitessce_conf == expected_conf
+        assert vitessce_conf.vis_lifted_uuid == expected_vis_lifted_uuid
+
+
+@pytest.mark.parametrize("groups_token", [None, "token"])
+def test_get_publication_ancillary_json(app, mocker, groups_token):
+    mocker.patch("requests.post", side_effect=mock_es_post)
+    mocker.patch("requests.get", side_effect=mock_get_s3_json_file)
+    with app.app_context():
+        api_client = ApiClient(groups_token=groups_token)
+        result = api_client.get_publication_ancillary_json({"uuid": "ABC123"})
+        assert result.publication_json == mock_es
+        assert result.vis_lifted_uuid == "ABC123"
+
+
+def test_get_metadata_descriptions(app, mocker):
+    mocker.patch("requests.get", side_effect=mock_get_s3_json_file)
+    with app.app_context():
+        api_client = ApiClient()
+        metadata_descriptions = api_client.get_metadata_descriptions()
+        assert metadata_descriptions == mock_es
diff --git a/test/test_epic_builders.py b/test/test_epic_builders.py
new file mode 100644
index 0000000..0e56baa
--- /dev/null
+++ b/test/test_epic_builders.py
@@ -0,0 +1,12 @@
+import pytest
+from src.portal_visualization.epic_factory import get_epic_builder
+
+
+@pytest.mark.parametrize(
+    "epic_uuid, expected",
+    [
+        ("epic_uuid", "SegmentationMaskBuilder"),
+    ],
+)
+def test_get_epic_builder(epic_uuid, expected):
+    assert get_epic_builder(epic_uuid).__name__ == expected
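
A minimal usage sketch of how the new pieces compose, for reviewers trying this locally (assumes a Flask app context, since ApiClient logs and aborts through current_app; the endpoint URLs and UUID below are placeholders):

    from flask import Flask
    from portal_visualization.client import ApiClient

    app = Flask(__name__)
    with app.app_context():
        client = ApiClient(
            groups_token=None,  # or a Globus groups token for non-public data
            elasticsearch_endpoint="https://search.example.org",  # placeholder
            portal_index_path="/portal/search",  # placeholder
            assets_endpoint="https://assets.example.org",  # placeholder
            soft_assay_endpoint="https://ingest.example.org",  # placeholder
            soft_assay_endpoint_path="assaytype",  # placeholder
            entity_api_endpoint="https://entity.example.org",  # placeholder
        )
        entity = client.get_entity(uuid="0123456789abcdef0123456789abcdef")
        result = client.get_vitessce_conf_cells_and_lifted_uuid(entity)
        conf, cells = result.vitessce_conf  # a ConfCells tuple
        # Passing epic_uuid=... would additionally run the conf through
        # get_epic_builder's SegmentationMaskBuilder (a no-op stub for now).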