diff --git a/.gitignore b/.gitignore index 4e81f71..fbe556d 100644 --- a/.gitignore +++ b/.gitignore @@ -129,4 +129,6 @@ dmypy.json .pyre/ # VSCode -.VSCode \ No newline at end of file +.VSCode + +.DS_Store \ No newline at end of file diff --git a/README.md b/README.md index 9c5f66e..a66bfb8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # portal-visualization -Given HuBMAP Dataset JSON, creates a Vitessce configuration. +Given HuBMAP Dataset JSON (e.g. https://portal.hubmapconsortium.org/browse/dataset/004d4f157df4ba07356cd805131dfc04.json), creates a Vitessce configuration. ## Release process @@ -23,7 +23,8 @@ $ pip install . $ src/vis-preview.py --help usage: vis-preview.py [-h] (--url URL | --json JSON) [--assaytypes_url URL] [--assets_url URL] [--token TOKEN] [--marker MARKER] - [--to_json] + [--to_json] [--epic_uuid UUID] [--parent_uuid UUID] + [--epic_url EPIC_URL] [--epic_json EPIC_JSON] Given HuBMAP Dataset JSON, generate a Vitessce viewconf, and load vitessce.io. @@ -31,14 +32,36 @@ optional arguments: -h, --help show this help message and exit --url URL URL which returns Dataset JSON --json JSON File containing Dataset JSON - --assaytypes_url URL AssayType service; default: - https://ingest.api.hubmapconsortium.org/assaytype/ + --assaytypes_url URL AssayType service; default: https://ingest- api.dev.hubmapconsortium.org/assaytype/ --assets_url URL Assets endpoint; default: - https://assets.hubmapconsortium.org + https://assets.dev.hubmapconsortium.org --token TOKEN Globus groups token; Only needed if data is not public --marker MARKER Marker to highlight in visualization; Only used in some visualizations. --to_json Output viewconf, rather than open in browser. + --epic_uuid UUID uuid of the EPIC dataset. + --parent_uuid UUID Parent uuid - Only needed for an image-pyramid support dataset. + --epic_url EPIC_URL URL which returns Dataset JSON for the EPIC dataset + --epic_json EPIC_JSON File containing Dataset JSON for the EPIC dataset + ``` + + Notes: + 1. The token can be retrieved while logged in to the portal: open the browser's developer tools and, under the Network tab, look for the `Authorization: Bearer <token>` header on the `search-api` calls made while browsing a dataset. The token is only needed to access non-public datasets, such as those in QA. + 2. The README must stay in sync with the script: whenever an argument of `vis-preview.py` is added or modified, update the usage text above to match the output of `./vis-preview.py --help`. + + +## Build & Testing + + To build: `python -m build`. + + To run the tests: `./test.sh`; the `flake8` and `autopep8` packages must be installed first. ## Background @@ -47,7 +70,9 @@ optional arguments: Data for the Vitessce visualization almost always comes via raw data that is processed by [ingest-pipeline](https://github.com/hubmapconsortium/ingest-pipeline) airflow dags. Harvard often contributes our own custom pipelines to these dags that can be found in [portal-containers](https://github.com/hubmapconsortium/portal-containers).
-The outputs of these pipelines are then converted into view configurations for Vitessce by the [portal backend](https://github.com/hubmapconsortium/portal-ui/blob/0b43a468fff0256a466a3bf928a83893321ea1d9/context/app/api/client.py#L165), + +The outputs of these pipelines are then converted into view configurations for Vitessce by the [portal backend](https://github.com/hubmapconsortium/portal-visualization/blob/main/src/portal_visualization/client.py), using code in this repo, when a `Dataset` that should be visualized is requested in the client. +The `vis-preview.py` script mimics this invocation of `get_view_config_builder` for development and testing purposes, i.e. independently of the [portal backend](https://github.com/hubmapconsortium/portal-ui/blob/main/context/app/routes_browse.py#L126). The view configurations are built using the [Vitessce-Python API](https://vitessce.github.io/vitessce-python/). diff --git a/requirements-dev.txt b/requirements-dev.txt index 93c7df6..54af287 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,6 @@ pytest==5.2.1 flake8==7.0.0 -autopep8==1.4.4 +autopep8==2.0.4 pytest-mock==3.7.0 -coverage==6.3.1 -pyyaml==6.0 \ No newline at end of file +coverage==7.6.4 +pyyaml==6.0.2 \ No newline at end of file diff --git a/src/defaults.json b/src/defaults.json index 16ce4d4..2750767 100644 --- a/src/defaults.json +++ b/src/defaults.json @@ -1,4 +1,5 @@ { - "assets_url": "https://assets.hubmapconsortium.org", - "assaytypes_url": "https://ingest.api.hubmapconsortium.org/assaytype/" + "assets_url": "https://assets.dev.hubmapconsortium.org", + "assaytypes_url": "https://ingest-api.dev.hubmapconsortium.org/assaytype/", + "dataset_url":"https://portal.dev.hubmapconsortium.org/browse/dataset/" } \ No newline at end of file diff --git a/src/portal_visualization/builder_factory.py b/src/portal_visualization/builder_factory.py index f67788b..51964ff 100644 --- a/src/portal_visualization/builder_factory.py +++ b/src/portal_visualization/builder_factory.py @@ -8,6 +8,7 @@ SeqFISHViewConfBuilder, IMSViewConfBuilder, ImagePyramidViewConfBuilder, + SegImagePyramidViewConfBuilder, NanoDESIViewConfBuilder, ) from .builders.anndata_builders import ( @@ -33,6 +34,7 @@ def process_hints(hints): is_json = "json_based" in hints is_spatial = "spatial" in hints is_support = "is_support" in hints + is_segmentation_base = "segmentation_base" in hints return ( is_image, @@ -44,6 +46,7 @@ def process_hints(hints): is_json, is_spatial, is_support, + is_segmentation_base, ) @@ -53,10 +56,10 @@ def process_hints(hints): # The entity is a dict that contains the entity UUID and metadata. # `get_assaytype` is a function which takes an entity UUID and returns # a dict containing the assaytype and vitessce-hints for that entity.
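# `parent` identifies the ancestor dataset and is only set for vis-lifted image pyramids; it is passed to `get_assaytype` to look up the ancestor's assay. # `epic_uuid`, when provided, routes the supporting image pyramid to `SegImagePyramidViewConfBuilder` so that `SegmentationMaskBuilder` can later overlay the EPIC segmentation masks.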
-def get_view_config_builder(entity, get_assaytype, parent=None): +def get_view_config_builder(entity, get_assaytype, parent=None, epic_uuid=None): if entity.get("uuid") is None: raise ValueError("Provided entity does not have a uuid") - assay = get_assaytype(entity) + assay = get_assaytype(entity.get('uuid')) assay_name = assay.get("assaytype") hints = assay.get("vitessce-hints", []) ( @@ -69,11 +72,16 @@ def get_view_config_builder(entity, get_assaytype, parent=None): is_json, is_spatial, is_support, + is_segmentation_base ) = process_hints(hints) # vis-lifted image pyramids if parent is not None: - if is_support and is_image: + # TODO: For now, EPIC (base image) support datasets don't have any hints + if epic_uuid is not None or is_segmentation_base: + return SegImagePyramidViewConfBuilder + + elif is_support and is_image: ancestor_assaytype = get_assaytype(parent).get("assaytype") if SEQFISH == ancestor_assaytype: # e.g. parent = c6a254b2dc2ed46b002500ade163a7cc @@ -134,5 +142,5 @@ def get_view_config_builder(entity, get_assaytype, parent=None): def has_visualization(entity, get_assaytype, parent=None): - builder = get_view_config_builder(entity, get_assaytype, parent) + builder = get_view_config_builder(entity, get_assaytype, parent, epic_uuid=None) return builder != NullViewConfBuilder diff --git a/src/portal_visualization/builders/anndata_builders.py b/src/portal_visualization/builders/anndata_builders.py index fcaba65..5c1a2ec 100644 --- a/src/portal_visualization/builders/anndata_builders.py +++ b/src/portal_visualization/builders/anndata_builders.py @@ -125,7 +125,7 @@ def _set_up_marker_gene(self, marker): if (marker_index >= 0): marker = ensembl_ids[marker_index] else: - pass + pass # pragma: no cover # Encoding Version 0.2.0 # https://anndata.readthedocs.io/en/latest/fileformat-prose.html#categorical-arrays # Our pipeline currently does not use this encoding version @@ -243,7 +243,7 @@ def _setup_anndata_view_config(self, vc, dataset): # This ensures that the view config is valid for datasets with and without a spatial view spatial = self._add_spatial_view(dataset, vc) - views = list(filter(lambda v: v is not None, [ + views = list(filter(lambda v: v is not None, [ # pragma: no cover cell_sets, gene_list, scatterplot, cell_sets_expr, heatmap, spatial])) self._views = views diff --git a/src/portal_visualization/builders/base_builders.py b/src/portal_visualization/builders/base_builders.py index 0102dc3..3c57107 100644 --- a/src/portal_visualization/builders/base_builders.py +++ b/src/portal_visualization/builders/base_builders.py @@ -1,7 +1,9 @@ import urllib from collections import namedtuple from abc import ABC, abstractmethod - +# import json +# import requests +# from pathlib import Path ConfCells = namedtuple('ConfCells', ['conf', 'cells']) @@ -74,7 +76,10 @@ def _build_assets_url(self, rel_path, use_token=True): 'https://example.com/uuid/rel_path/to/clusters.ome.tiff?token=groups_token' """ - base_url = urllib.parse.urljoin(self._assets_endpoint, f"{self._uuid}/{rel_path}") + uuid = self._uuid + if hasattr(self, "_epic_uuid"): # pragma: no cover + uuid = self._epic_uuid + base_url = urllib.parse.urljoin(self._assets_endpoint, f"{uuid}/{rel_path}") token_param = urllib.parse.urlencode({"token": self._groups_token}) return f"{base_url}?{token_param}" if use_token else base_url @@ -118,6 +123,23 @@ def _get_file_paths(self): """ return [file["rel_path"] for file in self._entity["files"]] + # def _get_epic_entity(self): # TODO: might need this if we decide to read the
epic_entity on run time + # request_init = self._get_request_init() + # file_path = Path(__file__).resolve().parent.parent.parent / 'defaults.json' + # print(file_path) + # defaults = json.load(file_path.open()) + # # headers = {"headers": {"Authorization": f"Bearer {self._groups_token}"}} + # url = f'{defaults['dataset_url']}/{self._epic_uuid}.json' + # print(url) + # response = requests.get(url, request_init) + # if response.status_code == 403: + # raise Exception('Protected data: Download JSON via browser; Redo with --json') + # response.raise_for_status() + # json_str = response.text + # entity = json.loads(json_str) + # return entity + class _DocTestBuilder(ViewConfBuilder): # pragma: no cover # The doctests on the methods in this file need a concrete class to instantiate: diff --git a/src/portal_visualization/builders/epic_builders.py b/src/portal_visualization/builders/epic_builders.py index b86ab75..75501a4 100644 --- a/src/portal_visualization/builders/epic_builders.py +++ b/src/portal_visualization/builders/epic_builders.py @@ -1,22 +1,35 @@ -from abc import ABC, abstractmethod -from vitessce import VitessceConfig +from abc import abstractmethod +from vitessce import VitessceConfig, ObsSegmentationsOmeTiffWrapper, AnnDataWrapper, \ + get_initial_coordination_scope_prefix, CoordinationLevel as CL from .base_builders import ConfCells -from ..utils import get_conf_cells +from ..utils import get_conf_cells, get_matches +from .base_builders import ViewConfBuilder +from requests import get +import re +import random +from ..paths import OFFSETS_DIR, IMAGE_PYRAMID_DIR, SEGMENTATION_SUBDIR, SEGMENTATION_ZARR_STORES, SEGMENTATION_DIR +transformations_filename = 'transformations.json' +zarr_path = f'{SEGMENTATION_SUBDIR}/{SEGMENTATION_ZARR_STORES}' + # EPIC builders take in a vitessce conf output by a previous builder and modify it # accordingly to add the EPIC-specific configuration. 
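# A concrete builder reconstructs the base conf with `VitessceConfig.from_dict`, its `_apply(conf)` override layers the EPIC-specific objects and view coordination on top, and `get_conf_cells()` serializes the result back into `ConfCells`.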
-class EPICConfBuilder(ABC): - def __init__(self, base_conf: ConfCells, epic_uuid) -> None: + + +class EPICConfBuilder(ViewConfBuilder): + def __init__(self, epic_uuid, base_conf: ConfCells, entity, epic_entity, + groups_token, assets_endpoint, **kwargs) -> None: + super().__init__(entity, groups_token, assets_endpoint, **kwargs) conf, cells = base_conf - if conf is None: + if conf is None: # pragma: no cover raise ValueError("ConfCells object must have a conf attribute") - + # Not sure if this is needed, as the assumption is that there is one base image self._is_plural = isinstance(conf, list) - if self._is_plural: + if self._is_plural: # pragma: no cover self._base_conf = [ VitessceConfig.from_dict(conf) for conf in conf ] @@ -24,16 +37,18 @@ def __init__(self, base_conf: ConfCells, epic_uuid) -> None: self._base_conf: VitessceConfig = VitessceConfig.from_dict(base_conf.conf) self._epic_uuid = epic_uuid + self._epic_entity = epic_entity + pass def get_conf_cells(self): self.apply() - if (self._is_plural): + if (self._is_plural): # pragma: no cover return get_conf_cells([conf.to_dict() for conf in self._base_conf]) return get_conf_cells(self._base_conf) def apply(self): - if self._is_plural: + if self._is_plural: # pragma: no cover for conf in self._base_conf: self._apply(conf) else: @@ -43,17 +58,147 @@ def apply(self): def _apply(self, conf): # pragma: no cover pass + def zarr_store_url(self): + adata_url = self._build_assets_url(zarr_path, use_token=False) + return adata_url -class SegmentationMaskBuilder(EPICConfBuilder): + def image_transformations_url(self): # pragma: no cover + transformations_url = self._build_assets_url(SEGMENTATION_DIR, use_token=True) + return transformations_url + + def segmentations_ome_offset_url(self, img_path): + img_url = self._build_assets_url(f'{SEGMENTATION_SUBDIR}/{img_path}') + return ( + img_url, + str( + re.sub( + r"ome\.tiff?", + "offsets.json", + re.sub(IMAGE_PYRAMID_DIR, OFFSETS_DIR, img_url), + ) + ), + ) + +class SegmentationMaskBuilder(EPICConfBuilder): def _apply(self, conf): + zarr_url = self.zarr_store_url() datasets = conf.get_datasets() - print(f"Found {len(datasets)} datasets") - # Proof of concept using one of the kaggle segmentation masks for now - # segmentations = ObsSegmentationsOmeTiffWrapper( - # img_url='https://assets.hubmapconsortium.org/c9d9ab5c9ee9642b60dd351024968627/ometiff-pyramids/VAN0042-RK-3-18-registered-PAS-to-postAF-registered.ome_mask.ome.tif?token=AgndN7NVbn83wwDXjpnY1Y0lDoJj2j7zOGmn1WN6qr9pqdkjKmt9C1XYm4KrlWrOXE9rVJvpnEKrPjIXrlKd1hmDGjV', - # # offsets_path=f'./{name}/{name}/offsets/{name}.segmentations.offsets.json', - # obs_types_from_channel_names=True, - # ) - # dataset.add_object(segmentations) - pass + # TODO: use this if extracting epic_entity at run time is preferred to passing it in as a param + # epic_entity = self._get_epic_entity() + # print(epic_entity) + file_paths_found = [file["rel_path"] for file in self._epic_entity["files"]] + + found_images = [ + path for path in get_matches( + file_paths_found, IMAGE_PYRAMID_DIR + r".*\.ome\.tiff?$", + ) + ] + found_images = sorted(found_images) + if len(found_images) == 0: # pragma: no cover + message = f"Image pyramid assay with uuid {self._uuid} has no matching files" + raise FileNotFoundError(message) + + elif len(found_images) >= 1: + img_url, offsets_url = self.segmentations_ome_offset_url( + found_images[0] + ) + + segmentation_scale = self.read_segmentation_scale() + segmentations = ObsSegmentationsOmeTiffWrapper( + img_url=img_url, + offsets_url=offsets_url,
coordinate_transformations=[{"type": "scale", "scale": segmentation_scale}], + obs_types_from_channel_names=True, + coordination_values={ + "fileUid": "segmentation-mask" + } + ) + + mask_names = self.read_metadata_from_url() + if (mask_names is not None): # pragma: no cover + segmentation_objects, segmentations_CL = create_segmentation_objects(zarr_url, mask_names) + for dataset in datasets: + dataset.add_object(segmentations) + for obj in segmentation_objects: + dataset.add_object(obj) + + spatial_view = conf.get_first_view_by_type('spatialBeta') + lc_view = conf.get_first_view_by_type('layerControllerBeta') + conf.link_views_by_dict([spatial_view, lc_view], { + # Neutralizing the base-image colors + 'imageLayer': CL([{'photometricInterpretation': 'RGB', }]), + "segmentationLayer": CL([ + { + "fileUid": "segmentation-mask", + "spatialLayerVisible": True, + "spatialLayerOpacity": 1, + "segmentationChannel": CL(segmentations_CL) + } + ]) + + }, meta=True, scope_prefix=get_initial_coordination_scope_prefix("A", "obsSegmentations")) + + def read_metadata_from_url(self): # pragma: no cover + mask_names = [] + url = f'{self.zarr_store_url()}/metadata.json' + request_init = self._get_request_init() or {} + response = get(url, **request_init) + if response.status_code == 200: + data = response.json() + if isinstance(data, dict) and "mask_names" in data: + mask_names = data["mask_names"] + else: + print("'mask_names' key not found in the response.") + else: + # in this case, the code won't execute for this + print(f"Failed to retrieve metadata.json: {response.status_code} - {response.reason}") + return mask_names + + def read_segmentation_scale(self): # pragma: no cover + url = self._build_assets_url(f'{SEGMENTATION_DIR}/{transformations_filename}') + request_init = self._get_request_init() or {} + # By default no scaling should be applied, format accepted by vitessce + scale = [1, 1, 1, 1, 1] + response = get(url, **request_init) + if response.status_code == 200: + data = response.json() + if isinstance(data, dict) and "scale" in data: + scale = data["scale"] + else: + print("'scale' key not found in the response.") + else: + print(f"Failed to retrieve {transformations_filename}: {response.status_code} - {response.reason}") + return scale + + +def create_segmentation_objects(base_url, mask_names): # pragma: no cover + segmentation_objects = [] + segmentations_CL = [] + for index, mask_name in enumerate(mask_names): + color_channel = generate_unique_color() + mask_url = f'{base_url}/{mask_name}.zarr' + segmentations_zarr = AnnDataWrapper( + adata_url=mask_url, + obs_locations_path="obsm/X_spatial", + obs_labels_names=mask_name, + coordination_values={ + "obsType": mask_name + } + ) + seg_CL = { + # TODO: manually to match image channels - need to be fixed on the JS side + "spatialTargetC": index + 2, + "obsType": mask_name, + "spatialChannelOpacity": 1, + "spatialChannelColor": color_channel, + "obsHighlight": None + + } + segmentation_objects.append(segmentations_zarr) + segmentations_CL.append(seg_CL) + return segmentation_objects, segmentations_CL + + +def generate_unique_color(): # pragma: no cover + return [random.randint(0, 255) for _ in range(3)] diff --git a/src/portal_visualization/builders/imaging_builders.py b/src/portal_visualization/builders/imaging_builders.py index e54c7d8..3811631 100644 --- a/src/portal_visualization/builders/imaging_builders.py +++ b/src/portal_visualization/builders/imaging_builders.py @@ -5,11 +5,13 @@ VitessceConfig, MultiImageWrapper, OmeTiffWrapper, + 
ImageOmeTiffWrapper, Component as cm, ) from ..utils import get_matches, group_by_file_name, get_conf_cells -from ..paths import IMAGE_PYRAMID_DIR, OFFSETS_DIR, SEQFISH_HYB_CYCLE_REGEX, SEQFISH_FILE_REGEX +from ..paths import (IMAGE_PYRAMID_DIR, OFFSETS_DIR, SEQFISH_HYB_CYCLE_REGEX, + SEQFISH_FILE_REGEX, SEGMENTATION_SUPPORT_IMAGE_SUBDIR) from .base_builders import ViewConfBuilder @@ -46,6 +48,27 @@ def _get_img_and_offset_url(self, img_path, img_dir): ), ) + def _get_img_and_offset_url_seg(self, img_path, img_dir): + """Create the image and offsets URLs for an EPIC's base-image support dataset. + :param str img_path: The path of the image + :param str img_dir: The image-specific part of the path to be + replaced by the OFFSETS_DIR constant. + :rtype: tuple The image url and the offsets url + + """ + img_url = self._build_assets_url(img_path) + offset_path = f'{OFFSETS_DIR}/{SEGMENTATION_SUPPORT_IMAGE_SUBDIR}' + return ( + img_url, + str( + re.sub( + r"ome\.tiff?", + "offsets.json", + re.sub(img_dir, offset_path, img_url), + ) + ), + ) + def _setup_view_config_raster(self, vc, dataset, disable_3d=[], use_full_resolution=[]): vc.add_view(cm.SPATIAL, dataset=dataset, x=3, y=0, w=9, h=12).set_props( useFullResolutionImage=use_full_resolution @@ -56,6 +79,15 @@ def _setup_view_config_raster(self, vc, dataset, disable_3d=[], use_full_resolut ) return vc + def _setup_view_config_seg(self, vc, dataset, disable_3d=[], use_full_resolution=[]): + vc.add_view("spatialBeta", dataset=dataset, x=3, y=0, w=9, h=12).set_props( + useFullResolutionImage=use_full_resolution + ) + vc.add_view("layerControllerBeta", dataset=dataset, x=0, y=0, w=3, h=8).set_props( + disable3d=disable_3d, disableChannelsIfRgbDetected=True + ) + return vc + class ImagePyramidViewConfBuilder(AbstractImagingViewConfBuilder): def __init__(self, entity, groups_token, assets_endpoint, **kwargs): @@ -107,6 +139,49 @@ def get_conf_cells(self, **kwargs): return get_conf_cells(conf) +class SegImagePyramidViewConfBuilder(AbstractImagingViewConfBuilder): + def __init__(self, entity, groups_token, assets_endpoint, **kwargs): + """Wrapper class for creating a standard view configuration for the image pyramid of a segmentation mask, + i.e. for high-resolution viz-lifted imaging datasets like + https://portal.hubmapconsortium.org/browse/dataset/ + """ + self.image_pyramid_regex = f'{IMAGE_PYRAMID_DIR}/{SEGMENTATION_SUPPORT_IMAGE_SUBDIR}' + self.use_full_resolution = [] + self.use_physical_size_scaling = False + super().__init__(entity, groups_token, assets_endpoint, **kwargs) + + def get_conf_cells(self, **kwargs): + file_paths_found = self._get_file_paths() + found_images = [ + path for path in get_matches( + file_paths_found, self.image_pyramid_regex + r".*\.ome\.tiff?$", + ) + if 'separate/' not in path # Exclude separate/* in MALDI-IMS + ] + found_images = sorted(found_images) + if len(found_images) == 0: # pragma: no cover + message = f"Image pyramid assay with uuid {self._uuid} has no matching files" + raise FileNotFoundError(message) + + vc = VitessceConfig(name="HuBMAP Data Portal", schema_version=self._schema_version) + dataset = vc.add_dataset(name="Visualization Files") + # There is always exactly one base image + if len(found_images) == 1: + img_url, offsets_url = self._get_img_and_offset_url_seg( + found_images[0], self.image_pyramid_regex + ) + + image = ImageOmeTiffWrapper( + img_url=img_url, offsets_url=offsets_url, name=Path(found_images[0]).name + ) + + dataset = dataset.add_object(image) + vc = self._setup_view_config_seg( + vc,
dataset, use_full_resolution=self.use_full_resolution) + conf = vc.to_dict() + return get_conf_cells(conf) + + class IMSViewConfBuilder(ImagePyramidViewConfBuilder): """Wrapper class for generating a Vitessce configurations for IMS data that excludes the image pyramids diff --git a/src/portal_visualization/epic_factory.py b/src/portal_visualization/epic_factory.py index d3f339d..bf7fd37 100644 --- a/src/portal_visualization/epic_factory.py +++ b/src/portal_visualization/epic_factory.py @@ -4,4 +4,6 @@ # This function will determine which builder to use for the given entity. # Since we only have one builder for EPICs right now, we can just return it. def get_epic_builder(epic_uuid): + if epic_uuid is None: + raise ValueError("epic_uuid must be provided") return SegmentationMaskBuilder diff --git a/src/portal_visualization/paths.py b/src/portal_visualization/paths.py index f192ef1..dffd4d7 100644 --- a/src/portal_visualization/paths.py +++ b/src/portal_visualization/paths.py @@ -11,3 +11,7 @@ SPRM_PYRAMID_SUBDIR = "pipeline_output/expr" SEQFISH_HYB_CYCLE_REGEX = r"(HybCycle_\d+|final_mRNA_background)" SEQFISH_FILE_REGEX = r"MMStack_Pos\d+\.ome\.tiff?" +SEGMENTATION_DIR = "extras" +SEGMENTATION_SUBDIR = "extras/transformations" +SEGMENTATION_ZARR_STORES = "hubmap_ui/seg-to-mudata-zarr/objects.zarr" +SEGMENTATION_SUPPORT_IMAGE_SUBDIR = "lab_processed/images" diff --git a/src/vis-preview.py b/src/vis-preview.py index c6faf29..49b94e2 100755 --- a/src/vis-preview.py +++ b/src/vis-preview.py @@ -10,6 +10,7 @@ import requests from portal_visualization.builder_factory import get_view_config_builder +from portal_visualization.epic_factory import get_epic_builder def main(): # pragma: no cover @@ -42,39 +43,95 @@ def main(): # pragma: no cover parser.add_argument( '--to_json', action='store_true', help='Output viewconf, rather than open in browser.') + parser.add_argument( + '--epic_uuid', metavar='UUID', + help='uuid of the EPIC dataset.', + default=None) + parser.add_argument( + '--parent_uuid', metavar='UUID', + help='Parent uuid - Only needed for an image-pyramid support dataset.', + default=None) + parser.add_argument( + '--epic_url', help='URL which returns Dataset JSON for the EPIC dataset') + parser.add_argument( + '--epic_json', type=Path, help='File containing Dataset JSON for the EPIC dataset') args = parser.parse_args() marker = args.marker + epic_uuid = args.epic_uuid + parent_uuid = args.parent_uuid - if args.url: - response = requests.get(args.url) - if response.status_code == 403: - # Even if the user has provided a globus token, - # that isn't useful when making requests to our portal. 
- raise Exception('Protected data: Download JSON via browser; Redo with --json') - response.raise_for_status() - json_str = response.text - else: - json_str = args.json.read_text() - entity = json.loads(json_str) + headers = get_headers(args.token) + entity = get_entity(args.url, args.json, headers) + + if epic_uuid is not None: + if args.epic_url is None and args.epic_json is None: + raise ValueError('Provide the epic_url or epic_json parameter') + epic_entity = get_entity(args.epic_url, args.epic_json, headers) def get_assaytype(uuid): - headers = {} - if args.token: - headers['Authorization'] = f'Bearer {args.token}' - requests.get(f'{defaults["assaytypes_url"]}/{uuid}', headers=headers).json() + try: + response = requests.get(f'{defaults["assaytypes_url"]}{uuid}', headers=headers) + if response.status_code != 200: + print(f"Error: Received status code {response.status_code}") + else: + try: + data = response.json() + return data + except Exception as e: + print(f"Error in parsing the response {str(e)}") + except Exception as e: + print(f"Error accessing {defaults['assaytypes_url']}{uuid}: {str(e)}") - Builder = get_view_config_builder(entity, get_assaytype) + Builder = get_view_config_builder(entity, get_assaytype, parent_uuid, epic_uuid) builder = Builder(entity, args.token, args.assets_url) print(f'Using: {builder.__class__.__name__}', file=stderr) conf_cells = builder.get_conf_cells(marker=marker) - if args.to_json: - print(json.dumps(conf_cells.conf, indent=2)) + + if (epic_uuid is not None and conf_cells is not None): # pragma: no cover + EpicBuilder = get_epic_builder(epic_uuid) + epic_builder = EpicBuilder(epic_uuid, conf_cells, entity, epic_entity, args.token, args.assets_url) + print(f'Using: {epic_builder.__class__.__name__}', file=stderr) + conf_cells = epic_builder.get_conf_cells() + + if isinstance(conf_cells.conf, list): + conf_as_json = json.dumps(conf_cells.conf[0]) else: conf_as_json = json.dumps(conf_cells.conf) - data_url = f'data:,{quote_plus(conf_as_json)}' - vitessce_url = f'http://vitessce.io/#?url={data_url}' - open_new_tab(vitessce_url) + + if args.to_json: + print(conf_as_json) + + # For testing + # with open('conf.json', 'w') as file: + # if isinstance(conf_cells.conf, list): + # json.dump(conf_cells.conf[0], file, indent=4, separators=(',', ': ')) + # else: + # json.dump(conf_cells.conf, file, indent=4, separators=(',', ': ')) + + data_url = f'data:,{quote_plus(conf_as_json)}' + vitessce_url = f'http://vitessce.io/#?url={data_url}' + open_new_tab(vitessce_url) + + +def get_headers(token): # pragma: no cover + headers = {} + if token: + headers['Authorization'] = f'Bearer {token}' + return headers + + +def get_entity(url_arg, json_arg, headers): # pragma: no cover + if url_arg: + response = requests.get(url_arg, headers=headers) + if response.status_code == 403: + raise Exception('Protected data: Download JSON via browser; Redo with --json') + response.raise_for_status() + json_str = response.text + else: + json_str = json_arg.read_text() + entity = json.loads(json_str) + return entity if __name__ == "__main__": # pragma: no cover diff --git a/test/assaytype-fixtures/d58d16d515fe3237b0ca793da68a5d48.json b/test/assaytype-fixtures/d58d16d515fe3237b0ca793da68a5d48.json new file mode 100644 index 0000000..9bba437 --- /dev/null +++ b/test/assaytype-fixtures/d58d16d515fe3237b0ca793da68a5d48.json @@ -0,0 +1,10 @@ +{ + "assaytype": "image_pyramid", + "contains-pii": false, + "description": "Image Pyramid", + "primary": false, + "vitessce-hints": [ + "is_image", + 
"segmentation_base" + ] + } \ No newline at end of file diff --git a/test/assaytype-fixtures/fcd7f68678d85a4a4d28a4b269de379e.json b/test/assaytype-fixtures/fcd7f68678d85a4a4d28a4b269de379e.json new file mode 100644 index 0000000..42fa57b --- /dev/null +++ b/test/assaytype-fixtures/fcd7f68678d85a4a4d28a4b269de379e.json @@ -0,0 +1,9 @@ +{ + "contains-pii": false, + "dataset-type": "Histology", + "description": "Segmentation Mask", + "dir-schema": "stained-v0", + "primary": true, + "tbl-schema": "stained-v0", + "vitessce-hints": ["segmentation_mask","is_image","pyramid"] + } \ No newline at end of file diff --git a/test/good-fixtures/SegImagePyramidViewConfBuilder/fake-conf.json b/test/good-fixtures/SegImagePyramidViewConfBuilder/fake-conf.json new file mode 100644 index 0000000..9326a28 --- /dev/null +++ b/test/good-fixtures/SegImagePyramidViewConfBuilder/fake-conf.json @@ -0,0 +1,55 @@ +{ + "version": "1.0.15", + "name": "HuBMAP Data Portal", + "description": "", + "datasets": [ + { + "uid": "A", + "name": "Visualization Files", + "files": [ + { + "fileType": "image.ome-tiff", + "url": "https://example.com/d58d16d515fe3237b0ca793da68a5d48/ometiff-pyramids/lab_processed/images/91706.ome.tif?token=groups_token", + "options": { + "offsetsUrl": "https://example.com/d58d16d515fe3237b0ca793da68a5d48/output_offsets/lab_processed/images/91706.offsets.json?token=groups_token" + } + } + ] + } + ], + "coordinationSpace": { + "dataset": { + "A": "A" + } + }, + "layout": [ + { + "component": "spatialBeta", + "coordinationScopes": { + "dataset": "A" + }, + "x": 3, + "y": 0, + "w": 9, + "h": 12, + "props": { + "useFullResolutionImage": [] + } + }, + { + "component": "layerControllerBeta", + "coordinationScopes": { + "dataset": "A" + }, + "x": 0, + "y": 0, + "w": 3, + "h": 8, + "props": { + "disable3d": [], + "disableChannelsIfRgbDetected": true + } + } + ], + "initStrategy": "auto" +} \ No newline at end of file diff --git a/test/good-fixtures/SegImagePyramidViewConfBuilder/fake-entity.json b/test/good-fixtures/SegImagePyramidViewConfBuilder/fake-entity.json new file mode 100644 index 0000000..f9da979 --- /dev/null +++ b/test/good-fixtures/SegImagePyramidViewConfBuilder/fake-entity.json @@ -0,0 +1,23 @@ +{ + "data_types": [ + "image_pyramid", + "PAS" + ], + "status": "QA", + "immediate_ancestors": [ + { + "data_types": ["PAS"] + } + ], + "files": [ + { + "rel_path": "ometiff-pyramids/lab_processed/images/91706.ome.tif" + }, + { + "rel_path": "output_offsets/91706.ome_mask.offsets.json" + } + ], + "uuid": "d58d16d515fe3237b0ca793da68a5d48", + "metadata": {"dag_provenance_list": []}, + "parent": { "uuid": "22901da5f080b219a514e38381acbb0e" } +} \ No newline at end of file diff --git a/test/good-fixtures/SegmentationMaskBuilder/fake-conf.json b/test/good-fixtures/SegmentationMaskBuilder/fake-conf.json new file mode 100644 index 0000000..2f5401d --- /dev/null +++ b/test/good-fixtures/SegmentationMaskBuilder/fake-conf.json @@ -0,0 +1,141 @@ +{ + "version": "1.0.15", + "name": "HuBMAP Data Portal", + "description": "", + "datasets": [ + { + "uid": "A", + "name": "Visualization Files", + "files": [ + { + "fileType": "image.ome-tiff", + "url": "https://example.com/d58d16d515fe3237b0ca793da68a5d48/ometiff-pyramids/lab_processed/images/91706.ome.tif?token=groups_token", + "options": { + "offsetsUrl": "https://example.com/d58d16d515fe3237b0ca793da68a5d48/output_offsets/lab_processed/images/91706.offsets.json?token=groups_token" + } + }, + { + "fileType": "obsSegmentations.ome-tiff", + "url": 
"https://example.com/fcd7f68678d85a4a4d28a4b269de379e/extras/transformations/ometiff-pyramids/91706.segmentations.ome.tif?token=groups_token", + "options": { + "coordinateTransformations": [ + { + "type": "scale", + "scale": [ + 1, + 1, + 1, + 1, + 1 + ] + } + ], + "obsTypesFromChannelNames": true, + "offsetsUrl": "https://example.com/fcd7f68678d85a4a4d28a4b269de379e/extras/transformations/output_offsets/91706.segmentations.offsets.json?token=groups_token" + }, + "coordinationValues": { + "fileUid": "segmentation-mask" + } + } + ] + } + ], + "coordinationSpace": { + "dataset": { + "A": "A" + }, + "imageLayer": { + "init_A_obsSegmentations_0": "__dummy__" + }, + "photometricInterpretation": { + "init_A_obsSegmentations_0": "RGB" + }, + "segmentationLayer": { + "init_A_obsSegmentations_0": "__dummy__" + }, + "fileUid": { + "init_A_obsSegmentations_0": "segmentation-mask" + }, + "spatialLayerVisible": { + "init_A_obsSegmentations_0": true + }, + "spatialLayerOpacity": { + "init_A_obsSegmentations_0": 1 + }, + "metaCoordinationScopes": { + "init_A_obsSegmentations_0": { + "imageLayer": [ + "init_A_obsSegmentations_0" + ], + "segmentationLayer": [ + "init_A_obsSegmentations_0" + ] + } + }, + "metaCoordinationScopesBy": { + "init_A_obsSegmentations_0": { + "imageLayer": { + "photometricInterpretation": { + "init_A_obsSegmentations_0": "init_A_obsSegmentations_0" + } + }, + "segmentationLayer": { + "fileUid": { + "init_A_obsSegmentations_0": "init_A_obsSegmentations_0" + }, + "spatialLayerVisible": { + "init_A_obsSegmentations_0": "init_A_obsSegmentations_0" + }, + "spatialLayerOpacity": { + "init_A_obsSegmentations_0": "init_A_obsSegmentations_0" + }, + "segmentationChannel": { + "init_A_obsSegmentations_0": [] + } + } + } + } + }, + "layout": [ + { + "component": "spatialBeta", + "coordinationScopes": { + "dataset": "A", + "metaCoordinationScopes": [ + "init_A_obsSegmentations_0" + ], + "metaCoordinationScopesBy": [ + "init_A_obsSegmentations_0" + ] + }, + "x": 3, + "y": 0, + "w": 9, + "h": 12, + "props": { + "useFullResolutionImage": [] + } + }, + { + "component": "layerControllerBeta", + "coordinationScopes": { + "dataset": "A", + "metaCoordinationScopes": [ + "init_A_obsSegmentations_0" + ], + "metaCoordinationScopesBy": [ + "init_A_obsSegmentations_0" + ] + }, + "x": 0, + "y": 0, + "w": 3, + "h": 8, + "props": { + "disable3d": [], + "disableChannelsIfRgbDetected": true + } + } + ], + "initStrategy": "auto" +} \ No newline at end of file diff --git a/test/good-fixtures/SegmentationMaskBuilder/fake-entity.json b/test/good-fixtures/SegmentationMaskBuilder/fake-entity.json new file mode 100644 index 0000000..aa9c045 --- /dev/null +++ b/test/good-fixtures/SegmentationMaskBuilder/fake-entity.json @@ -0,0 +1,42 @@ +{ + "data_types": [ + "image_pyramid", + "PAS" + ], + "status": "QA", + "immediate_ancestors": [ + { + "data_types": ["PAS"] + } + ], + "files": [ + { + "rel_path": "extras/transformations/ometiff-pyramids/91706.segmentations.ome.tif" + }, + { + "rel_path": "ometiff-pyramids/separate/should-be-ignored.ome.tif" + }, + { + "rel_path": "extras/transformations/output_offsets/91706.segmentations.offsets.json" + }, + { + "rel_path": "extras/transformations/objects.zarr/arteries-arterioles.zarr/.zgroup" + }, + { + "rel_path": "extras/transformations/objects.zarr/tubules.zarr/.zgroup" + }, + { + "rel_path": "extras/transformations/objects.zarr/glomeruli.zarr/.zgroup" + }, + { + "rel_path": "extras/transformations/objects.zarr/metadata.json" + }, + { + "rel_path": "extras/transformations.json" + 
} + + ], + "uuid": "fcd7f68678d85a4a4d28a4b269de379e", + "metadata": {"dag_provenance_list": []}, + "parent": { "uuid": "f9ae931b8b49252f150d7f8bf1d2d13f" } +} \ No newline at end of file diff --git a/test/test_builders.py b/test/test_builders.py index 0842b8c..78b664e 100644 --- a/test/test_builders.py +++ b/test/test_builders.py @@ -42,6 +42,11 @@ class MockResponse: "SeqFISHViewConfBuilder", "NanoDESIViewConfBuilder", ] + +EPIC_UUID = 'fcd7f68678d85a4a4d28a4b269de379e' +SEG_PYRAMID_BUILDER = "SegImagePyramidViewConfBuilder" +EPIC_BUILDER = 'SegmentationMaskBuilder' + image_pyramid_paths = [ path for path in good_entity_paths if path.parent.name in image_pyramids ] @@ -61,8 +66,13 @@ class MockResponse: } -def get_assaytype(entity): - uuid = entity.get("uuid") +def get_assaytype(input): + # uuid = entity.get("uuid") + if not isinstance(input, str): + uuid = input.get("uuid") + else: + uuid = input + # print(uuid, assaytypes_path.joinpath(f"{uuid}.json")) if uuid is None: # pragma: no cover return default_assaytype assay = json.loads(assaytypes_path.joinpath(f"{uuid}.json").read_text()) @@ -137,58 +147,67 @@ def test_entity_to_vitessce_conf(entity_path, mocker): marker = ( possible_marker.split("=")[1] if possible_marker.startswith("marker=") else None ) - + epic_uuid = None entity = json.loads(entity_path.read_text()) parent = entity.get("parent") or None # Only used for image pyramids - Builder = get_view_config_builder(entity, get_assaytype, parent) - assert Builder.__name__ == entity_path.parent.name - - # Envvars should not be set during normal test runs, - # but to test the end-to-end integration, they are useful. - groups_token = environ.get("GROUPS_TOKEN", "groups_token") - assets_url = environ.get("ASSETS_URL", "https://example.com") - builder = Builder(entity, groups_token, assets_url) - conf, cells = builder.get_conf_cells(marker=marker) - - expected_conf_path = entity_path.parent / entity_path.name.replace( - "-entity", "-conf" - ) - expected_conf = json.loads(expected_conf_path.read_text()) - - # Compare normalized JSON strings so the diff is easier to read, - # and there are fewer false positives. - assert json.dumps(conf, indent=2, sort_keys=True) == json.dumps( - expected_conf, indent=2, sort_keys=True - ) - - expected_cells_path = entity_path.parent / entity_path.name.replace( - "-entity.json", "-cells.yaml" - ) - if expected_cells_path.is_file(): - expected_cells = yaml.safe_load(expected_cells_path.read_text()) - - # Compare as YAML to match fixture. - assert yaml.dump(clean_cells(cells)) == yaml.dump(expected_cells) - - # TODO: This is a stub for now, real tests for the EPIC builders - # will be added in a future PR. - - epic_builder = get_epic_builder(entity["uuid"]) - assert epic_builder is not None - - if conf is None: - with pytest.raises(ValueError): - epic_builder(ConfCells(conf, cells), entity["uuid"]).get_conf_cells() - return + assay_type = get_assaytype(entity["uuid"]) + if 'segmentation_mask' not in assay_type['vitessce-hints']: + if (SEG_PYRAMID_BUILDER == entity_path.parent.name): + epic_uuid = EPIC_UUID + Builder = get_view_config_builder(entity, get_assaytype, parent, epic_uuid) + # Envvars should not be set during normal test runs, + # but to test the end-to-end integration, they are useful. 
+ groups_token = environ.get("GROUPS_TOKEN", "groups_token") + assets_url = environ.get("ASSETS_URL", "https://example.com") + # epic_uuid = environ.get("EPIC_UUID", "epic_uuid") + builder = Builder(entity, groups_token, assets_url) + conf, cells = builder.get_conf_cells(marker=marker) + + assert Builder.__name__ == entity_path.parent.name + expected_conf_path = entity_path.parent / entity_path.name.replace( + "-entity", "-conf" + ) + expected_conf = json.loads(expected_conf_path.read_text()) - built_epic_conf, _ = epic_builder( - ConfCells(conf, cells), entity["uuid"] - ).get_conf_cells() + # Compare normalized JSON strings so the diff is easier to read, + # and there are fewer false positives. + assert json.dumps(conf, indent=2, sort_keys=True) == json.dumps( + expected_conf, indent=2, sort_keys=True + ) - assert built_epic_conf is not None - assert json.dumps(built_epic_conf, indent=2, sort_keys=True) == json.dumps( - conf, indent=2, sort_keys=True - ) + expected_cells_path = entity_path.parent / entity_path.name.replace( + "-entity.json", "-cells.yaml" + ) + if expected_cells_path.is_file(): + expected_cells = yaml.safe_load(expected_cells_path.read_text()) + + # Compare as YAML to match fixture. + assert yaml.dump(clean_cells(cells)) == yaml.dump(expected_cells) + + if (SEG_PYRAMID_BUILDER == entity_path.parent.name): + epic_uuid = EPIC_UUID + epic_entity_path = next(entity_path.parent.parent.rglob(EPIC_BUILDER), None) + epic_builder = get_epic_builder(epic_uuid) + assert epic_builder is not None + assert epic_builder.__name__ == epic_entity_path.name + epic_entity = json.loads(Path(f'{epic_entity_path}/fake-entity.json').read_text()) + if conf is None: # pragma: no cover + with pytest.raises(ValueError): + epic_builder(epic_uuid, + ConfCells(conf, cells), entity, epic_entity, groups_token, assets_url).get_conf_cells() + return + + built_epic_conf, _ = epic_builder(epic_uuid, + ConfCells(conf, cells), entity, epic_entity, groups_token, assets_url + ).get_conf_cells() + assert built_epic_conf is not None + + expected_conf_path = Path(f'{epic_entity_path}/fake-conf.json') + expected_conf = json.loads(expected_conf_path.read_text()) + + assert json.dumps(built_epic_conf, indent=2, sort_keys=True) == json.dumps( + expected_conf, indent=2, sort_keys=True + ) @pytest.mark.parametrize("entity_path", bad_entity_paths, ids=lambda path: path.name) diff --git a/test/test_client.py b/test/test_client.py index 64093b8..acb9bb6 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -172,7 +172,7 @@ def test_get_dataset_uuids_more_than_10k(app, mocker): api_client = ApiClient() with pytest.raises(Exception) as error_info: api_client.get_all_dataset_uuids() - assert error_info.match("At least 10k datasets") + assert error_info.match("At least 10k datasets") # pragma: no cover @pytest.mark.parametrize("plural_lc_entity_type", ("datasets", "samples", "donors")) @@ -193,7 +193,7 @@ def test_get_entities_more_than_10k(app, mocker): api_client = ApiClient() with pytest.raises(Exception) as error_info: api_client.get_entities("datasets") - assert error_info.match("At least 10k datasets") + assert error_info.match("At least 10k datasets") # pragma: no cover @pytest.mark.parametrize("params", ({"uuid": "uuid"}, {"hbm_id": "hubmap_id"})) @@ -210,7 +210,7 @@ def test_get_entity_two_ids(app, mocker): api_client = ApiClient() with pytest.raises(Exception) as error_info: api_client.get_entity(uuid="uuid", hbm_id="hubmap_id") - assert error_info.match("Only UUID or HBM ID should be provided") + assert 
error_info.match("Only UUID or HBM ID should be provided") # pragma: no cover def mock_get_revisions(path, **kwargs): diff --git a/test/test_epic_builders.py b/test/test_epic_builders.py index 0e56baa..ff84242 100644 --- a/test/test_epic_builders.py +++ b/test/test_epic_builders.py @@ -10,3 +10,8 @@ ) def test_get_epic_builder(epic_uuid, expected): assert get_epic_builder(epic_uuid).__name__ == expected + + +def test_get_epic_builder_no_uuid(): + with pytest.raises(ValueError, match="epic_uuid must be provided"): + get_epic_builder(None)