Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tkakar/CAT-1102-add-scaling-factor-to-seg-config #107

Merged
merged 8 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.8
0.3.9
67 changes: 53 additions & 14 deletions src/portal_visualization/builders/imaging_builders.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from .base_builders import ViewConfBuilder
from ..paths import (IMAGE_PYRAMID_DIR, OFFSETS_DIR, SEQFISH_HYB_CYCLE_REGEX,
SEQFISH_FILE_REGEX, SEGMENTATION_SUPPORT_IMAGE_SUBDIR,
SEGMENTATION_SUBDIR, IMAGE_METADATA_DIR)
from pathlib import Path
import re

Expand All @@ -12,11 +16,10 @@
Component as cm,
)

from ..utils import get_matches, group_by_file_name, get_conf_cells, get_found_images
from ..paths import (IMAGE_PYRAMID_DIR, OFFSETS_DIR, SEQFISH_HYB_CYCLE_REGEX,
SEQFISH_FILE_REGEX, SEGMENTATION_SUPPORT_IMAGE_SUBDIR,
SEGMENTATION_SUBDIR)
from .base_builders import ViewConfBuilder
from ..utils import get_matches, group_by_file_name, get_conf_cells, get_found_images, \
get_found_images_all, get_image_scale, get_image_metadata

from ..constants import base_image_dirs

BASE_IMAGE_VIEW_TYPE = 'image'
SEG_IMAGE_VIEW_TYPE = 'seg'
Expand All @@ -30,6 +33,7 @@ def __init__(self, entity, groups_token, assets_endpoint, **kwargs):
self.use_full_resolution = []
self.use_physical_size_scaling = False
self.view_type = BASE_IMAGE_VIEW_TYPE
self.base_image_metadata = None
super().__init__(entity, groups_token, assets_endpoint, **kwargs)

def _get_img_and_offset_url(self, img_path, img_dir):
Expand All @@ -49,7 +53,8 @@ def _get_img_and_offset_url(self, img_path, img_dir):
... assets_endpoint='https://example.com')
>>> pprint(builder._get_img_and_offset_url("rel_path/to/clusters.ome.tiff", "rel_path/to"))
('https://example.com/uuid/rel_path/to/clusters.ome.tiff?token=groups_token',\n\
'https://example.com/uuid/output_offsets/clusters.offsets.json?token=groups_token')
'https://example.com/uuid/output_offsets/clusters.offsets.json?token=groups_token',\n\
'https://example.com/uuid/image_metadata/clusters.metadata.json?token=groups_token')

"""
img_url = self._build_assets_url(img_path)
Expand All @@ -62,6 +67,13 @@ def _get_img_and_offset_url(self, img_path, img_dir):
re.sub(img_dir, OFFSETS_DIR, img_url),
)
),
str(
re.sub(
r"ome\.tiff?",
"metadata.json",
re.sub(img_dir, IMAGE_METADATA_DIR, img_url),
)
),
)

def _get_img_and_offset_url_seg(self, img_path, img_dir):
Expand All @@ -74,6 +86,7 @@ def _get_img_and_offset_url_seg(self, img_path, img_dir):
"""
img_url = self._build_assets_url(img_path)
offsets_path = re.sub(IMAGE_PYRAMID_DIR, OFFSETS_DIR, img_dir)
metadata_path = re.sub(IMAGE_PYRAMID_DIR, IMAGE_METADATA_DIR, img_dir)
return (
img_url,
str(
Expand All @@ -83,6 +96,13 @@ def _get_img_and_offset_url_seg(self, img_path, img_dir):
re.sub(img_dir, offsets_path, img_url),
)
),
str(
re.sub(
r"ome\.tiff?",
"metadata.json",
re.sub(img_dir, metadata_path, img_url),
)
),
)

def _add_segmentation_image(self, dataset):
Expand All @@ -95,18 +115,24 @@ def _add_segmentation_image(self, dataset):
except Exception as e:
raise RuntimeError(f"Error while searching for segmentation images: {e}")

filtered_images = [img for img in found_images if SEGMENTATION_SUPPORT_IMAGE_SUBDIR not in img]
filtered_images = [
img for img in found_images
if not any(subdir in img for subdir in base_image_dirs)
]

if not filtered_images:
raise FileNotFoundError(f"Segmentation assay with uuid {self._uuid} has no matching files")

img_url, offsets_url = self._get_img_and_offset_url(filtered_images[0], self.seg_image_pyramid_regex)
img_url, offsets_url, metadata_url = self._get_img_and_offset_url(
filtered_images[0], self.seg_image_pyramid_regex)
seg_meta_data = get_image_metadata(self, metadata_url)

scale = get_image_scale(self.base_image_metadata, seg_meta_data)
if dataset is not None:
dataset.add_object(
ObsSegmentationsOmeTiffWrapper(img_url=img_url, offsets_url=offsets_url,
obs_types_from_channel_names=True,
# coordinate_transformations=[{"type": "scale", "scale":
# [0.377.,0.377,1,1,1]}] # need to read from a file
coordinate_transformations=[{"type": "scale", "scale": scale}]
)
)

Expand Down Expand Up @@ -148,7 +174,9 @@ def get_conf_cells_common(self, get_img_and_offset_url_func, **kwargs):
dataset = vc.add_dataset(name="Visualization Files")

if 'seg' in self.view_type:
img_url, offsets_url = get_img_and_offset_url_func(found_images[0], self.image_pyramid_regex)
img_url, offsets_url, metadata_url = get_img_and_offset_url_func(found_images[0], self.image_pyramid_regex)
meta_data = get_image_metadata(self, metadata_url)
self.base_image_metadata = meta_data
dataset = dataset.add_object(
ImageOmeTiffWrapper(img_url=img_url, offsets_url=offsets_url, name=Path(found_images[0]).name)
)
Expand All @@ -161,7 +189,7 @@ def get_conf_cells_common(self, get_img_and_offset_url_func, **kwargs):
img_url=img_url, offsets_url=offsets_url, name=Path(img_path).name
)
for img_path in found_images
for img_url, offsets_url in [get_img_and_offset_url_func(img_path, self.image_pyramid_regex)]
for img_url, offsets_url, _ in [get_img_and_offset_url_func(img_path, self.image_pyramid_regex)]
]
dataset.add_object(
MultiImageWrapper(images, use_physical_size_scaling=self.use_physical_size_scaling)
Expand Down Expand Up @@ -216,10 +244,21 @@ class KaggleSegImagePyramidViewConfBuilder(AbstractImagingViewConfBuilder):

def __init__(self, entity, groups_token, assets_endpoint, **kwargs):
super().__init__(entity, groups_token, assets_endpoint, **kwargs)
self.image_pyramid_regex = f"{IMAGE_PYRAMID_DIR}/{SEGMENTATION_SUPPORT_IMAGE_SUBDIR}"
self.seg_image_pyramid_regex = IMAGE_PYRAMID_DIR
self.view_type = KAGGLE_IMAGE_VIEW_TYPE

# Needed to adjust to various directory structures. For older datasets, the image pyramids will be present in
# 'processed_microscopy' or 'processedMicroscopy' while newer datasets are listed under lab_processed.

image_dir = SEGMENTATION_SUPPORT_IMAGE_SUBDIR
file_paths_found = self._get_file_paths()
paths = get_found_images_all(file_paths_found)
matched_dirs = {dir for dir in base_image_dirs if any(dir in img for img in paths)}

image_dir = next(iter(matched_dirs), image_dir)

self.image_pyramid_regex = f"{IMAGE_PYRAMID_DIR}/{image_dir}"

def get_conf_cells(self, **kwargs):
return self.get_conf_cells_common(self._get_img_and_offset_url_seg, **kwargs)

Expand Down Expand Up @@ -278,7 +317,7 @@ def get_conf_cells(self, **kwargs):
dataset = vc.add_dataset(name=pos_name)
sorted_images = sorted(images, key=self._get_hybcycle)
for img_path in sorted_images:
img_url, offsets_url = self._get_img_and_offset_url(
img_url, offsets_url, _ = self._get_img_and_offset_url(
img_path, IMAGE_PYRAMID_DIR
)
image_wrappers.append(
Expand Down
4 changes: 2 additions & 2 deletions src/portal_visualization/builders/sprm_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def _get_ometiff_image_wrapper(self, found_image_file, found_image_path):
:param str found_image_file: The path to look for the image itself
:param str found_image_path: The folder to be replaced with the offsets path
"""
img_url, offsets_url = self._get_img_and_offset_url(
img_url, offsets_url, _ = self._get_img_and_offset_url(
found_image_file, re.escape(found_image_path),
)
return OmeTiffWrapper(
Expand Down Expand Up @@ -172,7 +172,7 @@ def _get_bitmask_image_path(self):
return f"{self._mask_path_regex}/{self._mask_name}" + r"\.ome\.tiff?"

def _get_ometiff_mask_wrapper(self, found_bitmask_file):
bitmask_img_url, bitmask_offsets_url = self._get_img_and_offset_url(
bitmask_img_url, bitmask_offsets_url, _ = self._get_img_and_offset_url(
found_bitmask_file, self.image_pyramid_regex,
)
return OmeTiffWrapper(
Expand Down
13 changes: 13 additions & 0 deletions src/portal_visualization/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Conversion factors from metres to each physical-size unit that may appear
# in image metadata (e.g. 1 m == 1e9 nm).  Keys must match the NFKC-normalized
# unit strings read from the metadata JSON.
image_units = dict(
    nm=1e9,
    μm=1e6,
    mm=1e3,
    cm=1e2,
    dm=10,
)

# Directory names that may hold the base image pyramids for kaggle-1 and
# kaggle-2.  Older datasets were processed into 'processed_microscopy' or
# 'processedMicroscopy'; newer datasets live under 'lab_processed'.
base_image_dirs = [
    'lab_processed',
    'processed_microscopy',
    'processedMicroscopy',
]
1 change: 1 addition & 0 deletions src/portal_visualization/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
# Subdirectory holding transformation files for segmentation data.
SEGMENTATION_SUBDIR = "extras/transformations"
# Zarr store produced by the seg-to-mudata pipeline.
SEGMENTATION_ZARR_STORES = "hubmap_ui/seg-to-mudata-zarr/objects.zarr"
# Default directory for the base (support) images backing segmentation masks.
SEGMENTATION_SUPPORT_IMAGE_SUBDIR = "lab_processed/images"
# Directory holding per-image sidecar metadata JSON (physical sizes/units).
IMAGE_METADATA_DIR = "image_metadata"
168 changes: 168 additions & 0 deletions src/portal_visualization/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from pathlib import Path
import re
from itertools import groupby
from requests import get
from unicodedata import normalize

import nbformat
from vitessce import VitessceConfig

from .builders.base_builders import ConfCells
from .constants import image_units


def get_matches(files, regex):
Expand Down Expand Up @@ -82,6 +85,171 @@ def get_found_images(image_pyramid_regex, file_paths_found):
return found_images


def get_found_images_all(file_paths_found):
    """
    Return every OME-TIFF path among *file_paths_found*, regardless of the
    directory it lives in, excluding any path under a 'separate/' directory.
    """
    ome_tiff_paths = get_matches(file_paths_found, r".*\.ome\.tiff?$",)
    return [p for p in ome_tiff_paths if 'separate/' not in p]


def get_image_metadata(self, img_url):
    """
    Fetch the sidecar metadata JSON for an image URL and return it as a dict.

    Returns the parsed JSON only when it is a dict containing at least
    'PhysicalSizeX' and 'PhysicalSizeUnitX'; otherwise returns None and
    prints a diagnostic.

    :param self: builder instance providing ``_get_request_init()``
        (request kwargs such as auth headers; may return None).
    :param str img_url: URL of the image's ``*.metadata.json`` asset.
    :rtype: dict or None

    >>> import builtins
    >>> from unittest.mock import Mock, patch
    >>> mock_instance = Mock()
    >>> mock_instance._get_request_init.return_value = {}
    >>> mock_response = Mock()
    >>> mock_response.status_code = 404
    >>> mock_response.reason = 'Not Found'
    >>> with patch('portal_visualization.utils.get', return_value=mock_response):
    ...     with patch.object(builtins, 'print') as mock_print:
    ...         result = get_image_metadata(mock_instance, 'https://example.com/image')
    ...         mock_print.assert_called_with(f"Failed to retrieve https://example.com/image: 404 - Not Found")
    ...         assert result is None
    """

    meta_data = None
    request_init = self._get_request_init() or {}
    response = get(img_url, **request_init)
    # Success path is excluded from coverage; note the pragma must be spelled
    # "# pragma: no cover" (with the colon) for coverage.py to honor it.
    if response.status_code == 200:  # pragma: no cover
        data = response.json()
        if isinstance(data, dict) and "PhysicalSizeX" in data and 'PhysicalSizeUnitX' in data:
            meta_data = data
        else:
            print("Image does not have metadata")
    else:
        print(f"Failed to retrieve {img_url}: {response.status_code} - {response.reason}")
    return meta_data


def get_image_scale(base_metadata, seg_metadata):
    """
    Compute the x/y scale factors mapping a segmentation image onto its base image.

    Args:
        base_metadata (dict): Metadata for the base image (may be None).
        seg_metadata (dict): Metadata for the segmented image (may be None).

    Returns:
        list: [scale_x, scale_y, 1, 1, 1]; falls back to all ones when either
        metadata is missing or any physical-size unit is absent/unrecognized.

    Doctest:
    >>> from unittest.mock import patch
    >>> base = {'PhysicalSizeX': 50, 'PhysicalSizeY': 100, \
                'PhysicalSizeUnitX': 'mm', 'PhysicalSizeUnitY': 'mm'}
    >>> seg = {'PhysicalSizeX': 25, 'PhysicalSizeY': 50, \
               'PhysicalSizeUnitX': 'mm', 'PhysicalSizeUnitY': 'mm'}
    >>> with patch('builtins.print'):
    ...     get_image_scale(base, seg)
    [2.0, 2.0, 1, 1, 1]
    >>> with patch('builtins.print'):
    ...     get_image_scale(base, None)
    [1, 1, 1, 1, 1]
    """

    scale = [1, 1, 1, 1, 1]

    # Unpack (size_x, size_y, unit_x, unit_y); all None when metadata is absent.
    missing = (None, None, None, None)
    seg_x, seg_y, seg_x_unit, seg_y_unit = (
        get_physical_size_units(seg_metadata) if seg_metadata is not None else missing
    )
    base_x, base_y, base_x_unit, base_y_unit = (
        get_physical_size_units(base_metadata) if base_metadata is not None else missing
    )

    units = [base_x_unit, base_y_unit, seg_x_unit, seg_y_unit]
    if all(units) and all(unit in image_units for unit in units):
        # Ratio of physical sizes, corrected for differing units via the
        # metres-to-unit factors in image_units.
        scale_x = (base_x / seg_x) * (image_units[seg_x_unit] / image_units[base_x_unit])
        scale_y = (base_y / seg_y) * (image_units[seg_y_unit] / image_units[base_y_unit])
        scale = [scale_x, scale_y, 1, 1, 1]
    else:
        print("PhysicalSize units are not correct")
    print("Scaling factor: ", scale)
    return scale


def get_physical_size_units(metadata):
    """
    Extract the physical sizes (X, Y) and their units from image metadata.

    Args:
        metadata (dict): The metadata dictionary for the image.

    Returns:
        tuple: (size_x, size_y, size_x_unit, size_y_unit).  Sizes default to 1
        when the key is missing or None; units are None when missing or not
        strings.

    Doctest:

    >>> metadata = { \
        'PhysicalSizeX': 50, 'PhysicalSizeY': 100, 'PhysicalSizeUnitX': 'mm', 'PhysicalSizeUnitY': 'mm' \
    }
    >>> get_physical_size_units(metadata)
    (50, 100, 'mm', 'mm')

    >>> metadata = { \
        'PhysicalSizeX': None, 'PhysicalSizeY': 100, 'PhysicalSizeUnitX': 'mm', 'PhysicalSizeUnitY': 'mm' \
    }
    >>> get_physical_size_units(metadata)
    (1, 100, 'mm', 'mm')

    >>> get_physical_size_units({'PhysicalSizeX': 50, 'PhysicalSizeUnitX': 'mm'})
    (50, 1, 'mm', None)
    """

    # get_image_metadata only guarantees the X keys are present, so use .get()
    # rather than subscripting to avoid a KeyError when the Y keys are absent.
    # size_x and size_y default to 1 if nothing is provided.
    size_x = metadata.get('PhysicalSizeX')
    size_y = metadata.get('PhysicalSizeY')
    size_x = 1 if size_x is None else size_x
    size_y = 1 if size_y is None else size_y
    size_x_unit = convert_unicode_unit(metadata, 'PhysicalSizeUnitX')
    size_y_unit = convert_unicode_unit(metadata, 'PhysicalSizeUnitY')

    return size_x, size_y, size_x_unit, size_y_unit


def convert_unicode_unit(metadata, key):
    """
    Return the NFKC-normalized unit string stored at *key*, or None.

    NFKC normalization collapses unicode variants of a unit (e.g. MICRO SIGN
    '\u00b5' vs GREEK SMALL LETTER MU '\u03bc') into a single canonical form
    so units can be compared consistently.

    Args:
        metadata (dict): The metadata dictionary containing the key.
        key (str): The key for the unit (e.g. 'PhysicalSizeUnitX').

    Returns:
        str or None: The normalized unit, or None when the key is missing or
        its value is not a string.

    Doctest:

    >>> convert_unicode_unit({'PhysicalSizeUnitX': 'mm'}, 'PhysicalSizeUnitX')
    'mm'

    >>> convert_unicode_unit({'PhysicalSizeUnitY': '\u00b5m'}, 'PhysicalSizeUnitY')
    'μm'

    >>> convert_unicode_unit({'PhysicalSizeUnitY': None}, 'PhysicalSizeUnitY')
    """
    value = metadata.get(key)
    # Anything that is not a string (missing key, None, numeric) yields None.
    if not isinstance(value, str):
        return None
    return normalize('NFKC', value)


def files_from_response(response_json):
'''
>>> response_json = {'hits': {'hits': [
Expand Down
Loading