Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion colorizer_data/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,8 @@ def convert_colorizer_data(
be flattened along the Z-axis using a max projection. If `None`, 2D frame generation
will be skipped.
frames_3d (Frames3dMetadata | None): A `Frames3dMetadata` object containing the 3D image source
("source") and channel ("segmentation_channel") to use for the 3D image source.
("source") and channel ("segmentation_channel") to use for the 3D image source. The source
should be the path to or the URL of an OME-Zarr array (preferred) or OME-TIFF file.
centroid_x_column (str): The name of the column containing x-coordinates of object
centroids, in pixels relative to the frame image, where 0 is the left edge of the
image. Defaults to "Centroid X."
Expand Down
18 changes: 13 additions & 5 deletions colorizer_data/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,13 @@ class Backdrop3dMetadata(DataClassJsonMixin):
name: str
source: str
"""
HTTPS or local path to an OME-Zarr source volume (e.g. ends with
`.ome.zarr`). Can be the same source as the segmentations defined in
`Frames3dMetadata`.
HTTPS URL or relative path from the dataset directory to an OME-Zarr source
volume (e.g. ends with `.ome.zarr`). Can be the same source as the
segmentations defined in `Frames3dMetadata`.

Example:
- `volumes/sample_data.ome.zarr`
- `https://example.com/data/sample_data.ome.zarr`
"""
channel_index: str
"""
Expand All @@ -155,8 +159,12 @@ class Frames3dMetadata(DataClassJsonMixin):

source: str
"""
HTTPS or local path to 3D data, ideally in OME-Zarr format (e.g. ends with
`.ome.zarr`).
HTTPS URL or relative path from the dataset directory to 3D data, ideally in
OME-Zarr format (e.g. ends with `.ome.zarr`).

Example:
- `volumes/sample_data.ome.zarr`
- `https://example.com/data/sample_data.ome.zarr`
"""
segmentation_channel: int = 0
"""The channel of segmentation data. `0` by default."""
Expand Down
31 changes: 31 additions & 0 deletions colorizer_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,3 +776,34 @@ def _get_frame_count_from_3d_source(source: str) -> int:
# Attempt to read the image to get info (such as length)
img = BioImage(source)
return int(img.dims.T)


def is_url(source: str) -> bool:
    """
    Checks if a source string is an HTTP(S) URL.

    URI schemes are case-insensitive (RFC 3986, section 3.1), so `HTTPS://...`
    is recognized as well as `https://...`. Leading whitespace is not stripped.

    Args:
        source (str): The source string to check.

    Returns:
        bool: True if `source` begins with an `http://` or `https://` scheme.
    """
    # Lowercase only the prefix being compared ("https://" is 8 characters);
    # the rest of the URL may be case-sensitive and is left untouched.
    return source[:8].lower().startswith(("http://", "https://"))


def check_file_source(name: str, source: str | None, outpath: pathlib.Path):
"""
Logs warnings for missing or unreachable file sources.
"""
if source is None:
logging.error(
f"{name} is undefined and will fail to load. Please provide a relative path inside the dataset directory or an HTTPS URL to an OME-Zarr (preferred) or OME-TIFF file."
)
elif not is_url(source):
# Check for absolute paths, parent paths, or missing files/folders.
if os.path.isabs(source):
logging.error(
f"{name} cannot be an absolute path and will fail to load. Please provide a relative path inside the dataset directory or an HTTPS URL. Received: '{source}'"
)
Comment on lines +799 to +801
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ex: "F:/data/data.zarr" is not a loadable path for TFE.

Future changes in this data conversion package will likely try and replace/update this path upstream of this final validation step!

elif ".." in pathlib.Path(source).parts:
logging.warning(
f"{name} should not contain parent directory references ('..'), as it may fail to load in certain deploy environments. Received: '{source}'"
)
elif not os.path.exists(outpath / source):
logging.warning(
f"{name} path could not be found. Please check that it exists. Received: '{source}'"
)
14 changes: 14 additions & 0 deletions colorizer_data/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
DEFAULT_FRAME_PREFIX,
DEFAULT_FRAME_SUFFIX,
_get_frame_count_from_3d_source,
check_file_source,
cast_feature_to_info_type,
copy_remote_or_local_file,
generate_frame_paths,
Expand Down Expand Up @@ -711,3 +712,16 @@ def validate_dataset(
+ " or add an offset if your frame numbers do not start at 0."
+ " You may also need to generate the list of frames yourself if your dataset is skipping frames."
)

# Check that frames3d sources are reachable
if "frames3d" in self.manifest:
frames3d_metadata = Frames3dMetadata.from_dict(self.manifest["frames3d"])
source = frames3d_metadata.source
check_file_source("3D frames source", source, self.outpath)
# Validate backdrops
if frames3d_metadata.backdrops is not None:
for i in range(len(frames3d_metadata.backdrops)):
backdrop_source = frames3d_metadata.backdrops[i].source
check_file_source(
f"3D frames backdrop {i} source", backdrop_source, self.outpath
)
Loading