NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. The line structure below is reconstructed; the
wrapping and indentation of unchanged context lines are best guesses and must
be confirmed against the target files before applying. The `index` blob hashes
predate the review edits made to the added lines and are therefore stale.

diff --git a/colorizer_data/converter.py b/colorizer_data/converter.py
index 9d3005c..df44b01 100644
--- a/colorizer_data/converter.py
+++ b/colorizer_data/converter.py
@@ -443,7 +443,8 @@ def convert_colorizer_data(
             be flattened along the Z-axis using a max projection. If `None`, 2D frame generation
             will be skipped.
         frames_3d (Frames3dMetadata | None): A `Frames3dMetadata` object containing the 3D image source
-            ("source") and channel ("segmentation_channel") to use for the 3D image source.
+            ("source") and channel ("segmentation_channel") to use for the 3D image source. The source
+            should be the path to or the URL of an OME-Zarr array (preferred) or OME-TIFF file.
         centroid_x_column (str): The name of the column containing x-coordinates of object
             centroids, in pixels relative to the frame image, where 0 is the left edge of the image.
             Defaults to "Centroid X."
diff --git a/colorizer_data/types.py b/colorizer_data/types.py
index 2c3e893..151f52f 100644
--- a/colorizer_data/types.py
+++ b/colorizer_data/types.py
@@ -128,9 +128,13 @@ class Backdrop3dMetadata(DataClassJsonMixin):
     name: str
     source: str
     """
-    HTTPS or local path to an OME-Zarr source volume (e.g. ends with
-    `.ome.zarr`). Can be the same source as the segmentations defined in
-    `Frames3dMetadata`.
+    HTTPS URL or relative path from the dataset directory to an OME-Zarr source
+    volume (e.g. ends with `.ome.zarr`). Can be the same source as the
+    segmentations defined in `Frames3dMetadata`.
+
+    Examples:
+    - `volumes/sample_data.ome.zarr`
+    - `https://example.com/data/sample_data.ome.zarr`
     """
     channel_index: str
     """
@@ -155,8 +159,12 @@ class Frames3dMetadata(DataClassJsonMixin):
 
     source: str
     """
-    HTTPS or local path to 3D data, ideally in OME-Zarr format (e.g. ends with
-    `.ome.zarr`).
+    HTTPS URL or relative path from the dataset directory to 3D data, ideally in
+    OME-Zarr format (e.g. ends with `.ome.zarr`).
+
+    Examples:
+    - `volumes/sample_data.ome.zarr`
+    - `https://example.com/data/sample_data.ome.zarr`
     """
     segmentation_channel: int = 0
     """The channel of segmentation data. `0` by default."""
diff --git a/colorizer_data/utils.py b/colorizer_data/utils.py
index 18fc6f1..aba30dc 100644
--- a/colorizer_data/utils.py
+++ b/colorizer_data/utils.py
@@ -776,3 +776,48 @@ def _get_frame_count_from_3d_source(source: str) -> int:
     # Attempt to read the image to get info (such as length)
     img = BioImage(source)
     return int(img.dims.T)
+
+
+def is_url(source: str) -> bool:
+    """
+    Checks if a source string is an HTTP(S) URL.
+
+    The scheme is matched case-insensitively (e.g. `HTTPS://` is accepted).
+    """
+    return source.lower().startswith(("http://", "https://"))
+
+
+def check_file_source(name: str, source: str | None, outpath: pathlib.Path) -> None:
+    """
+    Logs an error or warning if a file source is likely to fail to load.
+
+    URLs are accepted as-is and are not fetched. Local paths are checked for
+    being absolute, containing parent directory references, or not existing
+    relative to `outpath`.
+
+    Args:
+        name (str): Human-readable label for the source, used in log messages.
+        source (str | None): The HTTP(S) URL or relative path to check.
+        outpath (pathlib.Path): The dataset directory that relative paths are
+            resolved against.
+    """
+    if not source:
+        # Covers both `None` and the empty string. An empty path would otherwise
+        # resolve to `outpath` itself and pass the existence check below.
+        logging.error(
+            f"{name} is undefined or empty and will fail to load. Please provide a relative path inside the dataset directory or an HTTPS URL to an OME-Zarr (preferred) or OME-TIFF file."
+        )
+    elif not is_url(source):
+        # Check for absolute paths, parent paths, or missing files/folders.
+        if os.path.isabs(source):
+            logging.error(
+                f"{name} cannot be an absolute path and will fail to load. Please provide a relative path inside the dataset directory or an HTTPS URL. Received: '{source}'"
+            )
+        elif ".." in pathlib.Path(source).parts:
+            logging.warning(
+                f"{name} should not contain parent directory references ('..'), as it may fail to load in certain deploy environments. Received: '{source}'"
+            )
+        elif not os.path.exists(outpath / source):
+            logging.warning(
+                f"{name} path could not be found. Please check that it exists. Received: '{source}'"
+            )
diff --git a/colorizer_data/writer.py b/colorizer_data/writer.py
index 086610f..3a62929 100644
--- a/colorizer_data/writer.py
+++ b/colorizer_data/writer.py
@@ -22,6 +22,7 @@
     DEFAULT_FRAME_PREFIX,
     DEFAULT_FRAME_SUFFIX,
     _get_frame_count_from_3d_source,
+    check_file_source,
     cast_feature_to_info_type,
     copy_remote_or_local_file,
     generate_frame_paths,
@@ -711,3 +712,15 @@ def validate_dataset(
                 + " or add an offset if your frame numbers do not start at 0."
                 + " You may also need to generate the list of frames yourself if your dataset is skipping frames."
             )
+
+        # Check that frames3d sources are URLs or valid relative paths
+        if "frames3d" in self.manifest:
+            frames3d_metadata = Frames3dMetadata.from_dict(self.manifest["frames3d"])
+            source = frames3d_metadata.source
+            check_file_source("3D frames source", source, self.outpath)
+            # Validate backdrops
+            if frames3d_metadata.backdrops is not None:
+                for i, backdrop in enumerate(frames3d_metadata.backdrops):
+                    check_file_source(
+                        f"3D frames backdrop {i} source", backdrop.source, self.outpath
+                    )