Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions colorizer_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,3 +776,32 @@ def _get_frame_count_from_3d_source(source: str) -> int:
# Attempt to read the image to get info (such as length)
img = BioImage(source)
return int(img.dims.T)


def is_url(source: str) -> bool:
    """
    Checks if a source string is an HTTP(S) URL.

    The scheme is matched case-insensitively (URI schemes are
    case-insensitive), so ``"HTTPS://host/x"`` is recognized as a URL rather
    than being mistaken for a local path.

    Args:
        source: The string to inspect.

    Returns:
        True if ``source`` begins with ``http://`` or ``https://`` in any
        letter case, False otherwise (including for the empty string).
    """
    return source.lower().startswith(("http://", "https://"))


def check_file_source(name: str, source: str | None, outpath: pathlib.Path) -> None:
    """
    Logs errors or warnings for missing, non-portable, or unreachable file sources.

    Nothing is raised or returned; problems are only reported through
    ``logging``. HTTP(S) URLs are accepted as-is and are not checked further.

    Args:
        name: Human-readable label for the source, used as the prefix of
            every log message.
        source: The source string to validate: either an HTTP(S) URL or a
            path relative to ``outpath``. ``None`` is reported as undefined.
        outpath: Directory that relative sources are resolved against when
            checking for existence.
    """
    if source is None:
        logging.error(f"{name} is undefined.")
        return
    if is_url(source):
        # Only local paths are validated below.
        return

    # Absolute paths are rejected, e.g. "F:/data/data.zarr" is not a loadable
    # path for TFE. `os.path.isabs` only applies the rules of the platform
    # running this code (on POSIX it does not recognize "F:/..."), so the
    # path is also tested under explicit Windows and POSIX rules to get the
    # same verdict everywhere.
    # NOTE: per the change author, future changes in this data conversion
    # package will likely try to replace/update such paths upstream of this
    # final validation step.
    is_absolute = (
        os.path.isabs(source)
        or pathlib.PureWindowsPath(source).is_absolute()
        or pathlib.PurePosixPath(source).is_absolute()
    )
    if is_absolute:
        logging.error(
            f"{name} must be a relative path inside the dataset directory or an HTTP(S) URL. Received: '{source}'"
        )
    elif ".." in pathlib.Path(source).parts:
        logging.warning(
            f"{name} should not contain parent directory references ('..'), as it may fail to load in certain deploy environments. Received: '{source}'"
        )
    elif not os.path.exists(outpath / source):
        logging.warning(
            f"{name} path could not be found. Please check that it exists. Received: '{source}'"
        )
14 changes: 14 additions & 0 deletions colorizer_data/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
DEFAULT_FRAME_PREFIX,
DEFAULT_FRAME_SUFFIX,
_get_frame_count_from_3d_source,
check_file_source,
cast_feature_to_info_type,
copy_remote_or_local_file,
generate_frame_paths,
Expand Down Expand Up @@ -711,3 +712,16 @@ def validate_dataset(
+ " or add an offset if your frame numbers do not start at 0."
+ " You may also need to generate the list of frames yourself if your dataset is skipping frames."
)

# Check that frames3d sources are reachable
if "frames3d" in self.manifest:
frames3d_metadata = Frames3dMetadata.from_dict(self.manifest["frames3d"])
source = frames3d_metadata.source
check_file_source("3D frames source", source, self.outpath)
# Validate backdrops
if frames3d_metadata.backdrops is not None:
for i in range(len(frames3d_metadata.backdrops)):
backdrop_source = frames3d_metadata.backdrops[i].source
check_file_source(
f"3D frames backdrop {i} source", backdrop_source, self.outpath
)
Loading