diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index f83f6a61e..b1b06ded7 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -3,6 +3,7 @@
 albumentations>=1.3.0
 Click>=8.1.3
 defusedxml>=0.7.1
+filelock>=3.9.0
 flask>=2.2.2
 glymur>=0.12.1, !=0.12.6  # 0.12.6 is incompatible due to a private attribute
 imagecodecs>=2022.9.26
diff --git a/tests/conftest.py b/tests/conftest.py
index 489ea4f08..9fa5b2951 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -60,12 +60,18 @@ def root_path(request) -> Path:
 
 
 @pytest.fixture(scope="session")
-def remote_sample(tmp_path_factory: TempPathFactory) -> Callable:
+def tmp_samples_path(tmp_path_factory: TempPathFactory) -> Path:
+    """Return a session-scoped temporary directory for sample files."""
+    return tmp_path_factory.mktemp("data")
+
+
+@pytest.fixture(scope="session")
+def remote_sample(tmp_samples_path) -> Callable:
     """Factory fixture for fetching sample files."""
 
     def __remote_sample(key: str) -> pathlib.Path:
         """Wrapper around tiatoolbox.data._fetch_remote_sample for tests."""
-        return _fetch_remote_sample(key, tmp_path_factory.mktemp("data"))
+        return _fetch_remote_sample(key, tmp_samples_path)
 
     return __remote_sample
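A note on the conftest.py change above: `tmp_samples_path` is session-scoped, so every `remote_sample(key)` call in a test session resolves into one shared directory, and each sample file is downloaded at most once instead of once per `mktemp("data")` call as before. A minimal sketch of the resulting behaviour (these test names are hypothetical, not part of the diff):

def test_first_use(remote_sample):
    sample = remote_sample("svs-1-small")  # fetched into the shared "data" dir

def test_second_use(remote_sample):
    sample = remote_sample("svs-1-small")  # same path returned; no re-download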
diff --git a/tests/models/test_dataset.py b/tests/models/test_dataset.py
index 728586486..228af1c81 100644
--- a/tests/models/test_dataset.py
+++ b/tests/models/test_dataset.py
@@ -96,7 +96,7 @@ def test_kather_dataset(tmp_path):
         "/kather100k-train-nonorm-subset-90.zip"
     )
     save_zip_path = os.path.join(save_dir_path, "Kather.zip")
-    download_data(url, save_zip_path)
+    download_data(url, save_path=save_zip_path)
     unzip_data(save_zip_path, save_dir_path)
     extracted_dir = os.path.join(save_dir_path, "NCT-CRC-HE-100K-NONORM/")
     dataset = KatherPatchDataset(save_dir_path=extracted_dir)
diff --git a/tests/test_tiffreader.py b/tests/test_tiffreader.py
index 8fa4a1e1a..ac85f3a19 100644
--- a/tests/test_tiffreader.py
+++ b/tests/test_tiffreader.py
@@ -1,13 +1,12 @@
 import pytest
 from defusedxml import ElementTree
 
-from tiatoolbox.data import _fetch_remote_sample
 from tiatoolbox.wsicore import wsireader
 
 
-def test_ome_missing_instrument_ref(monkeypatch):
+def test_ome_missing_instrument_ref(monkeypatch, remote_sample):
     """Test that an OME-TIFF can be read without instrument reference."""
-    sample = _fetch_remote_sample("ome-brightfield-pyramid-1-small")
+    sample = remote_sample("ome-brightfield-pyramid-1-small")
     wsi = wsireader.TIFFWSIReader(sample)
     page = wsi.tiff.pages[0]
     description = page.description
@@ -26,9 +25,9 @@ def test_ome_missing_instrument_ref(monkeypatch):
     assert wsi.info.objective_power is None
 
 
-def test_ome_missing_physicalsize(monkeypatch):
+def test_ome_missing_physicalsize(monkeypatch, remote_sample):
     """Test that an OME-TIFF can be read without physical size."""
-    sample = _fetch_remote_sample("ome-brightfield-pyramid-1-small")
+    sample = remote_sample("ome-brightfield-pyramid-1-small")
     wsi = wsireader.TIFFWSIReader(sample)
     page = wsi.tiff.pages[0]
     description = page.description
@@ -47,9 +46,9 @@ def test_ome_missing_physicalsize(monkeypatch):
     assert wsi.info.mpp is None
 
 
-def test_ome_missing_physicalsizey(monkeypatch, caplog):
+def test_ome_missing_physicalsizey(monkeypatch, caplog, remote_sample):
     """Test that an OME-TIFF can be read without physical size."""
-    sample = _fetch_remote_sample("ome-brightfield-pyramid-1-small")
+    sample = remote_sample("ome-brightfield-pyramid-1-small")
     wsi = wsireader.TIFFWSIReader(sample)
     page = wsi.tiff.pages[0]
     description = page.description
@@ -68,9 +67,9 @@ def test_ome_missing_physicalsizey(monkeypatch, caplog):
     assert "Only one MPP value found. Using it for both X and Y" in caplog.text
 
 
-def test_tiffreader_non_tiled_metadata(monkeypatch):
+def test_tiffreader_non_tiled_metadata(monkeypatch, remote_sample):
     """Test that fetching metadata for non-tiled TIFF works."""
-    sample = _fetch_remote_sample("ome-brightfield-pyramid-1-small")
+    sample = remote_sample("ome-brightfield-pyramid-1-small")
     wsi = wsireader.TIFFWSIReader(sample)
     monkeypatch.setattr(wsi.tiff, "is_ome", False)
     monkeypatch.setattr(
diff --git a/tests/test_utils.py b/tests/test_utils.py
index d424b572c..44b0f2b73 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -13,6 +13,7 @@
 import pandas as pd
 import pytest
 from PIL import Image
+from requests import HTTPError
 from shapely.geometry import Polygon
 
 from tests.test_annotation_stores import cell_polygon
@@ -934,12 +935,16 @@ def test_download_unzip_data():
     if os.path.exists(save_dir_path):
         shutil.rmtree(save_dir_path, ignore_errors=True)
     os.makedirs(save_dir_path)
-    save_zip_path = os.path.join(save_dir_path, "test_directory.zip")
-    misc.download_data(url, save_zip_path)
-    misc.download_data(url, save_zip_path, overwrite=True)  # do overwrite
-    misc.unzip_data(save_zip_path, save_dir_path, del_zip=False)  # not remove
-    assert os.path.exists(save_zip_path)
-    misc.unzip_data(save_zip_path, save_dir_path)
+
+    save_zip_path1 = misc.download_data(url, save_dir=save_dir_path)
+    save_zip_path2 = misc.download_data(
+        url, save_dir=save_dir_path, overwrite=True
+    )  # do overwrite
+    assert save_zip_path1 == save_zip_path2
+
+    misc.unzip_data(save_zip_path1, save_dir_path, del_zip=False)  # not remove
+    assert os.path.exists(save_zip_path1)
+    misc.unzip_data(save_zip_path1, save_dir_path)
     extracted_path = os.path.join(save_dir_path, "test_directory")
     # to avoid hidden files in case of MAC-OS or Windows (?)
@@ -992,11 +997,19 @@ def test_download_data():
     # URL not valid
     # shouldn't use save_path if test runs correctly
     save_path = os.path.join(save_dir_path, "temp")
-    with pytest.raises(ConnectionError):
+    with pytest.raises(HTTPError):
         misc.download_data(
             "https://tiatoolbox.dcs.warwick.ac.uk/invalid-url", save_path
         )
 
+    # Both save_dir and save_path are specified
+    with pytest.raises(ValueError, match="save_path and save_dir"):
+        misc.download_data(url, save_dir=save_dir_path, save_path=save_path)
+
+    # Neither save_path nor save_dir is specified
+    with pytest.raises(ValueError, match="save_path or save_dir"):
+        misc.download_data(url)
+
 
 def test_parse_cv2_interpolaton():
     """Test parsing interpolation modes for cv2."""
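Why the expected exception changes here from ConnectionError to HTTPError: the reworked `download_data` (tiatoolbox/utils/misc.py, below) no longer issues a separate HEAD request and raises `ConnectionError` itself; it streams the GET response and calls `Response.raise_for_status()`, which raises `requests.HTTPError` for 4xx/5xx status codes. A standalone sketch of the new failure mode (the URL is only an example):

import requests

response = requests.get("https://example.com/does-not-exist", stream=True, timeout=5)
response.raise_for_status()  # raises requests.HTTPError, e.g. for a 404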
diff --git a/tests/test_wsireader.py b/tests/test_wsireader.py
index ad499fa24..c2a4cb3d0 100644
--- a/tests/test_wsireader.py
+++ b/tests/test_wsireader.py
@@ -28,7 +28,6 @@
 from tiatoolbox import cli, rcParam, utils
 from tiatoolbox.annotation import SQLiteStore
-from tiatoolbox.data import _fetch_remote_sample
 from tiatoolbox.utils import imread
 from tiatoolbox.utils.exceptions import FileNotSupported
 from tiatoolbox.utils.magic import is_sqlite3
@@ -2035,9 +2034,9 @@ def test_ngff_sqlitestore(tmp_path, remote_sample):
         wsireader.NGFFWSIReader(dest.path)
 
 
-def test_ngff_zattrs_non_micrometer_scale_mpp(tmp_path, caplog):
+def test_ngff_zattrs_non_micrometer_scale_mpp(tmp_path, remote_sample, caplog):
     """Test that mpp is None if scale is not in micrometers."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample with a non-micrometer scale
     sample_copy = tmp_path / "ngff-1"
     shutil.copytree(sample, sample_copy)
@@ -2053,9 +2052,9 @@ def test_ngff_zattrs_non_micrometer_scale_mpp(tmp_path, caplog):
     assert wsi.info.mpp is None
 
 
-def test_ngff_zattrs_missing_axes_mpp(tmp_path):
+def test_ngff_zattrs_missing_axes_mpp(tmp_path, remote_sample):
     """Test that mpp is None if axes are missing."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample with no axes
     sample_copy = tmp_path / "ngff-1"
     shutil.copytree(sample, sample_copy)
@@ -2068,9 +2067,9 @@ def test_ngff_zattrs_missing_axes_mpp(tmp_path):
     assert wsi.info.mpp is None
 
 
-def test_ngff_empty_datasets_mpp(tmp_path):
+def test_ngff_empty_datasets_mpp(tmp_path, remote_sample):
     """Test that mpp is None if there are no datasets."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample with no axes
     sample_copy = tmp_path / "ngff-1"
     shutil.copytree(sample, sample_copy)
@@ -2083,9 +2082,9 @@ def test_ngff_empty_datasets_mpp(tmp_path):
     assert wsi.info.mpp is None
 
 
-def test_ngff_no_scale_transforms_mpp(tmp_path):
+def test_ngff_no_scale_transforms_mpp(tmp_path, remote_sample):
     """Test that mpp is None if no scale transforms are present."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample with no axes
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2100,9 +2099,9 @@ def test_ngff_no_scale_transforms_mpp(tmp_path):
     assert wsi.info.mpp is None
 
 
-def test_ngff_missing_omero_version(tmp_path):
+def test_ngff_missing_omero_version(tmp_path, remote_sample):
     """Test that the reader can handle missing omero version."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2115,9 +2114,9 @@ def test_ngff_missing_omero_version(tmp_path):
     wsireader.WSIReader.open(sample_copy)
 
 
-def test_ngff_missing_multiscales_returns_false(tmp_path):
+def test_ngff_missing_multiscales_returns_false(tmp_path, remote_sample):
     """Test that missing multiscales key returns False for is_ngff."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2130,9 +2129,9 @@ def test_ngff_missing_multiscales_returns_false(tmp_path):
     assert not wsireader.is_ngff(sample_copy)
 
 
-def test_ngff_wrong_format_metadata(tmp_path, caplog):
+def test_ngff_wrong_format_metadata(tmp_path, caplog, remote_sample):
     """Test that is_ngff is False and logs a warning if metadata is wrong."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2147,9 +2146,9 @@ def test_ngff_wrong_format_metadata(tmp_path, caplog):
     assert "must be present and of the correct type" in caplog.text
 
 
-def test_ngff_omero_below_min_version(tmp_path):
+def test_ngff_omero_below_min_version(tmp_path, remote_sample):
     """Test for FileNotSupported when omero version is below minimum."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2163,9 +2162,9 @@ def test_ngff_omero_below_min_version(tmp_path):
     wsireader.WSIReader.open(sample_copy)
 
 
-def test_ngff_omero_above_max_version(tmp_path, caplog):
+def test_ngff_omero_above_max_version(tmp_path, caplog, remote_sample):
     """Test for FileNotSupported when omero version is above maximum."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2181,9 +2180,9 @@ def test_ngff_omero_above_max_version(tmp_path, caplog):
     assert "maximum supported version" in caplog.text
 
 
-def test_ngff_multiscales_below_min_version(tmp_path):
+def test_ngff_multiscales_below_min_version(tmp_path, remote_sample):
     """Test for FileNotSupported when multiscales version is below minimum."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2197,9 +2196,9 @@ def test_ngff_multiscales_below_min_version(tmp_path):
     wsireader.WSIReader.open(sample_copy)
 
 
-def test_ngff_multiscales_above_max_version(tmp_path, caplog):
+def test_ngff_multiscales_above_max_version(tmp_path, caplog, remote_sample):
     """Test for FileNotSupported when multiscales version is above maximum."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2215,7 +2214,7 @@ def test_ngff_multiscales_above_max_version(tmp_path, caplog):
     assert "maximum supported version" in caplog.text
 
 
-def test_ngff_non_numeric_version(tmp_path, monkeypatch):
+def test_ngff_non_numeric_version(tmp_path, monkeypatch, remote_sample):
     """Test that the reader can handle non-numeric omero versions."""
     # Patch the is_ngff function to change the min/max version
     if_ngff = wsireader.is_ngff  # noqa: F841
@@ -2232,7 +2231,7 @@ def patched_is_ngff(
     monkeypatch.setattr(wsireader, "is_ngff", patched_is_ngff)
 
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2245,9 +2244,9 @@ def patched_is_ngff(
     wsireader.WSIReader.open(sample_copy)
 
 
-def test_ngff_inconsistent_multiscales_versions(tmp_path, caplog):
+def test_ngff_inconsistent_multiscales_versions(tmp_path, caplog, remote_sample):
     """Test that the reader logs a warning for inconsistent multiscales versions."""
-    sample = _fetch_remote_sample("ngff-1")
+    sample = remote_sample("ngff-1")
     # Create a copy of the sample
     sample_copy = tmp_path / "ngff-1.zarr"
     shutil.copytree(sample, sample_copy)
@@ -2269,246 +2268,251 @@ def test_ngff_inconsistent_multiscales_versions(tmp_path, caplog):
     assert "multiple versions" in caplog.text
 
 
-class TestReader:
-    scenarios = [
-        (
-            "AnnotationReaderOverlaid",
-            {
-                "reader_class": AnnotationStoreReader,
-                "sample_key": "annotation_store_svs_1",
-                "kwargs": {
-                    "renderer": AnnotationRenderer(
-                        "type",
-                        COLOR_DICT,
-                    ),
-                    "base_wsi": WSIReader.open(_fetch_remote_sample("svs-1-small")),
-                    "alpha": 0.5,
-                },
-            },
-        ),
-        (
-            "AnnotationReaderMaskOnly",
-            {
-                "reader_class": AnnotationStoreReader,
-                "sample_key": "annotation_store_svs_1",
-                "kwargs": {
-                    "renderer": AnnotationRenderer(
-                        "type",
-                        COLOR_DICT,
-                        blur_radius=3,
-                    ),
-                },
-            },
-        ),
-        (
-            "TIFFReader",
-            {
-                "reader_class": TIFFWSIReader,
-                "sample_key": "ome-brightfield-pyramid-1-small",
-                "kwargs": {},
-            },
-        ),
-        (
-            "DICOMReader",
-            {
-                "reader_class": DICOMWSIReader,
-                "sample_key": "dicom-1",
-                "kwargs": {},
+@pytest.fixture(
+    scope="module",
+    params=[
+        {
+            "reader_class": AnnotationStoreReader,
+            "sample_key": "annotation_store_svs_1",
+            "kwargs": {
+                "base_wsi_key": "svs-1-small",
+                "renderer": AnnotationRenderer(
+                    "type",
+                    COLOR_DICT,
+                ),
+                "alpha": 0.5,
             },
-        ),
-        (
-            "NGFFWSIReader",
-            {
-                "reader_class": NGFFWSIReader,
-                "sample_key": "ngff-1",
-                "kwargs": {},
-            },
-        ),
-        (
-            "OpenSlideWSIReader (Small SVS)",
-            {
-                "reader_class": OpenSlideWSIReader,
-                "sample_key": "svs-1-small",
-                "kwargs": {},
-            },
-        ),
-        (
-            "OmnyxJP2WSIReader",
-            {
-                "reader_class": OmnyxJP2WSIReader,
-                "sample_key": "jp2-omnyx-1",
-                "kwargs": {},
+        },
+        {
+            "reader_class": AnnotationStoreReader,
+            "sample_key": "annotation_store_svs_1",
+            "kwargs": {
+                "renderer": AnnotationRenderer(
+                    "type",
+                    COLOR_DICT,
+                    blur_radius=3,
+                ),
             },
-        ),
+        },
+        {
+            "reader_class": TIFFWSIReader,
+            "sample_key": "ome-brightfield-pyramid-1-small",
+            "kwargs": {},
+        },
+        {
+            "reader_class": DICOMWSIReader,
+            "sample_key": "dicom-1",
+            "kwargs": {},
+        },
+        {
+            "reader_class": NGFFWSIReader,
+            "sample_key": "ngff-1",
+            "kwargs": {},
+        },
+        {
+            "reader_class": OpenSlideWSIReader,
+            "sample_key": "svs-1-small",
+            "kwargs": {},
+        },
+        {
+            "reader_class": OmnyxJP2WSIReader,
+            "sample_key": "jp2-omnyx-1",
+            "kwargs": {},
+        },
+    ],
+    ids=[
+        "AnnotationReaderOverlaid",
+        "AnnotationReaderMaskOnly",
+        "TIFFReader",
+        "DICOMReader",
+        "NGFFWSIReader",
+        "OpenSlideWSIReader (Small SVS)",
+        "OmnyxJP2WSIReader",
+    ],
+)
+def wsi(request, remote_sample):
+    """WSIReader instance fixture.
+
+    Reader type varies as fixture is parametrized.
+ + """ + reader_class = request.param.pop("reader_class") + sample = remote_sample(request.param.pop("sample_key")) + + kwargs = request.param.pop("kwargs") + new_kwargs = {} + + for key, value in kwargs.items(): + if key.endswith("_key") and isinstance(value, str): + new_kwargs[key[:-4]] = remote_sample(value) + else: + new_kwargs[key] = value + + return reader_class( + sample, + **new_kwargs, + ) + + +def test_base_open(wsi): + """Checks that WSIReader.open detects the type correctly.""" + new_wsi = WSIReader.open(wsi.input_path) + assert type(new_wsi) is type(wsi) + + +def test_wsimeta_attrs(wsi): + """Check for expected attrs in .info / WSIMeta. + + Checks for existence of expected attrs but not their contents. + + """ + info = wsi.info + expected_attrs = [ + "slide_dimensions", + "axes", + "level_dimensions", + "level_count", + "level_downsamples", + "vendor", + "mpp", + "objective_power", + "file_path", ] + for attr in expected_attrs: + assert hasattr(info, attr) - @staticmethod - def test_base_open(sample_key, reader_class, kwargs): - """Checks that WSIReader.open detects the type correctly.""" - sample = _fetch_remote_sample(sample_key) - wsi = WSIReader.open(sample) - assert isinstance(wsi, reader_class) - - @staticmethod - def test_wsimeta_attrs(sample_key, reader_class, kwargs): - """Check for expected attrs in .info / WSIMeta. - - Checks for existence of expected attrs but not their contents. - - """ - sample = _fetch_remote_sample(sample_key) - wsi = reader_class(sample, **kwargs) - info = wsi.info - expected_attrs = [ - "slide_dimensions", - "axes", - "level_dimensions", - "level_count", - "level_downsamples", - "vendor", - "mpp", - "objective_power", - "file_path", - ] - for attr in expected_attrs: - assert hasattr(info, attr) - - @staticmethod - def test_read_rect_level_consistency(sample_key, reader_class, kwargs): - """Compare the same region at each stored resolution level. - - Read the same region at each stored resolution level and compare - the resulting image using phase cross correlation to check that - they are aligned. - - """ - sample = _fetch_remote_sample(sample_key) - wsi = reader_class(sample, **kwargs) - location = (0, 0) - size = np.array([1024, 1024]) - # Avoid testing very small levels (e.g. as in Omnyx JP2) because - # MSE for very small levels is noisy. - level_downsamples = [ - downsample for downsample in wsi.info.level_downsamples if downsample <= 32 - ] - imgs = [ - wsi.read_rect(location, size // downsample, level, "level") - for level, downsample in enumerate(level_downsamples) - ] - smallest_size = imgs[-1].shape[:2][::-1] - resized = [imresize(img, output_size=smallest_size) for img in imgs] - # Some blurring applied to account for changes in sharpness arising - # from interpolation when calculating the downsampled levels. This - # adds some tolerance for the comparison. - blurred = [cv2.GaussianBlur(img, (5, 5), cv2.BORDER_REFLECT) for img in resized] - as_float = [img.astype(np.float_) for img in blurred] - - # Pair-wise check resolutions for mean squared error - for i, a in enumerate(as_float): - for b in as_float[i + 1 :]: - _, error, phase_diff = phase_cross_correlation(a, b, normalization=None) - assert phase_diff < 0.125 - assert error < 0.125 - - @staticmethod - def test_read_bounds_level_consistency(sample_key, reader_class, kwargs): - """Compare the same region at each stored resolution level. 
-
-        Read the same region at each stored resolution level and compare
-        the resulting image using phase cross correlation to check that
-        they are aligned.
-
-        """
-        sample = _fetch_remote_sample(sample_key)
-        wsi = reader_class(sample, **kwargs)
-        bounds = (0, 0, 1024, 1024)
-        # This logic can be moved from the helper to here when other
-        # reader classes have been parameterised into scenarios also.
-        read_bounds_level_consistency(wsi, bounds)
-
-    @staticmethod
-    def test_fuzz_read_region_baseline_size(sample_key, reader_class, kwargs):
-        """Fuzz test for `read_bounds` output size at level 0 (baseline).
-
-        - Tests that the output image size matches the input bounds size.
-        - 50 random seeded reads are performed.
-        - All test bounds are within the the slide dimensions.
-        - Bounds sizes are randomised between 1 and 512 in width and height.
-
-        """
-        random.seed(123)
-        sample = _fetch_remote_sample(sample_key)
-        wsi = reader_class(sample, **kwargs)
-        width, height = wsi.info.slide_dimensions
-        iterations = 50
-
-        if sample_key == "jp2-omnyx-1":
-            iterations = 5
-
-        for _ in range(iterations):
-            size = (random.randint(1, 512), random.randint(1, 512))
-            location = (
-                random.randint(0, width - size[0]),
-                random.randint(0, height - size[1]),
-            )
-            bounds = locsize2bounds(location, size)
-            region = wsi.read_bounds(bounds, resolution=0, units="level")
-            assert region.shape[:2][::-1] == size
-
-    @staticmethod
-    def test_read_rect_coord_space_consistency(sample_key, reader_class, kwargs):
-        """Test that read_rect coord_space modes are consistent.
-
-        Using `read_rect` with `coord_space="baseline"` and
-        `coord_space="resolution"` should produce the same output when
-        the bounds are a multiple of the scale difference between the two
-        modes. I.E. reading at baseline with a set of coordinates should
-        yield the same region as reading at half the resolution and
-        with coordinates which are half the size. Note that the output
-        will not be of the same size, but the field of view will match.
-
-        """
-        sample = _fetch_remote_sample(sample_key)
-        reader = reader_class(sample, **kwargs)
-        roi1 = reader.read_rect(
-            np.array([500, 500]),
-            np.array([2000, 2000]),
-            coord_space="baseline",
-            resolution=1.00,
-            units="baseline",
-        )
-        roi2 = reader.read_rect(
-            np.array([250, 250]),
-            np.array([1000, 1000]),
-            coord_space="resolution",
-            resolution=0.5,
-            units="baseline",
+
+def test_read_rect_level_consistency(wsi):
+    """Compare the same region at each stored resolution level.
+
+    Read the same region at each stored resolution level and compare
+    the resulting image using phase cross correlation to check that
+    they are aligned.
+
+    """
+    location = (0, 0)
+    size = np.array([1024, 1024])
+    # Avoid testing very small levels (e.g. as in Omnyx JP2) because
+    # MSE for very small levels is noisy.
+    level_downsamples = [
+        downsample for downsample in wsi.info.level_downsamples if downsample <= 32
+    ]
+    imgs = [
+        wsi.read_rect(location, size // downsample, level, "level")
+        for level, downsample in enumerate(level_downsamples)
+    ]
+    smallest_size = imgs[-1].shape[:2][::-1]
+    resized = [imresize(img, output_size=smallest_size) for img in imgs]
+    # Some blurring applied to account for changes in sharpness arising
+    # from interpolation when calculating the downsampled levels. This
+    # adds some tolerance for the comparison.
+    blurred = [cv2.GaussianBlur(img, (5, 5), cv2.BORDER_REFLECT) for img in resized]
+    as_float = [img.astype(np.float_) for img in blurred]
+
+    # Pair-wise check resolutions for mean squared error
+    for i, a in enumerate(as_float):
+        for b in as_float[i + 1 :]:
+            _, error, phase_diff = phase_cross_correlation(a, b, normalization=None)
+            assert phase_diff < 0.125
+            assert error < 0.125
+
+
+def test_read_bounds_level_consistency(wsi):
+    """Compare the same region at each stored resolution level.
+
+    Read the same region at each stored resolution level and compare
+    the resulting image using phase cross correlation to check that
+    they are aligned.
+
+    """
+    bounds = (0, 0, 1024, 1024)
+    # This logic can be moved from the helper to here when other
+    # reader classes have been parameterised into scenarios also.
+    read_bounds_level_consistency(wsi, bounds)
+
+
+def test_fuzz_read_region_baseline_size(wsi):
+    """Fuzz test for `read_bounds` output size at level 0 (baseline).
+
+    - Tests that the output image size matches the input bounds size.
+    - 50 random seeded reads are performed.
+    - All test bounds are within the slide dimensions.
+    - Bounds sizes are randomised between 1 and 512 in width and height.
+
+    """
+    random.seed(123)
+    width, height = wsi.info.slide_dimensions
+    iterations = 50
+
+    # The Omnyx JP2 sample has file stem "test1"; use fewer iterations for it.
+    if wsi.input_path.stem == "test1":
+        iterations = 5
+
+    for _ in range(iterations):
+        size = (random.randint(1, 512), random.randint(1, 512))
+        location = (
+            random.randint(0, width - size[0]),
+            random.randint(0, height - size[1]),
         )
-        # Make the regions the same size for comparison of content
-        roi2 = imresize(roi2, output_size=[2000, 2000])
+        bounds = locsize2bounds(location, size)
+        region = wsi.read_bounds(bounds, resolution=0, units="level")
+        assert region.shape[:2][::-1] == size
 
-        # Check MSE
-        mse = np.mean((roi1 - roi2) ** 2)
-        assert mse < 100
 
-        # Check PSNR
-        psnr = peak_signal_noise_ratio(roi1, roi2)
-        assert psnr > 25
+def test_read_rect_coord_space_consistency(wsi):
+    """Test that read_rect coord_space modes are consistent.
 
-        # Check SSIM (skip very small roi regions)
-        if np.greater(roi1.shape[2], 16).all():
-            ssim = structural_similarity(roi1, roi2, multichannel=True)
-            assert ssim > 0.9
+    Using `read_rect` with `coord_space="baseline"` and
+    `coord_space="resolution"` should produce the same output when
+    the bounds are a multiple of the scale difference between the two
+    modes. I.E. reading at baseline with a set of coordinates should
+    yield the same region as reading at half the resolution and
+    with coordinates which are half the size. Note that the output
+    will not be of the same size, but the field of view will match.
 
-    @staticmethod
-    def test_file_path_does_not_exist(sample_key, reader_class, kwargs):
+
+    """
+    roi1 = wsi.read_rect(
+        np.array([500, 500]),
+        np.array([2000, 2000]),
+        coord_space="baseline",
+        resolution=1.00,
+        units="baseline",
+    )
+    roi2 = wsi.read_rect(
+        np.array([250, 250]),
+        np.array([1000, 1000]),
+        coord_space="resolution",
+        resolution=0.5,
+        units="baseline",
+    )
+    # Make the regions the same size for comparison of content
+    roi2 = imresize(roi2, output_size=[2000, 2000])
+
+    # Check MSE
+    mse = np.mean((roi1 - roi2) ** 2)
+    assert mse < 100
+
+    # Check PSNR
+    psnr = peak_signal_noise_ratio(roi1, roi2)
+    assert psnr > 25
+
+    # Check SSIM (skip very small roi regions)
+    if np.greater(roi1.shape[2], 16).all():
+        ssim = structural_similarity(roi1, roi2, multichannel=True)
+        assert ssim > 0.9
+
+
+def test_file_path_does_not_exist():
+    for reader_class in [
+        AnnotationStoreReader,
+        TIFFWSIReader,
+        DICOMWSIReader,
+        NGFFWSIReader,
+        OpenSlideWSIReader,
+        OmnyxJP2WSIReader,
+    ]:
         with pytest.raises(FileNotFoundError):
             _ = reader_class("./foo.bar")
 
-    @staticmethod
-    def test_read_mpp(sample_key, reader_class, kwargs):
-        """Test that the mpp is read correctly."""
-        sample = _fetch_remote_sample(sample_key)
-        wsi = reader_class(sample, **kwargs)
-        assert wsi.info.mpp == pytest.approx(0.25, 1)
+
+def test_read_mpp(wsi):
+    """Test that the mpp is read correctly."""
+    assert wsi.info.mpp == pytest.approx(0.25, 1)
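The refactor above replaces the `TestReader` scenarios class with a module-scoped parametrized fixture: pytest builds one reader per `params` entry and runs every test that requests `wsi` once per reader, keeping the same coverage with plain test functions instead of injected `sample_key`/`reader_class`/`kwargs` arguments. Kwargs keys ending in `_key` (e.g. `base_wsi_key`) are resolved to downloaded sample paths inside the fixture, which avoids the old pattern of calling `_fetch_remote_sample` at class-body (collection) time. A stripped-down sketch of the mechanism, with illustrative names only:

import pytest

@pytest.fixture(scope="module", params=[1, 2], ids=["one", "two"])
def value(request):
    # Built once per params entry; reused by all tests in the module.
    return request.param

def test_positive(value):
    # Collected twice: test_positive[one] and test_positive[two].
    assert value > 0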
diff --git a/tiatoolbox/data/__init__.py b/tiatoolbox/data/__init__.py
index 0afe36ac5..90e58f794 100644
--- a/tiatoolbox/data/__init__.py
+++ b/tiatoolbox/data/__init__.py
@@ -2,15 +2,15 @@
 """Package to define datasets available to download via TIAToolbox."""
 import pathlib
 import tempfile
-import zipfile
 from typing import Optional, Union
 from urllib.parse import urlparse
 
 import numpy as np
 import pkg_resources
-import requests
 import yaml
 
+from tiatoolbox.utils import download_data
+
 # Load a dictionary of sample files data (names and urls)
 SAMPLE_FILES_REGISTRY_PATH = pkg_resources.resource_filename(
     "tiatoolbox", "data/remote_samples.yaml"
 )
@@ -55,28 +55,8 @@ def _fetch_remote_sample(
     filename = SAMPLE_FILES[key].get("filename", url_filename)
     file_path = tmp_path / filename
     # Download the file if it doesn't exist
-    if not file_path.is_file():
-        print(f"Downloading sample file {filename}")
-        # Start the connection with a 5s timeout to avoid hanging forever
-        response = requests.get(url, stream=True, timeout=5)
-        # Raise an exception for status codes != 200
-        response.raise_for_status()
-        # Write the file in blocks of 1024 bytes to avoid running out of memory
-        with open(file_path, "wb") as handle:
-            for block in response.iter_content(1024):
-                handle.write(block)
-        # Extract the (zip) archive contents if required
-        if sample.get("extract"):
-            print(f"Extracting sample file {filename}")
-            extract_path = tmp_path / filename.replace(".zip", "")
-            with zipfile.ZipFile(file_path, "r") as zip_handle:
-                zip_handle.extractall(path=extract_path)
-            file_path = extract_path
-        return file_path
-    print(f"Skipping download of sample file {filename}")
-    if sample.get("extract"):
-        file_path = tmp_path / filename.replace(".zip", "")
-    return file_path
+
+    return download_data(url, save_path=file_path, unzip=sample.get("extract", False))
 
 
 def _local_sample_path(path: Union[str, pathlib.Path]) -> pathlib.Path:
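`_fetch_remote_sample` is now a thin wrapper over the shared `download_data`, so sample fetching inherits its skip-if-present caching, lock-guarded writes, and zip extraction. A rough sketch of the equivalent call for a zipped sample (the key and paths are illustrative, not from this diff):

from tiatoolbox.utils import download_data

# For an entry with extract=True, unzip=True makes download_data return the
# extraction directory (the save_path with its ".zip" suffix dropped),
# matching the old behaviour of returning extract_path.
file_path = download_data(url, save_path=tmp_path / "sample.zip", unzip=True)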
diff --git a/tiatoolbox/models/architecture/__init__.py b/tiatoolbox/models/architecture/__init__.py
index bf26cb47b..3de01f105 100644
--- a/tiatoolbox/models/architecture/__init__.py
+++ b/tiatoolbox/models/architecture/__init__.py
@@ -44,7 +44,7 @@ def fetch_pretrained_weights(
     file_name = info["url"].split("/")[-1]
     save_path = os.path.join(rcParam["TIATOOLBOX_HOME"], "models/", file_name)
 
-    download_data(info["url"], save_path, overwrite)
+    download_data(info["url"], save_path=save_path, overwrite=overwrite)
     return pathlib.Path(save_path)
 
 
diff --git a/tiatoolbox/models/dataset/info.py b/tiatoolbox/models/dataset/info.py
index 5562f1fb6..97b9cf902 100644
--- a/tiatoolbox/models/dataset/info.py
+++ b/tiatoolbox/models/dataset/info.py
@@ -91,7 +91,7 @@ def __init__(
             "https://tiatoolbox.dcs.warwick.ac.uk/datasets"
             "/kather100k-train-nonorm-subset-20k.zip"
         )
-        download_data(url, save_zip_path)
+        download_data(url, save_path=save_zip_path)
         unzip_data(save_zip_path, save_dir_path)
         save_dir_path = Path(save_dir_path, "kather100k-validation")
         # bring outside to prevent case where download fails
diff --git a/tiatoolbox/utils/misc.py b/tiatoolbox/utils/misc.py
index 9d4fc85fb..3a69398c7 100644
--- a/tiatoolbox/utils/misc.py
+++ b/tiatoolbox/utils/misc.py
@@ -16,6 +16,7 @@
 import requests
 import torch
 import yaml
+from filelock import FileLock
 from shapely import geometry
 from shapely.affinity import translate
 from shapely.geometry import shape as feature2geometry
@@ -622,35 +623,75 @@ def assert_dtype_int(
         raise AssertionError(message)
 
 
-def download_data(url: str, save_path: os | PathLike, overwrite: bool = False):
+def download_data(
+    url: str,
+    save_path: os | PathLike = None,
+    save_dir: os | PathLike = None,
+    overwrite: bool = False,
+    unzip: bool = False,
+) -> pathlib.Path:
     """Download data from a given URL to location.
 
     Can overwrite data if demanded; otherwise no action is taken.
 
     Args:
-        url (path): URL from where to download the data.
-        save_path (os | PathLike): Location to unzip the data.
-        overwrite (bool): True to force overwriting of existing data, default=False
+        url (str):
+            URL from where to download the data.
+        save_path (os | PathLike):
+            Location to download the data (including filename).
+            Can't be used with save_dir.
+        save_dir (os | PathLike):
+            Directory to save the data. Can't be used with save_path.
+        overwrite (bool):
+            True to force overwriting of existing data, default=False
+        unzip (bool):
+            True to unzip the data, default=False
 
     """
-    print(f"Download from {url}")
-    print(f"Save to {save_path}")
-    save_dir = pathlib.Path(save_path).parent
+    if save_path is not None and save_dir is not None:
+        raise ValueError("save_path and save_dir can't both be specified")
+
+    if save_path is not None:
+        save_dir = pathlib.Path(save_path).parent
+        save_path = pathlib.Path(save_path)
+
+    elif save_dir is not None:
+        save_dir = pathlib.Path(save_dir)
+        save_path = save_dir / pathlib.Path(url).name
+
+    else:
+        raise ValueError("save_path or save_dir must be specified")
+
+    logger.debug("Download from %s to %s", url, save_path)
 
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
 
-    if not overwrite and os.path.exists(save_path):
-        return
-    r = requests.get(url)
-    request_response = requests.head(url)
-    status_code = request_response.status_code
-    url_exists = status_code == 200
+    if not overwrite and os.path.exists(save_path) and not unzip:
+        return save_path
 
-    if not url_exists:
-        raise ConnectionError(f"Could not find URL at {url}")
+    lock_path = save_path.with_suffix(".lock")
 
-    with open(save_path, "wb") as f:
-        f.write(r.content)
+    with FileLock(lock_path):
+        if not overwrite and os.path.exists(save_path):
+            pass  # file was downloaded by another process
+        else:
+            # Start the connection with a 5-second timeout
+            # to avoid hanging indefinitely.
+            response = requests.get(url, stream=True, timeout=5)
+            # Raise an exception for status codes != 200
+            response.raise_for_status()
+            # Write the file in blocks of 1024 bytes to avoid running out of memory
+            with save_path.open("wb") as handle:
+                for block in response.iter_content(1024):
+                    handle.write(block)
+
+        if unzip:
+            unzip_path = save_dir / save_path.stem
+            unzip_data(str(save_path), str(unzip_path), del_zip=False)
+            return unzip_path
+
+    return save_path
 
 
 def unzip_data(zip_path: os | PathLike, save_path: os | PathLike, del_zip: bool = True):
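Usage sketch for the reworked `download_data` API (the URL and paths below are placeholders, not from this diff). Exactly one of `save_path` and `save_dir` may be given; with `save_dir` the filename is derived from the URL, and the resolved path is returned so callers no longer need to construct it themselves:

from tiatoolbox.utils.misc import download_data

# Saves to /tmp/data/archive.zip and returns that path.
zip_path = download_data("https://example.com/archive.zip", save_dir="/tmp/data")

# Downloads to an exact path and extracts; returns /tmp/data/archive
# (the unzip directory), not the archive itself.
extracted = download_data(
    "https://example.com/archive.zip",
    save_path="/tmp/data/archive.zip",
    unzip=True,
)

# Passing both save_path and save_dir, or neither, raises ValueError.

Because the write happens under a `FileLock` on a sibling `.lock` file, concurrent workers (e.g. pytest-xdist processes sharing the session-scoped sample directory) block on a single download instead of corrupting each other's partial files.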