From 3a397b82c3d0874cd6954d8845dafd0570ee57d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Mon, 16 Dec 2024 17:54:07 +0100 Subject: [PATCH 01/11] feat: load_hdf5 accepts slice arguments to only consider certain slices of a dataset --- heat/core/io.py | 45 ++++++++++++++++++++++++++++++++++++++ heat/core/tests/test_io.py | 20 +++++++++++++++++ setup.py | 4 ++-- 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/heat/core/io.py b/heat/core/io.py index 427c7b8d49..dbeb84620a 100644 --- a/heat/core/io.py +++ b/heat/core/io.py @@ -38,6 +38,31 @@ "load_npy_from_path", ] + +def size_from_slice(size: int, s: slice) -> Tuple[int, int]: + """ + Determines the size of a slice object. + + Parameters + ---------- + size: int + The size of the array the slice object is applied to. + s : slice + The slice object to determine the size of. + + Returns + ------- + int + The size of the sliced object. + int + The start index of the slice object. + """ + from hypothesis import note + + new_range = range(size)[s] + return len(new_range), new_range.start if len(new_range) > 0 else 0 + + try: import netCDF4 as nc except ImportError: @@ -490,6 +515,7 @@ def load_hdf5( dataset: str, dtype: datatype = types.float32, load_fraction: float = 1.0, + slices: Optional[Tuple[slice]] = None, split: Optional[int] = None, device: Optional[str] = None, comm: Optional[Communication] = None, @@ -509,6 +535,8 @@ def load_hdf5( if 1. (default), the whole dataset is loaded from the file specified in path else, the dataset is loaded partially, with the fraction of the dataset (along the split axis) specified by load_fraction If split is None, load_fraction is automatically set to 1., i.e. the whole dataset is loaded. + slices : tuple of slice objects, optional + Load only the specified slices of the dataset. split : int or None, optional The axis along which the data is distributed among the processing cores. 
device : str, optional The device id on which to place the data, defaults to globally set default device. comm : Communication, optional The communication to use for the data distribution. load_fraction : float between 0. (excluded) and 1. (included), default is 1. if 1. (default), the whole dataset is loaded from the file specified in path else, the dataset is loaded partially, with the fraction of the dataset (along the split axis) specified by load_fraction If split is None, load_fraction is automatically set to 1., i.e. the whole dataset is loaded. @@ -563,6 +591,18 @@ def load_hdf5( with h5py.File(path, "r") as handle: data = handle[dataset] gshape = data.shape + offsets = [0] * len(gshape) + if slices is not None: + if len(slices) != len(gshape): + raise ValueError( + f"Number of slices ({len(slices)}) does not match the number of dimensions ({len(gshape)})" + ) + for i, s in enumerate(slices): + if s.step is not None and s.step != 1: + raise ValueError("Slices with step != 1 are not supported") + gshape = size_from_slice(gshape[i], s) + offsets[i] = s.start if s.start is not None else 0 + if split is not None: gshape = list(gshape) gshape[split] = int(gshape[split] * load_fraction) @@ -570,6 +610,11 @@ def load_hdf5( dims = len(gshape) split = sanitize_axis(gshape, split) _, _, indices = comm.chunk(gshape, split) + + if slices is not None: + for offset, index in zip(offsets, indices): + index.start += offset + balanced = True if split is None: data = torch.tensor( diff --git a/heat/core/tests/test_io.py b/heat/core/tests/test_io.py index 6f75846e5f..b6fa907d5a 100644 --- a/heat/core/tests/test_io.py +++ b/heat/core/tests/test_io.py @@ -11,6 +11,9 @@ import heat as ht from .test_suites.basic_test import TestCase +from hypothesis import given, settings, note +import hypothesis.strategies as st + class TestIO(TestCase): @classmethod @@ -56,6 +59,23 @@ def tearDown(self): # synchronize all nodes ht.MPI_WORLD.Barrier() + @given(size=st.integers(1, 1000), slice=st.slices(1000)) + def test_size_from_slice(self, size, slice): + expected_sequence = list(range(size))[slice] + if len(expected_sequence) == 0: + expected_offset = 0 + else: + expected_offset = expected_sequence[0] + + expected_new_size = len(expected_sequence) + + new_size, offset = ht.io.size_from_slice(size, slice) + note(f"Expected sequence: {expected_sequence}") + note(f"Expected new size: {expected_new_size}, new size: {new_size}") + note(f"Expected offset: {expected_offset}, offset: {offset}") + self.assertEqual(expected_new_size, new_size) + self.assertEqual(expected_offset, offset) + # catch-all loading def test_load(self): # HDF5 diff --git a/setup.py b/setup.py index bb71bf1c30..14e2b9f1e4 100644 --- a/setup.py +++ b/setup.py @@ -44,9 +44,9 @@ "docutils": ["docutils>=0.16"], "hdf5": ["h5py>=2.8.0"], "netcdf": ["netCDF4>=1.5.6"], - "dev": ["pre-commit>=1.18.3"], + "dev": ["pre-commit>=1.18.3", "pytest>=8.0", "hypothesis<=6.100"], "examples": ["scikit-learn>=0.24.0", "matplotlib>=3.1.0"], - "cb": ["perun>=0.2.0"], + "cb": ["perun>=0.8"], "pandas": ["pandas>=1.4"], }, ) From 2288e05054bd5d477e57aeb681bd9f0bd3914393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Tue, 17 Dec 2024 19:26:15 +0100 Subject: [PATCH 02/11] feat: hdf5 partially loads datasets based on slice objects --- heat/core/io.py | 22 +++++++++++++++------- heat/core/tests/test_io.py | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/heat/core/io.py b/heat/core/io.py index dbeb84620a..c981b9712a 100644 --- a/heat/core/io.py +++ b/heat/core/io.py @@ -57,8 +57,6 @@ def size_from_slice(size: int, s: slice) -> Tuple[int, int]: int The start index of the slice object.
""" - from hypothesis import note - new_range = range(size)[s] return len(new_range), new_range.start if len(new_range) > 0 else 0 @@ -591,6 +589,7 @@ def load_hdf5( with h5py.File(path, "r") as handle: data = handle[dataset] gshape = data.shape + new_gshape = tuple() offsets = [0] * len(gshape) if slices is not None: if len(slices) != len(gshape): @@ -598,10 +597,17 @@ def load_hdf5( f"Number of slices ({len(slices)}) does not match the number of dimensions ({len(gshape)})" ) for i, s in enumerate(slices): - if s.step is not None and s.step != 1: - raise ValueError("Slices with step != 1 are not supported") - gshape = size_from_slice(gshape[i], s) - offsets[i] = s.start if s.start is not None else 0 + if s: + if s.step is not None and s.step != 1: + raise ValueError("Slices with step != 1 are not supported") + new_axis_size, offset = size_from_slice(gshape[i], s) + new_gshape += (new_axis_size,) + offsets[i] = offset + else: + new_gshape += (gshape[i],) + offsets[i] = 0 + + gshape = new_gshape if split is not None: gshape = list(gshape) @@ -612,8 +618,10 @@ def load_hdf5( _, _, indices = comm.chunk(gshape, split) if slices is not None: + new_indices = tuple() for offset, index in zip(offsets, indices): - index.start += offset + new_indices += (slice(index.start + offset, index.stop + offset),) + indices = new_indices balanced = True if split is None: diff --git a/heat/core/tests/test_io.py b/heat/core/tests/test_io.py index b6fa907d5a..7f993f985e 100644 --- a/heat/core/tests/test_io.py +++ b/heat/core/tests/test_io.py @@ -11,7 +11,8 @@ import heat as ht from .test_suites.basic_test import TestCase -from hypothesis import given, settings, note +import pytest +from hypothesis import given, settings, note, assume import hypothesis.strategies as st @@ -912,3 +913,37 @@ def test_load_multiple_csv_exception(self): ht.MPI_WORLD.Barrier() if ht.MPI_WORLD.rank == 0: shutil.rmtree(os.path.join(os.getcwd(), "heat/datasets/csv_tests")) + + +@unittest.skipIf(not ht.io.supports_hdf5(), reason="Requires HDF5") +@pytest.mark.parametrize("axis", [None, 0, 1]) +@pytest.mark.parametrize( + "slices", + [ + (slice(0, 50, None), slice(None, None, None)), + (slice(0, 50, None), slice(0, 2, None)), + (slice(50, 100, None), slice(None, None, None)), + (slice(None, None, None), slice(2, 4, None)), + ], +) +def test_load_partial_hdf5(axis, slices): + print("axis: ", axis) + HDF5_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.h5") + HDF5_DATASET = "data" + expect_error = False + for s in slices: + if s and s.step not in [None, 1]: + expect_error = True + break + + if expect_error: + with pytest.raises(ValueError): + sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) + else: + original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) + expected_iris = original_iris[slices] + sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) + print("Original shape: " + str(original_iris.shape)) + print("Sliced shape: " + str(sliced_iris.shape)) + print("Expected shape: " + str(expected_iris.shape)) + assert not ht.equal(sliced_iris, expected_iris) From 9f3b1683d6847989dc78b2d2fbebb7d79deadeef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Mon, 13 Jan 2025 17:16:24 +0100 Subject: [PATCH 03/11] pytest: bad, unittest subtest: Yeah! 
--- .github/rd-release-config.yml | 81 +++++++++++++++++++++++++- heat/core/io.py | 17 ------ heat/core/tests/test_io.py | 104 +++++++++++++++++----------------- setup.py | 2 +- 4 files changed, 131 insertions(+), 73 deletions(-) diff --git a/.github/rd-release-config.yml b/.github/rd-release-config.yml index 6f1d103d27..891b66dfbe 100644 --- a/.github/rd-release-config.yml +++ b/.github/rd-release-config.yml @@ -113,9 +113,6 @@ autolabeler: - label: 'interoperability' title: - '/Support.+/' - - label: 'testing' - files: - - '**/tests/**/*' - label: 'classification' files: - 'heat/classification/**/*' @@ -164,6 +161,84 @@ autolabeler: - label: 'linalg' files: - 'heat/core/linalg/**/*' + - label: 'arithmetics' + files: + - 'heat/core/arithmetics.py' + - label: 'base' + files: + - 'heat/core/base.py' + - label: 'communication' + files: + - 'heat/core/communication.py' + - label: 'complex_math' + files: + - 'heat/core/complex_math.py' + - label: 'constants' + files: + - 'heat/core/constants.py' + - label: 'devices' + files: + - 'heat/core/devices.py' + - label: 'dndarray' + files: + - 'heat/core/dndarray.py' + - label: 'exponential' + files: + - 'heat/core/exponential.py' + - label: 'indexing' + files: + - 'heat/core/indexing.py' + - label: 'io' + files: + - 'heat/core/io.py' + - label: 'logical' + files: + - 'heat/core/logical.py' + - label: 'manipulations' + files: + - 'heat/core/manipulations.py' + - label: 'memory' + files: + - 'heat/core/memory.py' + - label: 'printing' + files: + - 'heat/core/printing.py' + - label: 'random' + files: + - 'heat/core/random.py' + - label: 'relational' + files: + - 'heat/core/relational.py' + - label: 'rounding' + files: + - 'heat/core/rounding.py' + - label: 'sanitation' + files: + - 'heat/core/sanitation.py' + - label: 'signal' + files: + - 'heat/core/signal.py' + - label: 'statistics' + files: + - 'heat/core/statistics.py' + - label: 'stride_tricks' + files: + - 'heat/core/stride_tricks.py' + - label: 'tiling' + files: + - 'heat/core/tiling.py' + - label: 'trigonometrics' + files: + - 'heat/core/trigonometrics.py' + - label: 'types' + files: + - 'heat/core/types.py' + - label: 'version' + files: + - 'heat/core/version.py' + - label: 'vmap' + files: + - 'heat/core/vmap.py' change-template: '- #$NUMBER $TITLE (by @$AUTHOR)' category-template: '### $TITLE' diff --git a/heat/core/io.py b/heat/core/io.py index c981b9712a..98fb6e045b 100644 --- a/heat/core/io.py +++ b/heat/core/io.py @@ -512,7 +512,6 @@ def load_hdf5( path: str, dataset: str, dtype: datatype = types.float32, - load_fraction: float = 1.0, slices: Optional[Tuple[slice]] = None, split: Optional[int] = None, device: Optional[str] = None, comm: Optional[Communication] = None, @@ -529,10 +528,6 @@ def load_hdf5( Name of the dataset to be read. dtype : datatype, optional Data type of the resulting array. - load_fraction : float between 0. (excluded) and 1. (included), default is 1. - if 1. (default), the whole dataset is loaded from the file specified in path - else, the dataset is loaded partially, with the fraction of the dataset (along the split axis) specified by load_fraction - If split is None, load_fraction is automatically set to 1., i.e. the whole dataset is loaded. slices : tuple of slice objects, optional Load only the specified slices of the dataset.
split : int or None, optional @@ -571,14 +566,6 @@ def load_hdf5( elif split is not None and not isinstance(split, int): raise TypeError(f"split must be None or int, not {type(split)}") - if not isinstance(load_fraction, float): - raise TypeError(f"load_fraction must be float, but is {type(load_fraction)}") - else: - if split is not None and (load_fraction <= 0.0 or load_fraction > 1.0): - raise ValueError( - f"load_fraction must be between 0. (excluded) and 1. (included), but is {load_fraction}." - ) - # infer the type and communicator for the loaded array dtype = types.canonical_heat_type(dtype) # determine the comm and device the data will be placed on @@ -609,10 +596,6 @@ def load_hdf5( gshape = new_gshape - if split is not None: - gshape = list(gshape) - gshape[split] = int(gshape[split] * load_fraction) - gshape = tuple(gshape) dims = len(gshape) split = sanitize_axis(gshape, split) _, _, indices = comm.chunk(gshape, split) diff --git a/heat/core/tests/test_io.py b/heat/core/tests/test_io.py index 7f993f985e..53ff0fb4dc 100644 --- a/heat/core/tests/test_io.py +++ b/heat/core/tests/test_io.py @@ -12,7 +12,6 @@ from .test_suites.basic_test import TestCase import pytest -from hypothesis import given, settings, note, assume import hypothesis.strategies as st @@ -60,22 +59,30 @@ def tearDown(self): # synchronize all nodes ht.MPI_WORLD.Barrier() - @given(size=st.integers(1, 1000), slice=st.slices(1000)) - def test_size_from_slice(self, size, slice): - expected_sequence = list(range(size))[slice] - if len(expected_sequence) == 0: - expected_offset = 0 - else: - expected_offset = expected_sequence[0] - - expected_new_size = len(expected_sequence) - - new_size, offset = ht.io.size_from_slice(size, slice) - note(f"Expected sequence: {expected_sequence}") - note(f"Expected new size: {expected_new_size}, new size: {new_size}") - note(f"Expected offset: {expected_offset}, offset: {offset}") - self.assertEqual(expected_new_size, new_size) - self.assertEqual(expected_offset, offset) + def test_size_from_slice(self): + test_cases = [ + (1000, slice(500)), + (10, slice(0, 10, 2)), + (100, slice(0, 100, 10)), + (1000, slice(0, 1000, 100)), + (0, slice(0)), + ] + for size, slice_obj in test_cases: + with self.subTest(size=size, slice=slice_obj): + expected_sequence = list(range(size))[slice_obj] + if len(expected_sequence) == 0: + expected_offset = 0 + else: + expected_offset = expected_sequence[0] + + expected_new_size = len(expected_sequence) + + new_size, offset = ht.io.size_from_slice(size, slice_obj) + print(f"Expected sequence: {expected_sequence}") + print(f"Expected new size: {expected_new_size}, new size: {new_size}") + print(f"Expected offset: {expected_offset}, offset: {offset}") + self.assertEqual(expected_new_size, new_size) + self.assertEqual(expected_offset, offset) # catch-all loading def test_load(self): @@ -562,10 +569,6 @@ def test_load_hdf5(self): self.assertEqual(iris.larray.dtype, torch.float32) self.assertTrue((self.IRIS == iris.larray).all()) - # cropped load - iris_cropped = ht.load_hdf5(self.HDF5_PATH, self.HDF5_DATASET, split=0, load_fraction=0.5) - self.assertEqual(iris_cropped.shape[0], iris.shape[0] // 2) - # positive split axis iris = ht.load_hdf5(self.HDF5_PATH, self.HDF5_DATASET, split=0) self.assertIsInstance(iris, ht.DNDarray) @@ -603,10 +606,6 @@ def test_load_hdf5_exception(self): ht.load_hdf5("iris.h5", 1) with self.assertRaises(TypeError): ht.load_hdf5("iris.h5", dataset="data", split=1.0) - with self.assertRaises(TypeError): - ht.load_hdf5(self.HDF5_PATH, 
self.HDF5_DATASET, load_fraction="a") - with self.assertRaises(ValueError): - ht.load_hdf5(self.HDF5_PATH, self.HDF5_DATASET, load_fraction=0.0, split=0) # file or dataset does not exist with self.assertRaises(IOError): @@ -916,34 +915,35 @@ def test_load_multiple_csv_exception(self): @unittest.skipIf(not ht.io.supports_hdf5(), reason="Requires HDF5") -@pytest.mark.parametrize("axis", [None, 0, 1]) -@pytest.mark.parametrize( - "slices", - [ +def test_load_partial_hdf5(self): + test_axis = [None, 0, 1] + test_slices = [ (slice(0, 50, None), slice(None, None, None)), (slice(0, 50, None), slice(0, 2, None)), (slice(50, 100, None), slice(None, None, None)), (slice(None, None, None), slice(2, 4, None)), - ], -) -def test_load_partial_hdf5(axis, slices): - print("axis: ", axis) - HDF5_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.h5") - HDF5_DATASET = "data" - expect_error = False - for s in slices: - if s and s.step not in [None, 1]: - expect_error = True - break - - if expect_error: - with pytest.raises(ValueError): - sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) - else: - original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) - expected_iris = original_iris[slices] - sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) - print("Original shape: " + str(original_iris.shape)) - print("Sliced shape: " + str(sliced_iris.shape)) - print("Expected shape: " + str(expected_iris.shape)) - assert not ht.equal(sliced_iris, expected_iris) + ] + test_cases = [(a, s) for a in test_axis for s in test_slices] + + for axis, slices in test_cases: + with self.subTest(axis=axis, slices=slices): + print("axis: ", axis) + HDF5_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.h5") + HDF5_DATASET = "data" + expect_error = False + for s in slices: + if s and s.step not in [None, 1]: + expect_error = True + break + + if expect_error: + with pytest.raises(ValueError): + sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) + else: + original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) + expected_iris = original_iris[slices] + sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) + print("Original shape: " + str(original_iris.shape)) + print("Sliced shape: " + str(sliced_iris.shape)) + print("Expected shape: " + str(expected_iris.shape)) + assert not ht.equal(sliced_iris, expected_iris) diff --git a/setup.py b/setup.py index 14e2b9f1e4..de02eeb0ef 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ "docutils": ["docutils>=0.16"], "hdf5": ["h5py>=2.8.0"], "netcdf": ["netCDF4>=1.5.6"], - "dev": ["pre-commit>=1.18.3", "pytest>=8.0", "hypothesis<=6.100"], + "dev": ["pre-commit>=1.18.3"], "examples": ["scikit-learn>=0.24.0", "matplotlib>=3.1.0"], "cb": ["perun>=0.8"], "pandas": ["pandas>=1.4"], From 2b1e647f4f4417ce19dbb90fa5d13a0a1342345a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Wed, 15 Jan 2025 10:57:34 +0100 Subject: [PATCH 04/11] fix: removed pytest and hypothesis --- heat/core/tests/test_io.py | 72 +++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/heat/core/tests/test_io.py b/heat/core/tests/test_io.py index 53ff0fb4dc..ba9b341488 100644 --- a/heat/core/tests/test_io.py +++ b/heat/core/tests/test_io.py @@ -11,9 +11,6 @@ import heat as ht from .test_suites.basic_test import TestCase -import pytest -import hypothesis.strategies as st - 
class TestIO(TestCase): @classmethod @@ -913,37 +910,40 @@ def test_load_multiple_csv_exception(self): if ht.MPI_WORLD.rank == 0: shutil.rmtree(os.path.join(os.getcwd(), "heat/datasets/csv_tests")) - -@unittest.skipIf(not ht.io.supports_hdf5(), reason="Requires HDF5") -def test_load_partial_hdf5(self): - test_axis = [None, 0, 1] - test_slices = [ - (slice(0, 50, None), slice(None, None, None)), - (slice(0, 50, None), slice(0, 2, None)), - (slice(50, 100, None), slice(None, None, None)), - (slice(None, None, None), slice(2, 4, None)), - ] - test_cases = [(a, s) for a in test_axis for s in test_slices] - - for axis, slices in test_cases: - with self.subTest(axis=axis, slices=slices): - print("axis: ", axis) - HDF5_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.h5") - HDF5_DATASET = "data" - expect_error = False - for s in slices: - if s and s.step not in [None, 1]: - expect_error = True - break - - if expect_error: - with pytest.raises(ValueError): + @unittest.skipIf(not ht.io.supports_hdf5(), reason="Requires HDF5") + def test_load_partial_hdf5(self): + test_axis = [None, 0, 1] + test_slices = [ + (slice(0, 50, None), slice(None, None, None)), + (slice(0, 50, None), slice(0, 2, None)), + (slice(50, 100, None), slice(None, None, None)), + (slice(None, None, None), slice(2, 4, None)), + ] + test_cases = [(a, s) for a in test_axis for s in test_slices] + + for axis, slices in test_cases: + with self.subTest(axis=axis, slices=slices): + print("axis: ", axis) + HDF5_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.h5") + HDF5_DATASET = "data" + expect_error = False + for s in slices: + if s and s.step not in [None, 1]: + expect_error = True + break + + if expect_error: + with self.assertRaises(ValueError): + sliced_iris = ht.load_hdf5( + HDF5_PATH, HDF5_DATASET, split=axis, slices=slices + ) + else: + original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) + expected_iris = original_iris[slices] sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) - else: - original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) - expected_iris = original_iris[slices] - sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) - print("Original shape: " + str(original_iris.shape)) - print("Sliced shape: " + str(sliced_iris.shape)) - print("Expected shape: " + str(expected_iris.shape)) - assert not ht.equal(sliced_iris, expected_iris) + print("Original shape: " + str(original_iris.shape)) + print("Sliced shape: " + str(sliced_iris.shape)) + print("Expected shape: " + str(expected_iris.shape)) + print(f"Expected : {expected_iris}") + print(f"Sliced : {sliced_iris}") + self.assertTrue(ht.equal(sliced_iris, expected_iris)) From 9aed5fc883d7465b88cbd4bc05aa84aa53dd46a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Thu, 16 Jan 2025 10:59:25 +0100 Subject: [PATCH 05/11] cov up --- heat/core/tests/test_io.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/heat/core/tests/test_io.py b/heat/core/tests/test_io.py index ba9b341488..ac72a366d5 100644 --- a/heat/core/tests/test_io.py +++ b/heat/core/tests/test_io.py @@ -918,12 +918,15 @@ def test_load_partial_hdf5(self): (slice(0, 50, None), slice(0, 2, None)), (slice(50, 100, None), slice(None, None, None)), (slice(None, None, None), slice(2, 4, None)), + (slice(50), None), + (None, slice(0, 3, 2)), ] test_cases = [(a, s) for a in test_axis for s in test_slices] for axis, slices in 
test_cases: with self.subTest(axis=axis, slices=slices): print("axis: ", axis) + print("slices: ", slices) HDF5_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.h5") HDF5_DATASET = "data" expect_error = False @@ -939,11 +942,10 @@ def test_load_partial_hdf5(self): ) else: original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) - expected_iris = original_iris[slices] + tmp_slices = tuple(slice(None) if s is None else s for s in slices) + expected_iris = original_iris[tmp_slices] sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) print("Original shape: " + str(original_iris.shape)) print("Sliced shape: " + str(sliced_iris.shape)) print("Expected shape: " + str(expected_iris.shape)) - print(f"Expected : {expected_iris}") - print(f"Sliced : {sliced_iris}") self.assertTrue(ht.equal(sliced_iris, expected_iris)) From b96375d07425ca376188b65dcedc41f789adab33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Mon, 10 Feb 2025 17:21:32 +0100 Subject: [PATCH 06/11] slice tuple does not need to have the same number of elements as the dataset --- heat/core/io.py | 2 +- heat/core/tests/test_io.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/heat/core/io.py b/heat/core/io.py index 98fb6e045b..bbc6c23181 100644 --- a/heat/core/io.py +++ b/heat/core/io.py @@ -512,7 +512,7 @@ def load_hdf5( path: str, dataset: str, dtype: datatype = types.float32, - slices: Optional[Tuple[slice]] = None, + slices: Optional[Tuple[Optional[slice], ...]] = None, split: Optional[int] = None, device: Optional[str] = None, comm: Optional[Communication] = None, diff --git a/heat/core/tests/test_io.py b/heat/core/tests/test_io.py index ac72a366d5..c1aa7d9470 100644 --- a/heat/core/tests/test_io.py +++ b/heat/core/tests/test_io.py @@ -920,6 +920,9 @@ def test_load_partial_hdf5(self): (slice(None, None, None), slice(2, 4, None)), (slice(50), None), (None, slice(0, 3, 2)), + (slice(50),)( + slice(50, 100), + ), ] test_cases = [(a, s) for a in test_axis for s in test_slices] From 90564dd3ff4b2bb59855bf2eea3ab22f12573646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Wed, 12 Feb 2025 15:59:37 +0100 Subject: [PATCH 07/11] fix: tests --- heat/core/tests/test_io.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/heat/core/tests/test_io.py b/heat/core/tests/test_io.py index c1aa7d9470..87a782b3c3 100644 --- a/heat/core/tests/test_io.py +++ b/heat/core/tests/test_io.py @@ -920,9 +920,8 @@ def test_load_partial_hdf5(self): (slice(None, None, None), slice(2, 4, None)), (slice(50), None), (None, slice(0, 3, 2)), - (slice(50),)( - slice(50, 100), - ), + (slice(50),), + (slice(50, 100),), ] test_cases = [(a, s) for a in test_axis for s in test_slices] From 542961b02b2e9c125a8d9cd4a218fd63c1e580de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Wed, 12 Feb 2025 16:45:54 +0100 Subject: [PATCH 08/11] forgot to actually remove the check --- heat/core/io.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/heat/core/io.py b/heat/core/io.py index bbc6c23181..3a41db748f 100644 --- a/heat/core/io.py +++ b/heat/core/io.py @@ -579,10 +579,6 @@ def load_hdf5( new_gshape = tuple() offsets = [0] * len(gshape) if slices is not None: - if len(slices) != len(gshape): - raise ValueError( - f"Number of slices ({len(slices)}) does 
not match the number of dimensions ({len(gshape)})" - ) for i, s in enumerate(slices): if s: if s.step is not None and s.step != 1: From 02e43b3fe4054f637a101779fb5a9e95d65d9b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Wed, 12 Feb 2025 17:30:39 +0100 Subject: [PATCH 09/11] please release me from this life --- heat/core/tests/test_io.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/heat/core/tests/test_io.py b/heat/core/tests/test_io.py index 87a782b3c3..618115b101 100644 --- a/heat/core/tests/test_io.py +++ b/heat/core/tests/test_io.py @@ -931,23 +931,11 @@ def test_load_partial_hdf5(self): print("slices: ", slices) HDF5_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.h5") HDF5_DATASET = "data" - expect_error = False - for s in slices: - if s and s.step not in [None, 1]: - expect_error = True - break - - if expect_error: - with self.assertRaises(ValueError): - sliced_iris = ht.load_hdf5( - HDF5_PATH, HDF5_DATASET, split=axis, slices=slices - ) - else: - original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) - tmp_slices = tuple(slice(None) if s is None else s for s in slices) - expected_iris = original_iris[tmp_slices] - sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) - print("Original shape: " + str(original_iris.shape)) - print("Sliced shape: " + str(sliced_iris.shape)) - print("Expected shape: " + str(expected_iris.shape)) - self.assertTrue(ht.equal(sliced_iris, expected_iris)) + original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) + tmp_slices = tuple(slice(None) if s is None else s for s in slices) + expected_iris = original_iris[tmp_slices] + sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) + print("Original shape: " + str(original_iris.shape)) + print("Sliced shape: " + str(sliced_iris.shape)) + print("Expected shape: " + str(expected_iris.shape)) + self.assertTrue(ht.equal(sliced_iris, expected_iris)) From a04669a652078ea5d8c5665b29070c53818909bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guti=C3=A9rrez=20Hermosillo=20Muriedas=2C=20Juan=20Pedro?= <juanpedroghm@gmail.com> Date: Thu, 13 Feb 2025 16:40:47 +0100 Subject: [PATCH 10/11] If this is not the one, I will punch my own ticket --- heat/core/io.py | 6 ++++-- heat/core/tests/test_io.py | 28 ++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/heat/core/io.py b/heat/core/io.py index 3a41db748f..89382ba4e2 100644 --- a/heat/core/io.py +++ b/heat/core/io.py @@ -579,8 +579,10 @@ def load_hdf5( new_gshape = tuple() offsets = [0] * len(gshape) if slices is not None: - for i, s in enumerate(slices): - if s: + for i in range(len(gshape)): + + if i < len(slices) and slices[i]: + s = slices[i] if s.step is not None and s.step != 1: raise ValueError("Slices with step != 1 are not supported") new_axis_size, offset = size_from_slice(gshape[i], s) diff --git a/heat/core/tests/test_io.py b/heat/core/tests/test_io.py index 618115b101..87a782b3c3 100644 --- a/heat/core/tests/test_io.py +++ b/heat/core/tests/test_io.py @@ -931,11 +931,23 @@ def test_load_partial_hdf5(self): print("slices: ", slices) HDF5_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.h5") HDF5_DATASET = "data" - original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) - tmp_slices = tuple(slice(None) if s is None else s for s in slices) - expected_iris = original_iris[tmp_slices] - sliced_iris = 
ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) - print("Original shape: " + str(original_iris.shape)) - print("Sliced shape: " + str(sliced_iris.shape)) - print("Expected shape: " + str(expected_iris.shape)) - self.assertTrue(ht.equal(sliced_iris, expected_iris)) + expect_error = False + for s in slices: + if s and s.step not in [None, 1]: + expect_error = True + break + + if expect_error: + with self.assertRaises(ValueError): + sliced_iris = ht.load_hdf5( + HDF5_PATH, HDF5_DATASET, split=axis, slices=slices + ) + else: + original_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis) + tmp_slices = tuple(slice(None) if s is None else s for s in slices) + expected_iris = original_iris[tmp_slices] + sliced_iris = ht.load_hdf5(HDF5_PATH, HDF5_DATASET, split=axis, slices=slices) + print("Original shape: " + str(original_iris.shape)) + print("Sliced shape: " + str(sliced_iris.shape)) + print("Expected shape: " + str(expected_iris.shape)) + self.assertTrue(ht.equal(sliced_iris, expected_iris)) From 5872686261c28ffe879153c81ddd0969ab524853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Pedro=20Guti=C3=A9rrez=20Hermosillo=20Muriedas?= <juanpedroghm@gmail.com> Date: Mon, 24 Feb 2025 10:50:34 +0100 Subject: [PATCH 11/11] Update release-drafter.yml --- .github/workflows/release-drafter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release-drafter.yml b/.github/workflows/release-drafter.yml index 6ebc18ef4e..9fe1248111 100644 --- a/.github/workflows/release-drafter.yml +++ b/.github/workflows/release-drafter.yml @@ -2,7 +2,7 @@ name: Release Drafter on: pull_request: - types: [opened, reopened, synchronize] + types: [opened, reopened] jobs: update_release_draft: permissions:
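Taken together, the series leaves ht.load_hdf5 with an optional slices argument and exposes ht.io.size_from_slice. Below is a minimal usage sketch under stated assumptions: the file name "data.h5" and the 2-D dataset "data" are illustrative stand-ins, not taken from the series. Per the patches above, slices with step != 1 raise a ValueError, and a None entry or a tuple shorter than the dataset's rank leaves the remaining axes whole.

import heat as ht

# Load only rows 0..49 and columns 2..3, distributed across processes along axis 0.
# "data.h5" and "data" are hypothetical file/dataset names.
x = ht.load_hdf5(
    "data.h5",
    "data",
    split=0,
    slices=(slice(0, 50), slice(2, 4)),
)

# size_from_slice reports the length and start offset a slice produces on an
# axis of the given global size: here (50, 0).
length, offset = ht.io.size_from_slice(100, slice(0, 50))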