From 6962a8320735f9b91cd6fe85b98be31ebeba8932 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Fri, 16 May 2025 16:36:55 +0100 Subject: [PATCH 1/8] Switch default to return pathlib.Path rather than py.path.local(). Update deprecation message to announce that the latter will be completely removed later. --- dials_data/download.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/dials_data/download.py b/dials_data/download.py index 15798e61..9cbb4890 100644 --- a/dials_data/download.py +++ b/dials_data/download.py @@ -300,7 +300,7 @@ def result_filter(self, result, **kwargs): """ return result - def __call__(self, test_data: str, pathlib=None, **kwargs): + def __call__(self, test_data: str, pathlib=True, **kwargs): """ Return the location of a dataset, transparently downloading it if necessary and possible. @@ -308,21 +308,17 @@ def __call__(self, test_data: str, pathlib=None, **kwargs): function. :param test_data: name of the requested dataset. :param pathlib: Whether to return the result as a Python pathlib object. - The default for this setting is 'False' for now (leading - to a py.path.local object being returned), but the default - will change to 'True' in a future dials.data release. - Set to 'True' for forward compatibility. :return: A pathlib or py.path.local object pointing to the dataset, or False if the dataset is not available. """ if test_data not in self._cache: self._cache[test_data] = self._attempt_fetch(test_data) - if pathlib is None: + if not pathlib: warnings.warn( - "The DataFetcher currently returns py.path.local() objects. " - "This will in the future change to pathlib.Path() objects. " - "You can either add a pathlib=True argument to obtain a pathlib.Path() object, " - "or pathlib=False to silence this warning for now.", + "This DataFetcher request will return a py.path.local() object. " + "This is deprecated and the default behaviour is to return a " + "pathlib.Path() object. In future the pathlib parameter will be " + "removed and a pathlib.Path() will always be returned.", DeprecationWarning, stacklevel=2, ) From 686a91585c507f599a22e920820d1d0c4ae09bdd Mon Sep 17 00:00:00 2001 From: David Waterman Date: Fri, 16 May 2025 16:43:01 +0100 Subject: [PATCH 2/8] More explanation in the docstring --- dials_data/download.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dials_data/download.py b/dials_data/download.py index 9cbb4890..228013cd 100644 --- a/dials_data/download.py +++ b/dials_data/download.py @@ -308,6 +308,9 @@ def __call__(self, test_data: str, pathlib=True, **kwargs): function. :param test_data: name of the requested dataset. :param pathlib: Whether to return the result as a Python pathlib object. + Setting to a Falsy value will return a py.path.local() + object, however this is deprecated and this parameter + will be completely removed in a later version. :return: A pathlib or py.path.local object pointing to the dataset, or False if the dataset is not available. """ From dbb1581cdb11d2cb3dc1f87283b46e51fc39811b Mon Sep 17 00:00:00 2001 From: David Waterman Date: Fri, 16 May 2025 16:47:20 +0100 Subject: [PATCH 3/8] Change tack: remove pathlib parameter completely --- dials_data/download.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/dials_data/download.py b/dials_data/download.py index 228013cd..9b205a23 100644 --- a/dials_data/download.py +++ b/dials_data/download.py @@ -7,13 +7,11 @@ import hashlib import os import tarfile -import warnings import zipfile from pathlib import Path from typing import Any from urllib.parse import urlparse -import py.path import requests from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry @@ -300,35 +298,25 @@ def result_filter(self, result, **kwargs): """ return result - def __call__(self, test_data: str, pathlib=True, **kwargs): + def __call__(self, test_data: str, **kwargs): """ Return the location of a dataset, transparently downloading it if necessary and possible. The return value can be manipulated by overriding the result_filter function. :param test_data: name of the requested dataset. - :param pathlib: Whether to return the result as a Python pathlib object. - Setting to a Falsy value will return a py.path.local() - object, however this is deprecated and this parameter - will be completely removed in a later version. :return: A pathlib or py.path.local object pointing to the dataset, or False if the dataset is not available. """ + if "pathlib" in kwargs: + raise ValueError( + "The pathlib parameter has been removed. The " + "DataFetcher always returns pathlib.Path() objects now." + ) if test_data not in self._cache: self._cache[test_data] = self._attempt_fetch(test_data) - if not pathlib: - warnings.warn( - "This DataFetcher request will return a py.path.local() object. " - "This is deprecated and the default behaviour is to return a " - "pathlib.Path() object. In future the pathlib parameter will be " - "removed and a pathlib.Path() will always be returned.", - DeprecationWarning, - stacklevel=2, - ) if not self._cache[test_data]: return self.result_filter(result=False) - elif not pathlib: - return self.result_filter(result=py.path.local(self._cache[test_data])) return self.result_filter(result=self._cache[test_data]) def _attempt_fetch(self, test_data: str) -> Path | None: From 98db7d7aeef7f92ae17373cc453c97bb21685d6c Mon Sep 17 00:00:00 2001 From: David Waterman Date: Fri, 16 May 2025 17:08:54 +0100 Subject: [PATCH 4/8] Major version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dec8455b..42e0382b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "dials_data" -version = "2.4.0" +version = "3.0.0" description = "DIALS Regression Data Manager" authors = [ { name = "DIALS development team", email = "dials-support@lists.sourceforge.net" }, From 491457da2ba7521a1b353d55bb83f284da066dfb Mon Sep 17 00:00:00 2001 From: David Waterman Date: Fri, 16 May 2025 17:10:03 +0100 Subject: [PATCH 5/8] Update author email --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 42e0382b..cb0bf15f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "dials_data" version = "3.0.0" description = "DIALS Regression Data Manager" authors = [ - { name = "DIALS development team", email = "dials-support@lists.sourceforge.net" }, + { name = "DIALS development team", email = "dials-user-group@jiscmail.ac.uk" }, ] license = { text = "BSD 3-Clause License" } classifiers = [ From 5c2ae5f6c8dd1a5339387801d8feaa6a18c5eb17 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Tue, 4 Nov 2025 19:52:27 +0000 Subject: [PATCH 6/8] CI trigger From cf38d4c5f5146c81f1bec8af61b7462f09b17ed3 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Tue, 4 Nov 2025 19:58:54 +0000 Subject: [PATCH 7/8] attempt to fix tests --- tests/test_dials_data.py | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/tests/test_dials_data.py b/tests/test_dials_data.py index f9a203ed..4e764d18 100644 --- a/tests/test_dials_data.py +++ b/tests/test_dials_data.py @@ -22,14 +22,14 @@ def test_repository_location(): def test_fetching_undefined_datasets_does_not_crash(): df = dials_data.download.DataFetcher(read_only=True) - assert df("aardvark", pathlib=True) is False + assert df("aardvark") is False def test_requests_for_future_datasets_can_be_intercepted(): df = dials_data.download.DataFetcher(read_only=True) df.result_filter = mock.Mock() df.result_filter.return_value = False - assert df("aardvark", pathlib=True) is False + assert df("aardvark") is False df.result_filter.assert_called_once_with(result=False) @@ -48,34 +48,7 @@ def test_datafetcher_constructs_py_path(fetcher, root): "dataset", pre_scan=True, read_only=False, verify=True ) - ds = df("dataset", pathlib=False) + ds = df("dataset") assert pathlib.Path(ds).resolve() == pathlib.Path("/tmp/root/dataset").resolve() assert isinstance(ds, py.path.local) fetcher.assert_called_once() - - -@mock.patch("dials_data.datasets.repository_location") -@mock.patch("dials_data.download.fetch_dataset") -def test_datafetcher_constructs_path(fetcher, root): - test_path = pathlib.Path("/tmp/root") - root.return_value = test_path - fetcher.return_value = True - - df = dials_data.download.DataFetcher(read_only=True) - ds = df("dataset", pathlib=True) - assert ds == test_path / "dataset" - - assert isinstance(ds, pathlib.Path) - fetcher.assert_called_once_with( - "dataset", pre_scan=True, read_only=False, verify=True - ) - - with pytest.warns(DeprecationWarning): - ds = df("dataset") - assert pathlib.Path(ds).resolve() == test_path.joinpath("dataset").resolve() - assert not isinstance( - ds, pathlib.Path - ) # default is currently to return py.path.local() - fetcher.assert_called_once_with( - "dataset", pre_scan=True, read_only=False, verify=True - ) From 3cbee3c195846012d715d5049d3db01c67b11603 Mon Sep 17 00:00:00 2001 From: David Waterman Date: Tue, 4 Nov 2025 20:02:13 +0000 Subject: [PATCH 8/8] Remove not-needed test --- tests/test_dials_data.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/tests/test_dials_data.py b/tests/test_dials_data.py index 4e764d18..c33b3fd4 100644 --- a/tests/test_dials_data.py +++ b/tests/test_dials_data.py @@ -1,11 +1,7 @@ from __future__ import annotations -import pathlib from unittest import mock -import py -import pytest - import dials_data import dials_data.datasets import dials_data.download @@ -31,24 +27,3 @@ def test_requests_for_future_datasets_can_be_intercepted(): df.result_filter.return_value = False assert df("aardvark") is False df.result_filter.assert_called_once_with(result=False) - - -@mock.patch("dials_data.datasets.repository_location") -@mock.patch("dials_data.download.fetch_dataset") -def test_datafetcher_constructs_py_path(fetcher, root): - root.return_value = pathlib.Path("/tmp/root") - fetcher.return_value = True - - df = dials_data.download.DataFetcher(read_only=True) - with pytest.warns(DeprecationWarning): - ds = df("dataset") - assert pathlib.Path(ds).resolve() == pathlib.Path("/tmp/root/dataset").resolve() - assert isinstance(ds, py.path.local) - fetcher.assert_called_once_with( - "dataset", pre_scan=True, read_only=False, verify=True - ) - - ds = df("dataset") - assert pathlib.Path(ds).resolve() == pathlib.Path("/tmp/root/dataset").resolve() - assert isinstance(ds, py.path.local) - fetcher.assert_called_once()