From 3661e548c55cec10acbfb48eb5644ee8c03d0906 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 11 Oct 2024 21:37:25 +0800 Subject: [PATCH 01/16] Refactor vectors_to_arrays and deprecate the array_to_datetime function --- pygmt/clib/conversion.py | 12 ++++++++++++ pygmt/clib/session.py | 20 +++++--------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 07f8756adcd..1db93768f51 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -224,6 +224,13 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: else: vec_dtype = str(getattr(vector, "dtype", "")) array = np.ascontiguousarray(vector, dtype=dtypes.get(vec_dtype)) + # Convert np.object_ to np.datetime64 or np.str_. + # If fails, then the array can't be recognized. + if array.dtype.type == np.object_: + try: + array = np.asarray(array, dtype=np.datetime64) + except ValueError: + array = np.asarray(array, dtype=np.str_) arrays.append(array) return arrays @@ -313,6 +320,11 @@ def array_to_datetime(array: Sequence[Any]) -> np.ndarray: If the input array is not in legal datetime formats, raise a ValueError exception. + .. deprecated:: 0.14.0 + + The function is no longer used in the PyGMT project, but we keep this function + to document the supported datetime types. + Parameters ---------- array diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 903edfc12c7..eea6e0f6efc 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -18,7 +18,6 @@ import pandas as pd import xarray as xr from pygmt.clib.conversion import ( - array_to_datetime, dataarray_to_matrix, sequence_to_ctypes_array, strings_to_ctypes_array, @@ -854,22 +853,13 @@ def _check_dtype_and_dim(self, array, ndim): """ # Check that the array has the given number of dimensions if array.ndim != ndim: - raise GMTInvalidInput( - f"Expected a numpy {ndim}-D array, got {array.ndim}-D." - ) + msg = f"Expected a numpy {ndim}-D array, got {array.ndim}-D." + raise GMTInvalidInput(msg) # Check that the array has a valid/known data type if array.dtype.type not in DTYPES: - try: - if array.dtype.type is np.object_: - # Try to convert unknown object type to np.datetime64 - array = array_to_datetime(array) - else: - raise ValueError - except ValueError as e: - raise GMTInvalidInput( - f"Unsupported numpy data type '{array.dtype.type}'." - ) from e + msg = f"Unsupported numpy data type '{array.dtype.type}'." + raise GMTInvalidInput(msg) return self[DTYPES[array.dtype.type]] def put_vector(self, dataset, column, vector): @@ -917,7 +907,7 @@ def put_vector(self, dataset, column, vector): gmt_type = self._check_dtype_and_dim(vector, ndim=1) if gmt_type in {self["GMT_TEXT"], self["GMT_DATETIME"]}: if gmt_type == self["GMT_DATETIME"]: - vector = np.datetime_as_string(array_to_datetime(vector)) + vector = np.datetime_as_string(vector) vector_pointer = strings_to_ctypes_array(vector) else: vector_pointer = vector.ctypes.data_as(ctp.c_void_p) From 20b921574d72da7955c14c70a8d507cefe9ca352 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sun, 13 Oct 2024 22:56:30 +0800 Subject: [PATCH 02/16] No need to use pd.api.types.is_string_dtype anymore --- pygmt/clib/conversion.py | 4 ++-- pygmt/clib/session.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 1db93768f51..4c32323d7ff 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -228,9 +228,9 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: # If fails, then the array can't be recognized. if array.dtype.type == np.object_: try: - array = np.asarray(array, dtype=np.datetime64) + array = np.ascontiguousarray(array, dtype=np.datetime64) except ValueError: - array = np.asarray(array, dtype=np.str_) + array = np.ascontiguousarray(array, dtype=np.str_) arrays.append(array) return arrays diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index eea6e0f6efc..65b09252448 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1378,7 +1378,7 @@ def virtualfile_from_vectors(self, *vectors): # Assumes that first 2 columns contains coordinates like longitude # latitude, or datetime string types. for col, array in enumerate(arrays[2:]): - if pd.api.types.is_string_dtype(array.dtype): + if array.dtype.type == np.str_: columns = col + 2 break From 83673cf75530fe307089a30328e2e1e54055eee2 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 14 Oct 2024 18:02:10 +0800 Subject: [PATCH 03/16] Add tests for vectors_to_arrays --- pygmt/clib/conversion.py | 37 +--------- pygmt/tests/test_clib_conversion.py | 108 ++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 36 deletions(-) create mode 100644 pygmt/tests/test_clib_conversion.py diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 4c32323d7ff..dc3b5b0e809 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -157,7 +157,6 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: >>> import numpy as np >>> import pandas as pd - >>> data = np.array([[1, 2], [3, 4], [5, 6]]) >>> vectors = [data[:, 0], data[:, 1], pd.Series(data=[-1, -2, -3])] >>> all(i.flags.c_contiguous for i in vectors) False @@ -168,41 +167,7 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: True >>> all(isinstance(i, np.ndarray) for i in arrays) True - - >>> data = [[1, 2], (3, 4), range(5, 7)] - >>> all(isinstance(i, np.ndarray) for i in vectors_to_arrays(data)) - True - - >>> # Sequence of scalars are converted to 1-D arrays - >>> data = vectors_to_arrays([1, 2, 3.0]) - >>> data - [array([1]), array([2]), array([3.])] - >>> [i.ndim for i in data] # Check that they are 1-D arrays - [1, 1, 1] - - >>> series = pd.Series(data=[0, 4, pd.NA, 8, 6], dtype=pd.Int32Dtype()) - >>> vectors_to_arrays([series]) - [array([ 0., 4., nan, 8., 6.])] - - >>> import datetime - >>> import pytest - >>> pa = pytest.importorskip("pyarrow") - >>> vectors = [ - ... pd.Series( - ... data=[datetime.date(2020, 1, 1), datetime.date(2021, 12, 31)], - ... dtype="date32[day][pyarrow]", - ... ), - ... pd.Series( - ... data=[datetime.date(2022, 1, 1), datetime.date(2023, 12, 31)], - ... dtype="date64[ms][pyarrow]", - ... ), - ... ] - >>> arrays = vectors_to_arrays(vectors) - >>> all(a.flags.c_contiguous for a in arrays) - True - >>> all(isinstance(a, np.ndarray) for a in arrays) - True - >>> all(isinstance(a.dtype, np.dtypes.DateTime64DType) for a in arrays) + >>> all(i.ndim == 1 for i in arrays) True """ dtypes = { diff --git a/pygmt/tests/test_clib_conversion.py b/pygmt/tests/test_clib_conversion.py new file mode 100644 index 00000000000..79252744310 --- /dev/null +++ b/pygmt/tests/test_clib_conversion.py @@ -0,0 +1,108 @@ +""" +Test the functions in the clib.conversion module. +""" + +import datetime +import importlib + +import numpy as np +import numpy.testing as npt +import pandas as pd +import pytest +from pygmt.clib.conversion import vectors_to_arrays + +try: + importlib.util.find_spec("pyarrow") + _HAS_PYARROW = True +except ImportError: + _HAS_PYARROW = False + + +def _check_arrays(arrays): + """ + A helper function to check the results of vectors_to_arrays. + + - Check if all arrays are C-contiguous + - Check if all arrays are numpy arrays + - Check if all arrays are 1-D + """ + # Check if all arrays are C-contiguous + assert all(i.flags.c_contiguous for i in arrays) + # Check if all arrays are numpy arrays + assert all(isinstance(i, np.ndarray) for i in arrays) + # Check if all arrays are 1-D + assert all(i.ndim == 1 for i in arrays) + + +@pytest.mark.parametrize( + "vectors", + [ + pytest.param([[1, 2], (3, 4), range(5, 7)], id="python_objects"), + pytest.param( + [np.array([1, 2]), np.array([3, 4]), np.array(range(5, 7))], + id="numpy_arrays", + ), + pytest.param([[1, 2], np.array([3, 4]), range(5, 7)], id="mixed"), + pytest.param([1, 2, 3.0], id="scalars"), + ], +) +def test_vectors_to_arrays(vectors): + """ + Test the vectors_to_arrays function for various input types. + """ + arrays = vectors_to_arrays(vectors) + _check_arrays(arrays) + + +def test_vectors_to_arrays_not_c_contiguous(): + """ + Test the vectors_to_arrays function with numpy arrays that are not C-contiguous. + """ + data = np.array([[1, 2], [3, 4], [5, 6]]) + vectors = [data[:, 0], data[:, 1]] + assert all(not i.flags.c_contiguous for i in vectors) + arrays = vectors_to_arrays(vectors) + _check_arrays(arrays) + + +def test_vectors_to_arrays_pandas_nan(): + """ + Test the vectors_to_arrays function with pandas Series containing NaNs. + """ + vectors = [pd.Series(data=[0, 4, pd.NA, 8, 6], dtype=pd.Int32Dtype())] + arrays = vectors_to_arrays(vectors) + npt.assert_equal(arrays[0], np.array([0, 4, np.nan, 8, 6], dtype=np.float64)) + _check_arrays(arrays) + + +def test_vectors_to_arrays_pandas_string(): + """ + Test the vectors_to_arrays function with pandas Series containing datetime64. + """ + vectors = [ + pd.Series(["abc", "defhig"]), + pd.Series(["abcdef", "123456"], dtype="string"), + ] + arrays = vectors_to_arrays(vectors) + assert all(isinstance(i.dtype, np.dtypes.StrDType) for i in arrays) + _check_arrays(arrays) + + +@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed.") +def test_vectors_to_arrays_pyarrow_datetime(): + """ + Test the vectors_to_arrays function with pyarrow arrays containing datetime64. + """ + vectors = [ + pd.Series( + data=[datetime.date(2020, 1, 1), datetime.date(2021, 12, 31)], + dtype="date32[day][pyarrow]", + ), + pd.Series( + data=[datetime.date(2022, 1, 1), datetime.date(2023, 12, 31)], + dtype="date64[ms][pyarrow]", + ), + ] + arrays = vectors_to_arrays(vectors) + assert all(isinstance(i.dtype, np.dtypes.DateTime64DType) for i in arrays) + _check_arrays(arrays) From 56a084172940d50ca1eec4d1076c934ae69da8a7 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 14 Oct 2024 18:44:07 +0800 Subject: [PATCH 04/16] Rename test_clib_conversion.py to test_clib_vectors_to_arrays.py --- .../{test_clib_conversion.py => test_clib_vectors_to_arrays.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pygmt/tests/{test_clib_conversion.py => test_clib_vectors_to_arrays.py} (100%) diff --git a/pygmt/tests/test_clib_conversion.py b/pygmt/tests/test_clib_vectors_to_arrays.py similarity index 100% rename from pygmt/tests/test_clib_conversion.py rename to pygmt/tests/test_clib_vectors_to_arrays.py From 6338cde7c0f7d79310eb44ba35d99372987765c5 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 14 Oct 2024 18:46:15 +0800 Subject: [PATCH 05/16] Add one line of code back --- pygmt/clib/conversion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index dc3b5b0e809..73c621f8ba3 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -157,6 +157,7 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: >>> import numpy as np >>> import pandas as pd + >>> data = np.array([[1, 2], [3, 4], [5, 6]]) >>> vectors = [data[:, 0], data[:, 1], pd.Series(data=[-1, -2, -3])] >>> all(i.flags.c_contiguous for i in vectors) False From 18645561f90501df1c6c2290168ced00fa646005 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 14 Oct 2024 18:50:30 +0800 Subject: [PATCH 06/16] Explicitly checking array.dtype.type --- pygmt/tests/test_clib_vectors_to_arrays.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygmt/tests/test_clib_vectors_to_arrays.py b/pygmt/tests/test_clib_vectors_to_arrays.py index 79252744310..53d8f8e0c0f 100644 --- a/pygmt/tests/test_clib_vectors_to_arrays.py +++ b/pygmt/tests/test_clib_vectors_to_arrays.py @@ -84,7 +84,7 @@ def test_vectors_to_arrays_pandas_string(): pd.Series(["abcdef", "123456"], dtype="string"), ] arrays = vectors_to_arrays(vectors) - assert all(isinstance(i.dtype, np.dtypes.StrDType) for i in arrays) + assert all(i.dtype.type == np.str_ for i in arrays) _check_arrays(arrays) @@ -104,5 +104,5 @@ def test_vectors_to_arrays_pyarrow_datetime(): ), ] arrays = vectors_to_arrays(vectors) - assert all(isinstance(i.dtype, np.dtypes.DateTime64DType) for i in arrays) + assert all(i.dtype.type == np.datetime64 for i in arrays) _check_arrays(arrays) From 54160bf5e17fff9b4f735321b3c2f59b0a76cdc5 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 14 Oct 2024 19:09:02 +0800 Subject: [PATCH 07/16] Correctly skip the tests if pyarrow is not installed --- pygmt/tests/test_clib_vectors_to_arrays.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pygmt/tests/test_clib_vectors_to_arrays.py b/pygmt/tests/test_clib_vectors_to_arrays.py index 53d8f8e0c0f..d17359b24e2 100644 --- a/pygmt/tests/test_clib_vectors_to_arrays.py +++ b/pygmt/tests/test_clib_vectors_to_arrays.py @@ -11,11 +11,7 @@ import pytest from pygmt.clib.conversion import vectors_to_arrays -try: - importlib.util.find_spec("pyarrow") - _HAS_PYARROW = True -except ImportError: - _HAS_PYARROW = False +_HAS_PYARROW = bool(importlib.util.find_spec("pyarrow")) def _check_arrays(arrays): From f4e1a5f738e4138bfc8972a6a415d1d2417bb9ac Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 14 Oct 2024 20:15:18 +0800 Subject: [PATCH 08/16] Explicitly specify how to convert pandas/pyarrow string dtype to numpy string dtype --- pygmt/clib/conversion.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 73c621f8ba3..2b50c0d2413 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -174,6 +174,7 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: dtypes = { "date32[day][pyarrow]": np.datetime64, "date64[ms][pyarrow]": np.datetime64, + "string": np.str_, } arrays = [] for vector in vectors: @@ -188,7 +189,8 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: # we can remove the workaround in PyGMT v0.17.0. array = np.ascontiguousarray(vector.astype(float)) else: - vec_dtype = str(getattr(vector, "dtype", "")) + # NumPy/Pandas uses "dtype" and PyArrow uses "type". + vec_dtype = str(getattr(vector, "dtype", getattr(vector, "type", ""))) array = np.ascontiguousarray(vector, dtype=dtypes.get(vec_dtype)) # Convert np.object_ to np.datetime64 or np.str_. # If fails, then the array can't be recognized. From ed3be20b97ab9cc9e58aaeaa2bd9ef6a55055154 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 14 Oct 2024 20:17:10 +0800 Subject: [PATCH 09/16] Ensure the resulting numpy dtypes are supported by GMT --- pygmt/tests/test_clib_vectors_to_arrays.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pygmt/tests/test_clib_vectors_to_arrays.py b/pygmt/tests/test_clib_vectors_to_arrays.py index d17359b24e2..39706e01d2c 100644 --- a/pygmt/tests/test_clib_vectors_to_arrays.py +++ b/pygmt/tests/test_clib_vectors_to_arrays.py @@ -10,6 +10,7 @@ import pandas as pd import pytest from pygmt.clib.conversion import vectors_to_arrays +from pygmt.clib.session import DTYPES _HAS_PYARROW = bool(importlib.util.find_spec("pyarrow")) @@ -28,6 +29,8 @@ def _check_arrays(arrays): assert all(isinstance(i, np.ndarray) for i in arrays) # Check if all arrays are 1-D assert all(i.ndim == 1 for i in arrays) + # Check if all numpy dtypes can be recognized by GMT + assert all(i.dtype.type in DTYPES for i in arrays) @pytest.mark.parametrize( From e26afbf5fa6cb55c12f72390cc09127042f65a9c Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 17 Oct 2024 16:52:39 +0800 Subject: [PATCH 10/16] Revert any changes that enhances the conversion process --- pygmt/clib/conversion.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 2b50c0d2413..73c621f8ba3 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -174,7 +174,6 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: dtypes = { "date32[day][pyarrow]": np.datetime64, "date64[ms][pyarrow]": np.datetime64, - "string": np.str_, } arrays = [] for vector in vectors: @@ -189,8 +188,7 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: # we can remove the workaround in PyGMT v0.17.0. array = np.ascontiguousarray(vector.astype(float)) else: - # NumPy/Pandas uses "dtype" and PyArrow uses "type". - vec_dtype = str(getattr(vector, "dtype", getattr(vector, "type", ""))) + vec_dtype = str(getattr(vector, "dtype", "")) array = np.ascontiguousarray(vector, dtype=dtypes.get(vec_dtype)) # Convert np.object_ to np.datetime64 or np.str_. # If fails, then the array can't be recognized. From be3c93e15fc0e539a39e834aacf9d0d238a6cba3 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 17 Oct 2024 17:40:03 +0800 Subject: [PATCH 11/16] Rename array_to_datetime to _array_to_datetime --- pygmt/clib/conversion.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 73c621f8ba3..e7efbf9cef3 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -280,7 +280,7 @@ def strings_to_ctypes_array(strings: Sequence[str]) -> ctp.Array: return (ctp.c_char_p * len(strings))(*[s.encode() for s in strings]) -def array_to_datetime(array: Sequence[Any]) -> np.ndarray: +def _array_to_datetime(array: Sequence[Any]) -> np.ndarray: """ Convert a 1-D datetime array from various types into numpy.datetime64. @@ -321,20 +321,20 @@ def array_to_datetime(array: Sequence[Any]) -> np.ndarray: ... ["2010-06-01", "2011-06-01T12", "2012-01-01T12:34:56"], ... dtype="datetime64[ns]", ... ) - >>> array_to_datetime(x) + >>> _array_to_datetime(x) array(['2010-06-01T00:00:00.000000000', '2011-06-01T12:00:00.000000000', '2012-01-01T12:34:56.000000000'], dtype='datetime64[ns]') >>> # pandas.DateTimeIndex array >>> import pandas as pd >>> x = pd.date_range("2013", freq="YS", periods=3) - >>> array_to_datetime(x) + >>> _array_to_datetime(x) array(['2013-01-01T00:00:00.000000000', '2014-01-01T00:00:00.000000000', '2015-01-01T00:00:00.000000000'], dtype='datetime64[ns]') >>> # Python's built-in date and datetime >>> x = [datetime.date(2018, 1, 1), datetime.datetime(2019, 1, 1)] - >>> array_to_datetime(x) + >>> _array_to_datetime(x) array(['2018-01-01T00:00:00.000000', '2019-01-01T00:00:00.000000'], dtype='datetime64[us]') @@ -345,7 +345,7 @@ def array_to_datetime(array: Sequence[Any]) -> np.ndarray: ... "2018-03-01", ... "2018-04-01T01:02:03", ... ] - >>> array_to_datetime(x) + >>> _array_to_datetime(x) array(['2018-01-01T00:00:00', '2018-02-01T00:00:00', '2018-03-01T00:00:00', '2018-04-01T01:02:03'], dtype='datetime64[s]') @@ -356,7 +356,7 @@ def array_to_datetime(array: Sequence[Any]) -> np.ndarray: ... np.datetime64("2018-01-01"), ... datetime.datetime(2018, 1, 1), ... ] - >>> array_to_datetime(x) + >>> _array_to_datetime(x) array(['2018-01-01T00:00:00.000000', '2018-01-01T00:00:00.000000', '2018-01-01T00:00:00.000000'], dtype='datetime64[us]') """ From 8f99a977c9f94182640738784947923b0c5e58e4 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Sat, 19 Oct 2024 11:53:22 +0800 Subject: [PATCH 12/16] Some numpy dtypes like np.float16 can't be recognized by GMT --- pygmt/tests/test_clib_vectors_to_arrays.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pygmt/tests/test_clib_vectors_to_arrays.py b/pygmt/tests/test_clib_vectors_to_arrays.py index 39706e01d2c..d17359b24e2 100644 --- a/pygmt/tests/test_clib_vectors_to_arrays.py +++ b/pygmt/tests/test_clib_vectors_to_arrays.py @@ -10,7 +10,6 @@ import pandas as pd import pytest from pygmt.clib.conversion import vectors_to_arrays -from pygmt.clib.session import DTYPES _HAS_PYARROW = bool(importlib.util.find_spec("pyarrow")) @@ -29,8 +28,6 @@ def _check_arrays(arrays): assert all(isinstance(i, np.ndarray) for i in arrays) # Check if all arrays are 1-D assert all(i.ndim == 1 for i in arrays) - # Check if all numpy dtypes can be recognized by GMT - assert all(i.dtype.type in DTYPES for i in arrays) @pytest.mark.parametrize( From d8777e5e8f4878a463a4524f65033c95a1352dc6 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 24 Oct 2024 11:10:46 +0800 Subject: [PATCH 13/16] Check three variants for string dtypes --- pygmt/tests/test_clib_vectors_to_arrays.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pygmt/tests/test_clib_vectors_to_arrays.py b/pygmt/tests/test_clib_vectors_to_arrays.py index 91efcd95a17..85e89a54ca0 100644 --- a/pygmt/tests/test_clib_vectors_to_arrays.py +++ b/pygmt/tests/test_clib_vectors_to_arrays.py @@ -10,6 +10,7 @@ import pandas as pd import pytest from pygmt.clib.conversion import vectors_to_arrays +from pygmt.helpers.testing import skip_if_no _HAS_PYARROW = bool(importlib.util.find_spec("pyarrow")) @@ -80,13 +81,21 @@ def test_vectors_to_arrays_pandas_nan(): _check_arrays(arrays) -def test_vectors_to_arrays_pandas_string(): +@pytest.mark.parametrize( + "dtype", + [ + "string[python]", + pytest.param("string[pyarrow]", marks=skip_if_no(package="pyarrow")), + pytest.param("string[pyarrow_numpy]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_vectors_to_arrays_pandas_string(dtype): """ - Test the vectors_to_arrays function with pandas Series containing datetime64. + Test the vectors_to_arrays function with pandas strings. """ vectors = [ - pd.Series(["abc", "defhig"]), - pd.Series(["abcdef", "123456"], dtype="string"), + pd.Series(["abc", "defg"], dtype=dtype), + pd.Series(["hijklmn", "123456"], dtype=dtype), ] arrays = vectors_to_arrays(vectors) assert all(i.dtype.type == np.str_ for i in arrays) From 38b839aaa1ff8eda7d89b546f95459d3e4c246b8 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Mon, 28 Oct 2024 12:33:28 +0800 Subject: [PATCH 14/16] Also check TypeError --- pygmt/clib/conversion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index 2dc48b1765e..3944938fd95 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -190,12 +190,12 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]: else: vec_dtype = str(getattr(vector, "dtype", "")) array = np.ascontiguousarray(vector, dtype=dtypes.get(vec_dtype)) - # Convert np.object_ to np.datetime64 or np.str_. - # If fails, then the array can't be recognized. + # Convert np.object_ to np.datetime64 or np.str_. If fails, then the array can't + # be recognized. if array.dtype.type == np.object_: try: array = np.ascontiguousarray(array, dtype=np.datetime64) - except ValueError: + except (ValueError, TypeError): array = np.ascontiguousarray(array, dtype=np.str_) arrays.append(array) return arrays @@ -289,7 +289,7 @@ def _array_to_datetime(array: Sequence[Any] | np.ndarray) -> np.ndarray: .. deprecated:: 0.14.0 The function is no longer used in the PyGMT project, but we keep this function - to document the supported datetime types. + to docuemnt and test the supported datetime types. Parameters ---------- From bccae945bba4a7fdd6e9db55cd3cf59a9bd59a42 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 15 Nov 2024 15:14:09 +0800 Subject: [PATCH 15/16] Remove the tests for pandas string dtypes --- pygmt/tests/test_clib_vectors_to_arrays.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/pygmt/tests/test_clib_vectors_to_arrays.py b/pygmt/tests/test_clib_vectors_to_arrays.py index 85e89a54ca0..e59690069ca 100644 --- a/pygmt/tests/test_clib_vectors_to_arrays.py +++ b/pygmt/tests/test_clib_vectors_to_arrays.py @@ -10,7 +10,6 @@ import pandas as pd import pytest from pygmt.clib.conversion import vectors_to_arrays -from pygmt.helpers.testing import skip_if_no _HAS_PYARROW = bool(importlib.util.find_spec("pyarrow")) @@ -81,27 +80,6 @@ def test_vectors_to_arrays_pandas_nan(): _check_arrays(arrays) -@pytest.mark.parametrize( - "dtype", - [ - "string[python]", - pytest.param("string[pyarrow]", marks=skip_if_no(package="pyarrow")), - pytest.param("string[pyarrow_numpy]", marks=skip_if_no(package="pyarrow")), - ], -) -def test_vectors_to_arrays_pandas_string(dtype): - """ - Test the vectors_to_arrays function with pandas strings. - """ - vectors = [ - pd.Series(["abc", "defg"], dtype=dtype), - pd.Series(["hijklmn", "123456"], dtype=dtype), - ] - arrays = vectors_to_arrays(vectors) - assert all(i.dtype.type == np.str_ for i in arrays) - _check_arrays(arrays) - - @pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed.") def test_vectors_to_arrays_pyarrow_datetime(): """ From a4103579c1b485c855b30475dcbe2ed1bf9683a1 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Thu, 28 Nov 2024 15:29:30 +0800 Subject: [PATCH 16/16] Remove the array_to_datetime function --- pygmt/clib/conversion.py | 83 ---------------------------------------- 1 file changed, 83 deletions(-) diff --git a/pygmt/clib/conversion.py b/pygmt/clib/conversion.py index d8125133d0b..464595940b0 100644 --- a/pygmt/clib/conversion.py +++ b/pygmt/clib/conversion.py @@ -320,86 +320,3 @@ def strings_to_ctypes_array(strings: Sequence[str] | np.ndarray) -> ctp.Array: ['first', 'second', 'third'] """ return (ctp.c_char_p * len(strings))(*[s.encode() for s in strings]) - - -def _array_to_datetime(array: Sequence[Any] | np.ndarray) -> np.ndarray: - """ - Convert a 1-D datetime array from various types into numpy.datetime64. - - If the input array is not in legal datetime formats, raise a ValueError exception. - - .. deprecated:: 0.14.0 - - The function is no longer used in the PyGMT project, but we keep this function - to docuemnt and test the supported datetime types. - - Parameters - ---------- - array - The input datetime array in various formats. - - Supported types: - - - str - - numpy.datetime64 - - pandas.DateTimeIndex - - datetime.datetime and datetime.date - - Returns - ------- - array - 1-D datetime array in numpy.datetime64. - - Raises - ------ - ValueError - If the datetime string is invalid. - - Examples - -------- - >>> import datetime - >>> # numpy.datetime64 array - >>> x = np.array( - ... ["2010-06-01", "2011-06-01T12", "2012-01-01T12:34:56"], - ... dtype="datetime64[ns]", - ... ) - >>> _array_to_datetime(x) - array(['2010-06-01T00:00:00.000000000', '2011-06-01T12:00:00.000000000', - '2012-01-01T12:34:56.000000000'], dtype='datetime64[ns]') - - >>> # pandas.DateTimeIndex array - >>> import pandas as pd - >>> x = pd.date_range("2013", freq="YS", periods=3) - >>> _array_to_datetime(x) - array(['2013-01-01T00:00:00.000000000', '2014-01-01T00:00:00.000000000', - '2015-01-01T00:00:00.000000000'], dtype='datetime64[ns]') - - >>> # Python's built-in date and datetime - >>> x = [datetime.date(2018, 1, 1), datetime.datetime(2019, 1, 1)] - >>> _array_to_datetime(x) - array(['2018-01-01T00:00:00.000000', '2019-01-01T00:00:00.000000'], - dtype='datetime64[us]') - - >>> # Raw datetime strings in various format - >>> x = [ - ... "2018", - ... "2018-02", - ... "2018-03-01", - ... "2018-04-01T01:02:03", - ... ] - >>> _array_to_datetime(x) - array(['2018-01-01T00:00:00', '2018-02-01T00:00:00', - '2018-03-01T00:00:00', '2018-04-01T01:02:03'], - dtype='datetime64[s]') - - >>> # Mixed datetime types - >>> x = [ - ... "2018-01-01", - ... np.datetime64("2018-01-01"), - ... datetime.datetime(2018, 1, 1), - ... ] - >>> _array_to_datetime(x) - array(['2018-01-01T00:00:00.000000', '2018-01-01T00:00:00.000000', - '2018-01-01T00:00:00.000000'], dtype='datetime64[us]') - """ - return np.asarray(array, dtype=np.datetime64)