Skip to content

Commit 0bc7ea3

Browse files
authored
BUG: Series.map with pyarrow timestamp/duration (#62278)
1 parent 46ede92 commit 0bc7ea3

File tree

6 files changed

+70
-1
lines changed

6 files changed

+70
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,8 @@ Indexing
969969
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
970970
- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
971971
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
972+
- Bug in indexing a :class:`DatetimeIndex` with a target of ``timestamp[pyarrow]`` dtype, or a :class:`TimedeltaIndex` with a target of ``duration[pyarrow]`` dtype (:issue:`62277`)
973+
-
972974

973975
Missing
974976
^^^^^^^
@@ -1137,6 +1139,7 @@ Other
11371139
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
11381140
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
11391141
- Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)
1142+
- Bug in :meth:`Series.map` with a ``timestamp[pyarrow]`` dtype or ``duration[pyarrow]`` dtype incorrectly returning all-``NaN`` entries (:issue:`61231`)
11401143
- Bug in :meth:`Series.mode` where an exception was raised when taking the mode with nullable types with no null values in the series. (:issue:`58926`)
11411144
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
11421145
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all values are NA. (:issue:`60688`)
@@ -1151,6 +1154,7 @@ Other
11511154
- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raising a ``ValueError`` (:issue:`60568`)
11521155
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
11531156
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
1157+
-
11541158

11551159
.. ***DO NOT USE THIS SECTION***
11561160

pandas/core/arrays/arrow/array.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,6 +1616,10 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
16161616
if is_numeric_dtype(self.dtype):
16171617
return map_array(self.to_numpy(), mapper, na_action=na_action)
16181618
else:
1619+
# For "mM" cases, the super() method passes `self` without the
1620+
# to_numpy call, which inside map_array casts to ndarray[object].
1621+
# Without the to_numpy() call, NA is preserved instead of changed
1622+
# to None.
16191623
return super().map(mapper, na_action)
16201624

16211625
@doc(ExtensionArray.duplicated)

pandas/core/indexes/datetimes.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@
3333
)
3434

3535
from pandas.core.dtypes.common import is_scalar
36-
from pandas.core.dtypes.dtypes import DatetimeTZDtype
36+
from pandas.core.dtypes.dtypes import (
37+
ArrowDtype,
38+
DatetimeTZDtype,
39+
)
3740
from pandas.core.dtypes.generic import ABCSeries
3841
from pandas.core.dtypes.missing import is_valid_na_for_dtype
3942

@@ -384,6 +387,16 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
384387
"""
385388
Can we compare values of the given dtype to our own?
386389
"""
390+
if isinstance(dtype, ArrowDtype):
391+
# GH#62277
392+
if dtype.kind != "M":
393+
return False
394+
395+
pa_dtype = dtype.pyarrow_dtype
396+
if (pa_dtype.tz is None) ^ (self.tz is None):
397+
return False
398+
return True
399+
387400
if self.tz is not None:
388401
# If we have tz, we can compare to tzaware
389402
return isinstance(dtype, DatetimeTZDtype)

pandas/core/indexes/timedeltas.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
is_scalar,
2020
pandas_dtype,
2121
)
22+
from pandas.core.dtypes.dtypes import ArrowDtype
2223
from pandas.core.dtypes.generic import ABCSeries
2324

2425
from pandas.core.arrays.timedeltas import TimedeltaArray
@@ -194,6 +195,8 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
194195
"""
195196
Can we compare values of the given dtype to our own?
196197
"""
198+
if isinstance(dtype, ArrowDtype):
199+
return dtype.kind == "m"
197200
return lib.is_np_dtype(dtype, "m") # aka self._data._is_recognized_dtype
198201

199202
# -------------------------------------------------------------------

pandas/tests/indexes/datetimes/test_indexing.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from pandas._libs import index as libindex
1212
from pandas.compat.numpy import np_long
13+
import pandas.util._test_decorators as td
1314

1415
import pandas as pd
1516
from pandas import (
@@ -513,6 +514,26 @@ def test_contains_nonunique(self, vals):
513514

514515

515516
class TestGetIndexer:
517+
@td.skip_if_no("pyarrow")
518+
@pytest.mark.parametrize("as_td", [True, False])
519+
def test_get_indexer_pyarrow(self, as_td):
520+
# GH#62277
521+
index = date_range("2016-01-01", periods=3)
522+
target = index.astype("timestamp[ns][pyarrow]")[::-1]
523+
if as_td:
524+
# Test duration dtypes while we're here
525+
index = index - index[0]
526+
target = target - target[-1]
527+
528+
result = index.get_indexer(target)
529+
530+
expected = np.array([2, 1, 0], dtype=np.intp)
531+
tm.assert_numpy_array_equal(result, expected)
532+
533+
# Reversed op should work the same
534+
result2 = target.get_indexer(index)
535+
tm.assert_numpy_array_equal(result2, expected)
536+
516537
def test_get_indexer_date_objs(self):
517538
rng = date_range("1/1/2000", periods=20)
518539

pandas/tests/series/methods/test_map.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pytest
1010

1111
from pandas.errors import Pandas4Warning
12+
import pandas.util._test_decorators as td
1213

1314
import pandas as pd
1415
from pandas import (
@@ -653,3 +654,26 @@ def test_map_engine_not_executor():
653654

654655
with pytest.raises(ValueError, match="Not a valid engine: 'something'"):
655656
s.map(lambda x: x, engine="something")
657+
658+
659+
@td.skip_if_no("pyarrow")
660+
@pytest.mark.parametrize("as_td", [True, False])
661+
def test_map_pyarrow_timestamp(as_td):
662+
# GH#61231
663+
dti = date_range("2018-01-01 00:00:00", "2018-01-07 00:00:00")
664+
ser = Series(dti, dtype="timestamp[ns][pyarrow]", name="a")
665+
if as_td:
666+
# duration dtype
667+
ser = ser - ser[0]
668+
669+
mapper = {date: i for i, date in enumerate(ser)}
670+
671+
res_series = ser.map(mapper)
672+
expected = Series(range(len(ser)), name="a", dtype="int64")
673+
tm.assert_series_equal(res_series, expected)
674+
675+
res_index = Index(ser).map(mapper)
676+
# For now (as of 2025-09-06) at least, we do inference on Index.map that
677+
# we don't for Series.map
678+
expected_index = Index(expected).astype("int64[pyarrow]")
679+
tm.assert_index_equal(res_index, expected_index)

0 commit comments

Comments
 (0)