Skip to content

Commit 22bae73

Browse files
authored
BUG: date-vs-datetime comparison with pyarrow dtypes (#62426)
1 parent 9100696 commit 22bae73

File tree

4 files changed

+92
-16
lines changed

4 files changed

+92
-16
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,8 @@ Datetimelike
914914
- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
915915
- Bug in :meth:`to_datetime` with ``format="ISO8601"`` and ``utc=True`` where naive timestamps incorrectly inherited timezone offset from previous timestamps in a series. (:issue:`61389`)
916916
- Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
917+
- Bug in comparison between objects with ``np.datetime64`` dtype and ``timestamp[pyarrow]`` dtype incorrectly raising ``TypeError`` (:issue:`60937`)
918+
- Bug in comparison between objects with pyarrow date dtype and ``timestamp[pyarrow]`` or ``np.datetime64`` dtype failing to consider these as non-comparable (:issue:`62157`)
917919
- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
918920
- Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
919921
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

pandas/core/arrays/arrow/array.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
from __future__ import annotations
22

3+
from datetime import (
4+
date,
5+
datetime,
6+
)
37
import functools
48
import operator
59
from pathlib import Path
@@ -827,28 +831,46 @@ def __setstate__(self, state) -> None:
827831

828832
def _cmp_method(self, other, op) -> ArrowExtensionArray:
829833
pc_func = ARROW_CMP_FUNCS[op.__name__]
834+
ltype = self._pa_array.type
835+
830836
if isinstance(other, (ExtensionArray, np.ndarray, list)):
831-
try:
832-
result = pc_func(self._pa_array, self._box_pa(other))
833-
except pa.ArrowNotImplementedError:
834-
# TODO: could this be wrong if other is object dtype?
835-
# in which case we need to operate pointwise?
837+
boxed = self._box_pa(other)
838+
rtype = boxed.type
839+
if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
840+
pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
841+
):
842+
# GH#62157 match non-pyarrow behavior
836843
result = ops.invalid_comparison(self, other, op)
837844
result = pa.array(result, type=pa.bool_())
838-
elif is_scalar(other):
839-
try:
840-
result = pc_func(self._pa_array, self._box_pa(other))
841-
except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
842-
mask = isna(self) | isna(other)
843-
valid = ~mask
844-
result = np.zeros(len(self), dtype="bool")
845-
np_array = np.array(self)
845+
else:
846846
try:
847-
result[valid] = op(np_array[valid], other)
848-
except TypeError:
847+
result = pc_func(self._pa_array, boxed)
848+
except pa.ArrowNotImplementedError:
849+
# TODO: could this be wrong if other is object dtype?
850+
# in which case we need to operate pointwise?
849851
result = ops.invalid_comparison(self, other, op)
852+
result = pa.array(result, type=pa.bool_())
853+
elif is_scalar(other):
854+
if (isinstance(other, datetime) and pa.types.is_date(ltype)) or (
855+
type(other) is date and pa.types.is_timestamp(ltype)
856+
):
857+
# GH#62157 match non-pyarrow behavior
858+
result = ops.invalid_comparison(self, other, op)
850859
result = pa.array(result, type=pa.bool_())
851-
result = pc.if_else(valid, result, None)
860+
else:
861+
try:
862+
result = pc_func(self._pa_array, self._box_pa(other))
863+
except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
864+
mask = isna(self) | isna(other)
865+
valid = ~mask
866+
result = np.zeros(len(self), dtype="bool")
867+
np_array = np.array(self)
868+
try:
869+
result[valid] = op(np_array[valid], other)
870+
except TypeError:
871+
result = ops.invalid_comparison(self, other, op)
872+
result = pa.array(result, type=pa.bool_())
873+
result = pc.if_else(valid, result, None)
852874
else:
853875
raise NotImplementedError(
854876
f"{op.__name__} not implemented for {type(other)}"

pandas/core/arrays/datetimelike.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,8 @@ def _cmp_method(self, other, op):
971971
try:
972972
other = self._validate_comparison_value(other)
973973
except InvalidComparison:
974+
if hasattr(other, "dtype") and isinstance(other.dtype, ArrowDtype):
975+
return NotImplemented
974976
return invalid_comparison(self, other, op)
975977

976978
dtype = getattr(other, "dtype", None)

pandas/tests/extension/test_arrow.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3578,3 +3578,53 @@ def test_timestamp_dtype_matches_to_datetime():
35783578
expected = pd.Series([ts], dtype=dtype1).convert_dtypes(dtype_backend="pyarrow")
35793579

35803580
tm.assert_series_equal(result, expected)
3581+
3582+
3583+
def test_timestamp_vs_dt64_comparison():
3584+
# GH#60937
3585+
left = pd.Series(["2016-01-01"], dtype="timestamp[ns][pyarrow]")
3586+
right = left.astype("datetime64[ns]")
3587+
3588+
result = left == right
3589+
expected = pd.Series([True], dtype="bool[pyarrow]")
3590+
tm.assert_series_equal(result, expected)
3591+
3592+
result = right == left
3593+
tm.assert_series_equal(result, expected)
3594+
3595+
3596+
# TODO: reuse assert_invalid_comparison?
3597+
def test_date_vs_timestamp_scalar_comparison():
3598+
# GH#62157 match non-pyarrow behavior
3599+
ser = pd.Series(["2016-01-01"], dtype="date32[pyarrow]")
3600+
ser2 = ser.astype("timestamp[ns][pyarrow]")
3601+
3602+
ts = ser2[0]
3603+
dt = ser[0]
3604+
3605+
# date dtype doesn't match a Timestamp object
3606+
assert not (ser == ts).any()
3607+
assert not (ts == ser).any()
3608+
3609+
# timestamp dtype doesn't match date object
3610+
assert not (ser2 == dt).any()
3611+
assert not (dt == ser2).any()
3612+
3613+
3614+
# TODO: reuse assert_invalid_comparison?
3615+
def test_date_vs_timestamp_array_comparison():
3616+
# GH#62157 match non-pyarrow behavior
3617+
# date dtype is checked against both timestamp[pyarrow] and datetime64[ns]
3618+
ser = pd.Series(["2016-01-01"], dtype="date32[pyarrow]")
3619+
ser2 = ser.astype("timestamp[ns][pyarrow]")
3620+
ser3 = ser.astype("datetime64[ns]")
3621+
3622+
assert not (ser == ser2).any()
3623+
assert not (ser2 == ser).any()
3624+
assert (ser != ser2).all()
3625+
assert (ser2 != ser).all()
3626+
3627+
assert not (ser == ser3).any()
3628+
assert not (ser3 == ser).any()
3629+
assert (ser != ser3).all()
3630+
assert (ser3 != ser).all()

0 commit comments

Comments
 (0)