diff --git a/pandas/__init__.py b/pandas/__init__.py index cc786d1141c48..209f606f7039d 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -134,6 +134,7 @@ from pandas import api, arrays, errors, io, plotting, tseries from pandas import testing from pandas.util._print_versions import show_versions +from pandas._libs.lib import Scalar from pandas.io.api import ( # excel @@ -268,6 +269,7 @@ "PeriodDtype", "PeriodIndex", "RangeIndex", + "Scalar", "Series", "SparseDtype", "StringDtype", diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index e50b301c34868..988f2e882da64 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -34,6 +34,9 @@ class _NoDefault(Enum): no_default: Final = _NoDefault.no_default NoDefault: TypeAlias = Literal[_NoDefault.no_default] +class Scalar: + item: object + i8max: int u8max: int diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 83a1b09f00a11..299d945672235 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -212,15 +212,18 @@ def is_scalar(val: object) -> bool: """ # Start with C-optimized checks - if (cnp.PyArray_IsAnyScalar(val) - # PyArray_IsAnyScalar is always False for bytearrays on Py3 - or PyDate_Check(val) - or PyDelta_Check(val) - or PyTime_Check(val) - # We differ from numpy, which claims that None is not scalar; - # see np.isscalar - or val is C_NA - or val is None): + if ( + cnp.PyArray_IsAnyScalar(val) + # PyArray_IsAnyScalar is always False for bytearrays on Py3 + or PyDate_Check(val) + or PyDelta_Check(val) + or PyTime_Check(val) + # We differ from numpy, which claims that None is not scalar; + # see np.isscalar + or val is C_NA + or val is None + or type(val) is Scalar + ): return True # Next use C-optimized checks to exclude common non-scalars before falling @@ -3297,3 +3300,15 @@ def is_np_dtype(object dtype, str kinds=None) -> bool: if kinds is None: return True return dtype.kind in kinds + + +cdef class Scalar: + """ + Class for wrapping list-like objects to indicate they should be treated + as scalars for e.g. arithmetic operations. + """ + cdef: + readonly object item + + def __cinit__(self, obj): + self.item = obj diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4b5d2acf008a8..d312727fd4e9a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -126,6 +126,15 @@ def _cat_compare_op(op): @unpack_zerodim_and_defer(opname) def func(self, other): + if is_list_like(other) and not isinstance(other, (np.ndarray, ExtensionArray)): + warnings.warn( + "Comparison of Categorical to list-like objects intended " + "to be treated as scalars is deprecated. Wrap the scalar in " + "pd.Scalar(item) before comparing instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + hashable = is_hashable(other) if is_list_like(other) and len(other) != len(self) and not hashable: # in hashable case we may have a tuple that is itself a category @@ -158,7 +167,10 @@ def func(self, other): ret[mask] = fill_value return ret - if hashable: + if hashable or isinstance(other, lib.Scalar): + if isinstance(other, lib.Scalar): + other = other.item + if other in self.categories: i = self._unbox_scalar(other) ret = op(self._codes, i) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 849a81eaf56d9..3c9de7df64544 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -96,6 +96,7 @@ class TestPDApi(Base): "Float32Dtype", "Float64Dtype", "NamedAgg", + "Scalar", ] # these are already deprecated; awaiting removal diff --git a/pandas/tests/arithmetic/test_categorical.py b/pandas/tests/arithmetic/test_categorical.py index d6f3a13ce6705..c1d7942a3e696 100644 --- a/pandas/tests/arithmetic/test_categorical.py +++ b/pandas/tests/arithmetic/test_categorical.py @@ -1,7 +1,10 @@ import numpy as np +from pandas.errors import Pandas4Warning + from pandas import ( Categorical, + Scalar, Series, ) import pandas._testing as tm @@ -18,8 +21,14 @@ def test_categorical_tuple_equality(self): # GH 18050 ser = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)]) expected = Series([True, False, True, False, False]) + result = ser == (0, 0) tm.assert_series_equal(result, expected) - result = ser.astype("category") == (0, 0) + msg = "Comparison of Categorical to list-like objects" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = ser.astype("category") == (0, 0) + tm.assert_series_equal(result, expected) + + result = ser.astype("category") == Scalar((0, 0)) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index dbc6cc7715744..503fc27ca19cd 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + import pandas as pd from pandas import ( Categorical, @@ -202,11 +204,21 @@ def test_comparison_with_tuple(self): expected = np.array([True, False, False, False], dtype=bool) tm.assert_numpy_array_equal(result, expected) - result = cat == (0, 1) + msg = "Comparison of Categorical to list-like objects" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = cat == (0, 1) expected = np.array([False, True, False, True], dtype=bool) tm.assert_numpy_array_equal(result, expected) - result = cat != (0, 1) + result = cat == pd.Scalar((0, 1)) + tm.assert_numpy_array_equal(result, expected) + + msg = "Comparison of Categorical to list-like objects" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + result = cat != (0, 1) + tm.assert_numpy_array_equal(result, ~expected) + + result = cat != pd.Scalar((0, 1)) tm.assert_numpy_array_equal(result, ~expected) @pytest.mark.filterwarnings("ignore::RuntimeWarning") diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index d0955912e12c8..0d87d654dbf61 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -260,6 +260,14 @@ def test_is_list_like_native_container_types(): assert not inference.is_list_like(tuple[str]) +def test_scalar_list_not_listlike(): + item = lib.Scalar([0, 1]) + assert not inference.is_list_like(item) + + tup = lib.Scalar((0, 1)) + assert not inference.is_list_like(tup) + + def test_is_sequence(): is_seq = inference.is_sequence assert is_seq((1, 2)) @@ -1892,6 +1900,10 @@ def test_is_timedelta(self): class TestIsScalar: + def test_is_scalar_scalar_class(self): + assert is_scalar(lib.Scalar([0, 1])) + assert is_scalar(lib.Scalar((0, 1))) + def test_is_scalar_builtin_scalars(self): assert is_scalar(None) assert is_scalar(True) diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py index f619ba4dd204b..82529ee5123e9 100644 --- a/pandas/tests/libs/test_lib.py +++ b/pandas/tests/libs/test_lib.py @@ -14,6 +14,15 @@ import pandas._testing as tm +class TestScalar: + def test_scalar_immutable(self): + scalar = lib.Scalar("foo") + + msg = "attribute 'item' of 'pandas._libs.lib.Scalar' objects is not writable" + with pytest.raises(AttributeError, match=msg): + scalar.item = 2 + + class TestMisc: def test_max_len_string_array(self): arr = a = np.array(["foo", "b", np.nan], dtype="object")