pandas-dev · mroeschke · Sep 25, 2025 · Jul 26, 2025 · Aug 13, 2025 · Aug 13, 2025
diff --git a/doc/source/whatsnew/v2.3.2.rst b/doc/source/whatsnew/v2.3.2.rst
@@ -22,6 +22,8 @@ become the default string dtype in pandas 3.0. See
 
 Bug fixes
 ^^^^^^^^^
+- Fix :meth:`~Series.str.isdigit` to correctly recognize unicode superscript
+  characters as digits for :class:`StringDtype` backed by PyArrow (:issue:`61466`)
 - Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the
   "string" type in the JSON Table Schema for :class:`StringDtype` columns
   (:issue:`61889`)

diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
@@ -15,6 +15,7 @@
 from pandas.compat import (
     HAS_PYARROW,
     pa_version_under17p0,
+    pa_version_under21p0,
 )
 
 if HAS_PYARROW:
@@ -267,6 +268,12 @@ def _str_isdecimal(self):
         return self._convert_bool_result(result)
 
     def _str_isdigit(self):
+        if pa_version_under21p0:
+            # GH#61466: fall back to str.isdigit; pyarrow<21 misses unicode superscripts
+            res_list = self._apply_elementwise(str.isdigit)
+            return self._convert_bool_result(
+                pa.chunked_array(res_list, type=pa.bool_())
+            )
         result = pc.utf8_is_digit(self._pa_array)
         return self._convert_bool_result(result)
 

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
@@ -3602,16 +3602,26 @@ def casefold(self):
     Series.str.isupper : Check whether all characters are uppercase.
     Series.str.istitle : Check whether all characters are titlecase.
 
-    Examples
-    --------
+    Notes
+    -----
     Similar to ``str.isdecimal`` but also includes special digits, like
     superscripted and subscripted digits in unicode.
 
+    The exact behavior of this method, i.e. which unicode characters are
+    considered as digits, depends on the backend used for string operations,
+    and there can be small differences.
+    For example, Python considers the ³ superscript character as a digit, but
+    not the ⅕ fraction character, while PyArrow considers both as digits. For
+    simple (ASCII) decimal numbers, the behavior is consistent.
+
+    Examples
+    --------
+
     >>> s3 = pd.Series(['23', '³', '⅕', ''])
     >>> s3.str.isdigit()
     0     True
-    1    False
-    2    False
+    1     True
+    2     True
     3    False
     dtype: bool
     """

diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pytest
 
+from pandas.compat import pa_version_under21p0
 from pandas.errors import Pandas4Warning
 
 from pandas import (
@@ -15,6 +16,7 @@
     Index,
     MultiIndex,
     Series,
+    StringDtype,
     option_context,
 )
 import pandas._testing as tm
@@ -249,8 +251,9 @@ def test_ismethods(method, expected, any_string_dtype):
 @pytest.mark.parametrize(
     "method, expected",
     [
-        ("isnumeric", [False, True, True, False, True, True, False]),
-        ("isdecimal", [False, True, False, False, False, True, False]),
+        ("isnumeric", [False, True, True, True, False, True, True, False]),
+        ("isdecimal", [False, True, False, False, False, False, True, False]),
+        ("isdigit", [False, True, True, False, False, False, True, False]),
     ],
 )
 def test_isnumeric_unicode(method, expected, any_string_dtype):
@@ -259,19 +262,35 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
     # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
     # 0xFF13: ３ Em 3  # noqa: RUF003
     ser = Series(
-        ["A", "3", "¼", "★", "፸", "３", "four"],  # noqa: RUF001
+        ["A", "3", "³", "¼", "★", "፸", "３", "four"],  # noqa: RUF001
         dtype=any_string_dtype,
     )
     expected_dtype = (
         "bool" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
     )
     expected = Series(expected, dtype=expected_dtype)
+    if (
+        method == "isdigit"
+        and isinstance(ser.dtype, StringDtype)
+        and ser.dtype.storage == "pyarrow"
+        and not pa_version_under21p0
+    ):
+        # known difference in behavior between Python and PyArrow unicode handling:
+        # pyarrow 21+ considers ¼ and ፸ to be digits, while Python does not
+        expected.iloc[3] = True
+        expected.iloc[5] = True
+
     result = getattr(ser.str, method)()
     tm.assert_series_equal(result, expected)
 
     # compare with standard library
-    expected = [getattr(item, method)() for item in ser]
-    assert list(result) == expected
+    # (only for non-pyarrow storage given the above differences)
+    if any_string_dtype == "object" or (
+        isinstance(any_string_dtype, StringDtype)
+        and any_string_dtype.storage == "python"
+    ):
+        expected = [getattr(item, method)() for item in ser]
+        assert list(result) == expected
 
 
 @pytest.mark.parametrize(