Merge branch 'pandas-dev:main' into main

hasanrashid · web-flow · commit 147505ab2471 · 2025-08-27T20:25:02.000-04:00
diff --git a/doc/source/user_guide/migration-3-strings.rst b/doc/source/user_guide/migration-3-strings.rst
@@ -315,6 +315,37 @@ the :meth:`~pandas.Series.str.decode` method now has a ``dtype`` parameter to be
 able to specify object dtype instead of the default of string dtype for this use
 case.
 
+:attr:`Series.values` now returns an :class:`~pandas.api.extensions.ExtensionArray`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+With object dtype, using ``.values`` on a Series will return the underlying NumPy array.
+
+.. code-block:: python
+
+   >>> ser = pd.Series(["a", "b", np.nan], dtype="object")
+   >>> type(ser.values)
+   <class 'numpy.ndarray'>
+
+However, with the new string dtype, the underlying ExtensionArray is returned instead.
+
+.. code-block:: python
+
+   >>> ser = pd.Series(["a", "b", pd.NA], dtype="str")
+   >>> ser.values
+   <ArrowStringArray>
+   ['a', 'b', nan]
+   Length: 3, dtype: str
+
+If your code requires a NumPy array, you should use :meth:`Series.to_numpy`.
+
+.. code-block:: python
+
+   >>> ser = pd.Series(["a", "b", pd.NA], dtype="str")
+   >>> ser.to_numpy()
+   array(['a', 'b', nan], dtype=object)
+
+In general, instead of using :attr:`Series.values`, you should prefer :meth:`Series.to_numpy` to get a NumPy array or :attr:`Series.array` to get an ExtensionArray.
+
 Notable bug fixes
 ~~~~~~~~~~~~~~~~~
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -71,7 +71,6 @@
 )
 from pandas.util._exceptions import (
     find_stack_level,
-    rewrite_warning,
 )
 from pandas.util._validators import (
     validate_ascending,
@@ -11926,25 +11925,13 @@ def _get_data() -> DataFrame:
                     row_index = np.tile(np.arange(nrows), ncols)
                     col_index = np.repeat(np.arange(ncols), nrows)
                     ser = Series(arr, index=col_index, copy=False)
-                    # GroupBy will raise a warning with SeriesGroupBy as the object,
-                    # likely confusing users
-                    with rewrite_warning(
-                        target_message=(
-                            f"The behavior of SeriesGroupBy.{name} with all-NA values"
-                        ),
-                        target_category=FutureWarning,
-                        new_message=(
-                            f"The behavior of {type(self).__name__}.{name} with all-NA "
-                            "values, or any-NA and skipna=False, is deprecated. In "
-                            "a future version this will raise ValueError"
-                        ),
-                    ):
-                        result = ser.groupby(row_index).agg(name, **kwds)
+                    if name == "all":
+                        # Behavior here appears incorrect; preserving
+                        # for backwards compatibility for now.
+                        # See https://github.com/pandas-dev/pandas/issues/57171
+                        skipna = True
+                    result = ser.groupby(row_index).agg(name, **kwds, skipna=skipna)
                     result.index = df.index
-                    if not skipna and name not in ("any", "all"):
-                        mask = df.isna().to_numpy(dtype=np.bool_).any(axis=1)
-                        other = -1 if name in ("idxmax", "idxmin") else lib.no_default
-                        result = result.mask(mask, other)
                     return result
 
             df = df.T
@@ -13258,13 +13245,11 @@ def idxmin(
         # indices will always be np.ndarray since axis is not N
 
         if (indices == -1).any():
-            warnings.warn(
-                f"The behavior of {type(self).__name__}.idxmin with all-NA "
-                "values, or any-NA and skipna=False, is deprecated. In a future "
-                "version this will raise ValueError",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
+            if skipna:
+                msg = "Encountered all NA values"
+            else:
+                msg = "Encountered an NA values with skipna=False"
+            raise ValueError(msg)
 
         index = data._get_axis(axis)
         result = algorithms.take(
@@ -13365,13 +13350,11 @@ def idxmax(
         # indices will always be 1d array since axis is not None
 
         if (indices == -1).any():
-            warnings.warn(
-                f"The behavior of {type(self).__name__}.idxmax with all-NA "
-                "values, or any-NA and skipna=False, is deprecated. In a future "
-                "version this will raise ValueError",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
+            if skipna:
+                msg = "Encountered all NA values"
+            else:
+                msg = "Encountered an NA values with skipna=False"
+            raise ValueError(msg)
 
         index = data._get_axis(axis)
         result = algorithms.take(
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -5703,10 +5703,7 @@ def _idxmax_idxmin(
                     "Specify observed=True in groupby instead."
                 )
         elif not skipna and self._obj_with_exclusions.isna().any(axis=None):
-            raise ValueError(
-                f"{type(self).__name__}.{how} with skipna=False encountered an NA "
-                f"value."
-            )
+            raise ValueError(f"{how} with skipna=False encountered an NA value.")
 
         result = self._agg_general(
             numeric_only=numeric_only,
@@ -5724,8 +5721,7 @@ def _wrap_idxmax_idxmin(
             result = res.astype(index.dtype)
         elif skipna and res.lt(0).any(axis=None):
             raise ValueError(
-                f"{type(self).__name__}.{how} with skipna=True encountered all NA "
-                f"values in a group."
+                f"{how} with skipna=True encountered all NA values in a group."
             )
         else:
             if isinstance(index, MultiIndex):
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -2160,9 +2160,7 @@ def test_numeric_ea_axis_1(method, skipna, min_count, any_numeric_ea_dtype):
         kwargs["min_count"] = min_count
 
     if not skipna and method in ("idxmax", "idxmin"):
-        # GH#57745 - EAs use groupby for axis=1 which still needs a proper deprecation.
-        msg = f"The behavior of DataFrame.{method} with all-NA values"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
+        with pytest.raises(ValueError, match="encountered an NA value"):
             getattr(df, method)(axis=1, **kwargs)
         with pytest.raises(ValueError, match="Encountered an NA value"):
             getattr(expected_df, method)(axis=1, **kwargs)
diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py
@@ -285,7 +285,7 @@ def test_cython_group_mean_not_datetimelike_but_has_NaT_values():
     )
 
 
-def test_cython_group_mean_Inf_at_begining_and_end():
+def test_cython_group_mean_Inf_at_beginning_and_end():
     # GH 50367
     actual = np.array([[np.nan, np.nan], [np.nan, np.nan]], dtype="float64")
     counts = np.array([0, 0], dtype="int64")
@@ -314,7 +314,7 @@ def test_cython_group_mean_Inf_at_begining_and_end():
         ([[np.inf], [-np.inf], [-np.inf]], [[np.inf], [-np.inf]]),
     ],
 )
-def test_cython_group_sum_Inf_at_begining_and_end(values, out):
+def test_cython_group_sum_Inf_at_beginning_and_end(values, out):
     # GH #53606
     actual = np.array([[np.nan], [np.nan]], dtype="float64")
     counts = np.array([0, 0], dtype="int64")
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
@@ -291,7 +291,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
     gb = df.groupby("a")
 
     if not skipna:
-        msg = f"DataFrameGroupBy.{how} with skipna=False"
+        msg = f"{how} with skipna=False"
         with pytest.raises(ValueError, match=msg):
             getattr(gb, how)(skipna=skipna)
         return
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -1494,7 +1494,7 @@ def test_idxmin_idxmax_transform_args(how, skipna, numeric_only):
         expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only)
         tm.assert_frame_equal(result, expected)
     else:
-        msg = f"DataFrameGroupBy.{how} with skipna=False encountered an NA value"
+        msg = f"{how} with skipna=False encountered an NA value"
         with pytest.raises(ValueError, match=msg):
             gb.transform(how, skipna, numeric_only)
 
diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py
@@ -100,7 +100,7 @@ def test_error_contains_non_dummies():
         from_dummies(dummies)
 
 
-def test_error_with_prefix_multiple_seperators():
+def test_error_with_prefix_multiple_separators():
     dummies = DataFrame(
         {
             "col1_a": [1, 0, 1],
diff --git a/scripts/tests/test_validate_min_versions_in_sync.py b/scripts/tests/test_validate_min_versions_in_sync.py
@@ -10,34 +10,36 @@
     pin_min_versions_to_yaml_file,
 )
 
+DATA_PATH = pathlib.Path(__file__).parents[2] / "scripts/tests/data/"
+
 
 @pytest.mark.parametrize(
     "src_toml, src_yaml, expected_yaml",
     [
         (
-            pathlib.Path("scripts/tests/data/deps_minimum.toml"),
-            pathlib.Path("scripts/tests/data/deps_unmodified_random.yaml"),
-            pathlib.Path("scripts/tests/data/deps_expected_random.yaml"),
+            DATA_PATH / "deps_minimum.toml",
+            DATA_PATH / "deps_unmodified_random.yaml",
+            DATA_PATH / "deps_expected_random.yaml",
         ),
         (
-            pathlib.Path("scripts/tests/data/deps_minimum.toml"),
-            pathlib.Path("scripts/tests/data/deps_unmodified_same_version.yaml"),
-            pathlib.Path("scripts/tests/data/deps_expected_same_version.yaml"),
+            DATA_PATH / "deps_minimum.toml",
+            DATA_PATH / "deps_unmodified_same_version.yaml",
+            DATA_PATH / "deps_expected_same_version.yaml",
         ),
         (
-            pathlib.Path("scripts/tests/data/deps_minimum.toml"),
-            pathlib.Path("scripts/tests/data/deps_unmodified_duplicate_package.yaml"),
-            pathlib.Path("scripts/tests/data/deps_expected_duplicate_package.yaml"),
+            DATA_PATH / "deps_minimum.toml",
+            DATA_PATH / "deps_unmodified_duplicate_package.yaml",
+            DATA_PATH / "deps_expected_duplicate_package.yaml",
         ),
         (
-            pathlib.Path("scripts/tests/data/deps_minimum.toml"),
-            pathlib.Path("scripts/tests/data/deps_unmodified_no_version.yaml"),
-            pathlib.Path("scripts/tests/data/deps_expected_no_version.yaml"),
+            DATA_PATH / "deps_minimum.toml",
+            DATA_PATH / "deps_unmodified_no_version.yaml",
+            DATA_PATH / "deps_expected_no_version.yaml",
         ),
         (
-            pathlib.Path("scripts/tests/data/deps_minimum.toml"),
-            pathlib.Path("scripts/tests/data/deps_unmodified_range.yaml"),
-            pathlib.Path("scripts/tests/data/deps_expected_range.yaml"),
+            DATA_PATH / "deps_minimum.toml",
+            DATA_PATH / "deps_unmodified_range.yaml",
+            DATA_PATH / "deps_expected_range.yaml",
         ),
     ],
 )
diff --git a/scripts/tests/test_validate_unwanted_patterns.py b/scripts/tests/test_validate_unwanted_patterns.py
@@ -10,7 +10,7 @@ class TestStringsWithWrongPlacedWhitespace:
         "data",
         [
             (
-                """
+                r"""
     msg = (
         "foo\n"
         " bar"
diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py
@@ -23,19 +23,20 @@
 
 from scripts.generate_pip_deps_from_conda import CONDA_TO_PIP
 
-DOC_PATH = pathlib.Path("doc/source/getting_started/install.rst").resolve()
+BASE_PATH = pathlib.Path(__file__).parents[1]
+DOC_PATH = (BASE_PATH / "doc/source/getting_started/install.rst").resolve()
 CI_PATH = next(
-    pathlib.Path("ci/deps").absolute().glob("actions-*-minimum_versions.yaml")
+    (BASE_PATH / "ci/deps").absolute().glob("actions-*-minimum_versions.yaml")
 )
-CODE_PATH = pathlib.Path("pandas/compat/_optional.py").resolve()
-SETUP_PATH = pathlib.Path("pyproject.toml").resolve()
-YAML_PATH = pathlib.Path("ci/deps")
-ENV_PATH = pathlib.Path("environment.yml")
+CODE_PATH = (BASE_PATH / "pandas/compat/_optional.py").resolve()
+SETUP_PATH = (BASE_PATH / "pyproject.toml").resolve()
+YAML_PATH = BASE_PATH / "ci/deps"
+ENV_PATH = BASE_PATH / "environment.yml"
 EXCLUDE_DEPS = {"tzdata", "pyqt", "pyqt5"}
 # pandas package is not available
 # in pre-commit environment
-sys.path.append("pandas/compat")
-sys.path.append("pandas/util")
+sys.path.append(str(BASE_PATH / "pandas/compat"))
+sys.path.append(str(BASE_PATH / "pandas/util"))
 import _exceptions
 import version
 

Original file line number	Diff line number	Diff line change
`@@ -100,7 +100,7 @@ def test_error_contains_non_dummies():`
`100`	`100`	`from_dummies(dummies)`
`101`	`101`
`102`	`102`
`103`		`-def test_error_with_prefix_multiple_seperators():`
	`103`	`+def test_error_with_prefix_multiple_separators():`
`104`	`104`	`dummies = DataFrame(`
`105`	`105`	`{`
`106`	`106`	`"col1_a": [1, 0, 1],`
Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@ class TestStringsWithWrongPlacedWhitespace:`
`10`	`10`	`"data",`
`11`	`11`	`[`
`12`	`12`	`(`
`13`		`- """`
	`13`	`+ r"""`
`14`	`14`	`msg = (`
`15`	`15`	`"foo\n"`
`16`	`16`	`" bar"`