Skip to content

Commit 147505a

Browse files
authored
Merge branch 'pandas-dev:main' into main
2 parents adf55a0 + 4088ec2 commit 147505a

File tree

11 files changed

+82
-71
lines changed

11 files changed

+82
-71
lines changed

doc/source/user_guide/migration-3-strings.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,37 @@ the :meth:`~pandas.Series.str.decode` method now has a ``dtype`` parameter to be
315315
able to specify object dtype instead of the default of string dtype for this use
316316
case.
317317

318+
:meth:`Series.values` now returns an :class:`~pandas.api.extensions.ExtensionArray`
319+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
320+
321+
With object dtype, using ``.values`` on a Series will return the underlying NumPy array.
322+
323+
.. code-block:: python
324+
325+
>>> ser = pd.Series(["a", "b", np.nan], dtype="object")
326+
>>> type(ser.values)
327+
<class 'numpy.ndarray'>
328+
329+
However, with the new string dtype, the underlying ExtensionArray is returned instead.
330+
331+
.. code-block:: python
332+
333+
>>> ser = pd.Series(["a", "b", pd.NA], dtype="str")
334+
>>> ser.values
335+
<ArrowStringArray>
336+
['a', 'b', nan]
337+
Length: 3, dtype: str
338+
339+
If your code requires a NumPy array, you should use :meth:`Series.to_numpy`.
340+
341+
.. code-block:: python
342+
343+
>>> ser = pd.Series(["a", "b", pd.NA], dtype="str")
344+
>>> ser.to_numpy()
345+
array(['a', 'b', nan], dtype=object)
346+
347+
In general, prefer :meth:`Series.to_numpy` (to get a NumPy array) or :attr:`Series.array` (to get an ExtensionArray) over :attr:`Series.values`, whose return type depends on the dtype.
348+
318349
Notable bug fixes
319350
~~~~~~~~~~~~~~~~~
320351

pandas/core/frame.py

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@
7171
)
7272
from pandas.util._exceptions import (
7373
find_stack_level,
74-
rewrite_warning,
7574
)
7675
from pandas.util._validators import (
7776
validate_ascending,
@@ -11926,25 +11925,13 @@ def _get_data() -> DataFrame:
1192611925
row_index = np.tile(np.arange(nrows), ncols)
1192711926
col_index = np.repeat(np.arange(ncols), nrows)
1192811927
ser = Series(arr, index=col_index, copy=False)
11929-
# GroupBy will raise a warning with SeriesGroupBy as the object,
11930-
# likely confusing users
11931-
with rewrite_warning(
11932-
target_message=(
11933-
f"The behavior of SeriesGroupBy.{name} with all-NA values"
11934-
),
11935-
target_category=FutureWarning,
11936-
new_message=(
11937-
f"The behavior of {type(self).__name__}.{name} with all-NA "
11938-
"values, or any-NA and skipna=False, is deprecated. In "
11939-
"a future version this will raise ValueError"
11940-
),
11941-
):
11942-
result = ser.groupby(row_index).agg(name, **kwds)
11928+
if name == "all":
11929+
# Behavior here appears incorrect; preserving
11930+
# for backwards compatibility for now.
11931+
# See https://github.com/pandas-dev/pandas/issues/57171
11932+
skipna = True
11933+
result = ser.groupby(row_index).agg(name, **kwds, skipna=skipna)
1194311934
result.index = df.index
11944-
if not skipna and name not in ("any", "all"):
11945-
mask = df.isna().to_numpy(dtype=np.bool_).any(axis=1)
11946-
other = -1 if name in ("idxmax", "idxmin") else lib.no_default
11947-
result = result.mask(mask, other)
1194811935
return result
1194911936

1195011937
df = df.T
@@ -13258,13 +13245,11 @@ def idxmin(
1325813245
# indices will always be np.ndarray since axis is not N
1325913246

1326013247
if (indices == -1).any():
13261-
warnings.warn(
13262-
f"The behavior of {type(self).__name__}.idxmin with all-NA "
13263-
"values, or any-NA and skipna=False, is deprecated. In a future "
13264-
"version this will raise ValueError",
13265-
FutureWarning,
13266-
stacklevel=find_stack_level(),
13267-
)
13248+
if skipna:
13249+
msg = "Encountered all NA values"
13250+
else:
13251+
msg = "Encountered an NA values with skipna=False"
13252+
raise ValueError(msg)
1326813253

1326913254
index = data._get_axis(axis)
1327013255
result = algorithms.take(
@@ -13365,13 +13350,11 @@ def idxmax(
1336513350
# indices will always be 1d array since axis is not None
1336613351

1336713352
if (indices == -1).any():
13368-
warnings.warn(
13369-
f"The behavior of {type(self).__name__}.idxmax with all-NA "
13370-
"values, or any-NA and skipna=False, is deprecated. In a future "
13371-
"version this will raise ValueError",
13372-
FutureWarning,
13373-
stacklevel=find_stack_level(),
13374-
)
13353+
if skipna:
13354+
msg = "Encountered all NA values"
13355+
else:
13356+
msg = "Encountered an NA values with skipna=False"
13357+
raise ValueError(msg)
1337513358

1337613359
index = data._get_axis(axis)
1337713360
result = algorithms.take(

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5703,10 +5703,7 @@ def _idxmax_idxmin(
57035703
"Specify observed=True in groupby instead."
57045704
)
57055705
elif not skipna and self._obj_with_exclusions.isna().any(axis=None):
5706-
raise ValueError(
5707-
f"{type(self).__name__}.{how} with skipna=False encountered an NA "
5708-
f"value."
5709-
)
5706+
raise ValueError(f"{how} with skipna=False encountered an NA value.")
57105707

57115708
result = self._agg_general(
57125709
numeric_only=numeric_only,
@@ -5724,8 +5721,7 @@ def _wrap_idxmax_idxmin(
57245721
result = res.astype(index.dtype)
57255722
elif skipna and res.lt(0).any(axis=None):
57265723
raise ValueError(
5727-
f"{type(self).__name__}.{how} with skipna=True encountered all NA "
5728-
f"values in a group."
5724+
f"{how} with skipna=True encountered all NA values in a group."
57295725
)
57305726
else:
57315727
if isinstance(index, MultiIndex):

pandas/tests/frame/test_reductions.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2160,9 +2160,7 @@ def test_numeric_ea_axis_1(method, skipna, min_count, any_numeric_ea_dtype):
21602160
kwargs["min_count"] = min_count
21612161

21622162
if not skipna and method in ("idxmax", "idxmin"):
2163-
# GH#57745 - EAs use groupby for axis=1 which still needs a proper deprecation.
2164-
msg = f"The behavior of DataFrame.{method} with all-NA values"
2165-
with tm.assert_produces_warning(FutureWarning, match=msg):
2163+
with pytest.raises(ValueError, match="encountered an NA value"):
21662164
getattr(df, method)(axis=1, **kwargs)
21672165
with pytest.raises(ValueError, match="Encountered an NA value"):
21682166
getattr(expected_df, method)(axis=1, **kwargs)

pandas/tests/groupby/test_libgroupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def test_cython_group_mean_not_datetimelike_but_has_NaT_values():
285285
)
286286

287287

288-
def test_cython_group_mean_Inf_at_begining_and_end():
288+
def test_cython_group_mean_Inf_at_beginning_and_end():
289289
# GH 50367
290290
actual = np.array([[np.nan, np.nan], [np.nan, np.nan]], dtype="float64")
291291
counts = np.array([0, 0], dtype="int64")
@@ -314,7 +314,7 @@ def test_cython_group_mean_Inf_at_begining_and_end():
314314
([[np.inf], [-np.inf], [-np.inf]], [[np.inf], [-np.inf]]),
315315
],
316316
)
317-
def test_cython_group_sum_Inf_at_begining_and_end(values, out):
317+
def test_cython_group_sum_Inf_at_beginning_and_end(values, out):
318318
# GH #53606
319319
actual = np.array([[np.nan], [np.nan]], dtype="float64")
320320
counts = np.array([0, 0], dtype="int64")

pandas/tests/groupby/test_reductions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
291291
gb = df.groupby("a")
292292

293293
if not skipna:
294-
msg = f"DataFrameGroupBy.{how} with skipna=False"
294+
msg = f"{how} with skipna=False"
295295
with pytest.raises(ValueError, match=msg):
296296
getattr(gb, how)(skipna=skipna)
297297
return

pandas/tests/groupby/transform/test_transform.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1494,7 +1494,7 @@ def test_idxmin_idxmax_transform_args(how, skipna, numeric_only):
14941494
expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only)
14951495
tm.assert_frame_equal(result, expected)
14961496
else:
1497-
msg = f"DataFrameGroupBy.{how} with skipna=False encountered an NA value"
1497+
msg = f"{how} with skipna=False encountered an NA value"
14981498
with pytest.raises(ValueError, match=msg):
14991499
gb.transform(how, skipna, numeric_only)
15001500

pandas/tests/reshape/test_from_dummies.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def test_error_contains_non_dummies():
100100
from_dummies(dummies)
101101

102102

103-
def test_error_with_prefix_multiple_seperators():
103+
def test_error_with_prefix_multiple_separators():
104104
dummies = DataFrame(
105105
{
106106
"col1_a": [1, 0, 1],

scripts/tests/test_validate_min_versions_in_sync.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,34 +10,36 @@
1010
pin_min_versions_to_yaml_file,
1111
)
1212

13+
DATA_PATH = pathlib.Path(__file__).parents[2] / "scripts/tests/data/"
14+
1315

1416
@pytest.mark.parametrize(
1517
"src_toml, src_yaml, expected_yaml",
1618
[
1719
(
18-
pathlib.Path("scripts/tests/data/deps_minimum.toml"),
19-
pathlib.Path("scripts/tests/data/deps_unmodified_random.yaml"),
20-
pathlib.Path("scripts/tests/data/deps_expected_random.yaml"),
20+
DATA_PATH / "deps_minimum.toml",
21+
DATA_PATH / "deps_unmodified_random.yaml",
22+
DATA_PATH / "deps_expected_random.yaml",
2123
),
2224
(
23-
pathlib.Path("scripts/tests/data/deps_minimum.toml"),
24-
pathlib.Path("scripts/tests/data/deps_unmodified_same_version.yaml"),
25-
pathlib.Path("scripts/tests/data/deps_expected_same_version.yaml"),
25+
DATA_PATH / "deps_minimum.toml",
26+
DATA_PATH / "deps_unmodified_same_version.yaml",
27+
DATA_PATH / "deps_expected_same_version.yaml",
2628
),
2729
(
28-
pathlib.Path("scripts/tests/data/deps_minimum.toml"),
29-
pathlib.Path("scripts/tests/data/deps_unmodified_duplicate_package.yaml"),
30-
pathlib.Path("scripts/tests/data/deps_expected_duplicate_package.yaml"),
30+
DATA_PATH / "deps_minimum.toml",
31+
DATA_PATH / "deps_unmodified_duplicate_package.yaml",
32+
DATA_PATH / "deps_expected_duplicate_package.yaml",
3133
),
3234
(
33-
pathlib.Path("scripts/tests/data/deps_minimum.toml"),
34-
pathlib.Path("scripts/tests/data/deps_unmodified_no_version.yaml"),
35-
pathlib.Path("scripts/tests/data/deps_expected_no_version.yaml"),
35+
DATA_PATH / "deps_minimum.toml",
36+
DATA_PATH / "deps_unmodified_no_version.yaml",
37+
DATA_PATH / "deps_expected_no_version.yaml",
3638
),
3739
(
38-
pathlib.Path("scripts/tests/data/deps_minimum.toml"),
39-
pathlib.Path("scripts/tests/data/deps_unmodified_range.yaml"),
40-
pathlib.Path("scripts/tests/data/deps_expected_range.yaml"),
40+
DATA_PATH / "deps_minimum.toml",
41+
DATA_PATH / "deps_unmodified_range.yaml",
42+
DATA_PATH / "deps_expected_range.yaml",
4143
),
4244
],
4345
)

scripts/tests/test_validate_unwanted_patterns.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ class TestStringsWithWrongPlacedWhitespace:
1010
"data",
1111
[
1212
(
13-
"""
13+
r"""
1414
msg = (
1515
"foo\n"
1616
" bar"

0 commit comments

Comments
 (0)