Skip to content

Commit 9fe33bc

Browse files
authored
DEPR: Enforce deprecation of include_groups in groupby.apply (#60566)
* DEPR: Enforce deprecation of include_groups in groupby.apply * Fixup * Inline _apply
1 parent 1e530b6 commit 9fe33bc

24 files changed

+271
-683
lines changed

doc/source/user_guide/cookbook.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
459459
df
460460
461461
# List the size of the animals with the highest weight.
462-
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()], include_groups=False)
462+
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])
463463
464464
`Using get_group
465465
<https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
482482
return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])
483483
484484
485-
expected_df = gb.apply(GrowUp, include_groups=False)
485+
expected_df = gb.apply(GrowUp)
486486
expected_df
487487
488488
`Expanding apply

doc/source/user_guide/groupby.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -1074,7 +1074,7 @@ missing values with the ``ffill()`` method.
10741074
).set_index("date")
10751075
df_re
10761076
1077-
df_re.groupby("group").resample("1D", include_groups=False).ffill()
1077+
df_re.groupby("group").resample("1D").ffill()
10781078
10791079
.. _groupby.filter:
10801080

@@ -1252,13 +1252,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare
12521252

12531253
.. ipython:: python
12541254
1255-
df.groupby("A", group_keys=True).apply(lambda x: x, include_groups=False)
1255+
df.groupby("A", group_keys=True).apply(lambda x: x)
12561256
12571257
with
12581258

12591259
.. ipython:: python
12601260
1261-
df.groupby("A", group_keys=False).apply(lambda x: x, include_groups=False)
1261+
df.groupby("A", group_keys=False).apply(lambda x: x)
12621262
12631263
12641264
Numba accelerated routines
@@ -1742,7 +1742,7 @@ column index name will be used as the name of the inserted column:
17421742
result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
17431743
return pd.Series(result, name="metrics")
17441744
1745-
result = df.groupby("a").apply(compute_metrics, include_groups=False)
1745+
result = df.groupby("a").apply(compute_metrics)
17461746
17471747
result
17481748

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,7 @@ Other Removals
554554
- Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`)
555555
- Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
556556
- Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`)
557+
- Removed specifying ``include_groups=True`` in :meth:`.DataFrameGroupBy.apply` and :meth:`.Resampler.apply` (:issue:`7155`)
557558

558559
.. ---------------------------------------------------------------------------
559560
.. _whatsnew_300.performance:

pandas/core/groupby/groupby.py

+23-66
Original file line numberDiff line numberDiff line change
@@ -1393,7 +1393,7 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs):
13931393
# -----------------------------------------------------------------
13941394
# apply/agg/transform
13951395

1396-
def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
1396+
def apply(self, func, *args, include_groups: bool = False, **kwargs) -> NDFrameT:
13971397
"""
13981398
Apply function ``func`` group-wise and combine the results together.
13991399
@@ -1419,18 +1419,17 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
14191419
*args : tuple
14201420
Optional positional arguments to pass to ``func``.
14211421
1422-
include_groups : bool, default True
1422+
include_groups : bool, default False
14231423
Passing True is no longer allowed and raises a ``ValueError``. Previously,
14241424
True attempted to apply ``func`` to the groupings when they were columns of
14251425
the DataFrame, recomputing with the groupings excluded on a TypeError.
14261426
When False, the groupings will be excluded when applying ``func``.
14271427
14281428
.. versionadded:: 2.2.0
14291429
1430-
.. deprecated:: 2.2.0
1430+
.. versionchanged:: 3.0.0
14311431
1432-
Setting include_groups to True is deprecated. Only the value
1433-
False will be allowed in a future version of pandas.
1432+
The default changed from True to False, and True is no longer allowed.
14341433
14351434
**kwargs : dict
14361435
Optional keyword arguments to pass to ``func``.
@@ -1520,7 +1519,7 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
15201519
each group together into a Series, including setting the index as
15211520
appropriate:
15221521
1523-
>>> g1.apply(lambda x: x.C.max() - x.B.min(), include_groups=False)
1522+
>>> g1.apply(lambda x: x.C.max() - x.B.min())
15241523
A
15251524
a 5
15261525
b 2
@@ -1529,11 +1528,13 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
15291528
Example 4: The function passed to ``apply`` returns ``None`` for one of the
15301529
groups. This group is filtered from the result:
15311530
1532-
>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False)
1531+
>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x)
15331532
B C
15341533
0 1 4
15351534
1 2 6
15361535
"""
1536+
if include_groups:
1537+
raise ValueError("include_groups=True is no longer allowed.")
15371538
if isinstance(func, str):
15381539
if hasattr(self, func):
15391540
res = getattr(self, func)
@@ -1560,33 +1561,7 @@ def f(g):
15601561
else:
15611562
f = func
15621563

1563-
if not include_groups:
1564-
return self._python_apply_general(f, self._obj_with_exclusions)
1565-
1566-
try:
1567-
result = self._python_apply_general(f, self._selected_obj)
1568-
if (
1569-
not isinstance(self.obj, Series)
1570-
and self._selection is None
1571-
and self._selected_obj.shape != self._obj_with_exclusions.shape
1572-
):
1573-
warnings.warn(
1574-
message=_apply_groupings_depr.format(type(self).__name__, "apply"),
1575-
category=DeprecationWarning,
1576-
stacklevel=find_stack_level(),
1577-
)
1578-
except TypeError:
1579-
# gh-20949
1580-
# try again, with .apply acting as a filtering
1581-
# operation, by excluding the grouping column
1582-
# This would normally not be triggered
1583-
# except if the udf is trying an operation that
1584-
# fails on *some* columns, e.g. a numeric operation
1585-
# on a string grouper column
1586-
1587-
return self._python_apply_general(f, self._obj_with_exclusions)
1588-
1589-
return result
1564+
return self._python_apply_general(f, self._obj_with_exclusions)
15901565

15911566
@final
15921567
def _python_apply_general(
@@ -3424,7 +3399,9 @@ def describe(
34243399
return result
34253400

34263401
@final
3427-
def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
3402+
def resample(
3403+
self, rule, *args, include_groups: bool = False, **kwargs
3404+
) -> Resampler:
34283405
"""
34293406
Provide resampling when using a TimeGrouper.
34303407
@@ -3449,10 +3426,9 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
34493426
34503427
.. versionadded:: 2.2.0
34513428
3452-
.. deprecated:: 2.2.0
3429+
.. versionchanged:: 3.0.0
34533430
3454-
Setting include_groups to True is deprecated. Only the value
3455-
False will be allowed in a future version of pandas.
3431+
The default was changed to False, and True is no longer allowed.
34563432
34573433
**kwargs
34583434
Possible arguments are `how`, `fill_method`, `limit`, `kind` and
@@ -3485,7 +3461,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
34853461
Downsample the DataFrame into 3 minute bins and sum the values of
34863462
the timestamps falling into a bin.
34873463
3488-
>>> df.groupby("a").resample("3min", include_groups=False).sum()
3464+
>>> df.groupby("a").resample("3min").sum()
34893465
b
34903466
a
34913467
0 2000-01-01 00:00:00 2
@@ -3494,7 +3470,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
34943470
34953471
Upsample the series into 30 second bins.
34963472
3497-
>>> df.groupby("a").resample("30s", include_groups=False).sum()
3473+
>>> df.groupby("a").resample("30s").sum()
34983474
b
34993475
a
35003476
0 2000-01-01 00:00:00 1
@@ -3508,7 +3484,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
35083484
35093485
Resample by month. Values are assigned to the month of the period.
35103486
3511-
>>> df.groupby("a").resample("ME", include_groups=False).sum()
3487+
>>> df.groupby("a").resample("ME").sum()
35123488
b
35133489
a
35143490
0 2000-01-31 3
@@ -3517,11 +3493,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
35173493
Downsample the series into 3 minute bins as above, but close the right
35183494
side of the bin interval.
35193495
3520-
>>> (
3521-
... df.groupby("a")
3522-
... .resample("3min", closed="right", include_groups=False)
3523-
... .sum()
3524-
... )
3496+
>>> (df.groupby("a").resample("3min", closed="right").sum())
35253497
b
35263498
a
35273499
0 1999-12-31 23:57:00 1
@@ -3532,11 +3504,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
35323504
the bin interval, but label each bin using the right edge instead of
35333505
the left.
35343506
3535-
>>> (
3536-
... df.groupby("a")
3537-
... .resample("3min", closed="right", label="right", include_groups=False)
3538-
... .sum()
3539-
... )
3507+
>>> (df.groupby("a").resample("3min", closed="right", label="right").sum())
35403508
b
35413509
a
35423510
0 2000-01-01 00:00:00 1
@@ -3545,11 +3513,10 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
35453513
"""
35463514
from pandas.core.resample import get_resampler_for_grouping
35473515

3548-
# mypy flags that include_groups could be specified via `*args` or `**kwargs`
3549-
# GH#54961 would resolve.
3550-
return get_resampler_for_grouping( # type: ignore[misc]
3551-
self, rule, *args, include_groups=include_groups, **kwargs
3552-
)
3516+
if include_groups:
3517+
raise ValueError("include_groups=True is no longer allowed.")
3518+
3519+
return get_resampler_for_grouping(self, rule, *args, **kwargs)
35533520

35543521
@final
35553522
def rolling(
@@ -5561,13 +5528,3 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
55615528
mi = MultiIndex(levels=levels, codes=codes, names=[idx.name, None])
55625529

55635530
return mi
5564-
5565-
5566-
# GH#7155
5567-
_apply_groupings_depr = (
5568-
"{}.{} operated on the grouping columns. This behavior is deprecated, "
5569-
"and in a future version of pandas the grouping columns will be excluded "
5570-
"from the operation. Either pass `include_groups=False` to exclude the "
5571-
"groupings or explicitly select the grouping columns after groupby to silence "
5572-
"this warning."
5573-
)

0 commit comments

Comments
 (0)