From f169b68323de87e916c3feedded681b03f35962e Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 15:42:45 -0500 Subject: [PATCH 01/23] Initial test case --- pandas/tests/frame/test_arithmetic.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index a9a98a5005bb3..d26c620195086 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2192,3 +2192,19 @@ def test_mixed_col_index_dtype(string_dtype_no_object): expected.columns = expected.columns.astype(string_dtype_no_object) tm.assert_frame_equal(result, expected) + + +def test_df_mul_series_fill_value(): + # GH 61581 + data = np.arange(50).reshape(10, 5) + columns = list("ABCDE") + df = DataFrame(data, columns=columns) + for i in range(5): + df.iat[i, i] = np.nan + df.iat[i + 1, i] = np.nan + df.iat[i + 4, i] = np.nan + + df_result = df[["A", "B", "C", "D"]].mul(df["E"], axis=0, fill_value=5) + df_expected = df[["A", "B", "C", "D"]].mul(df["E"].fillna(5), axis=0) + + tm.assert_frame_equal(df_result, df_expected) From fe7e8c8f32f4a31a56be8a12234489645884d844 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 17:05:27 -0500 Subject: [PATCH 02/23] Updated test case to account for results of mul being NaN if both inputs are NaN --- pandas/tests/frame/test_arithmetic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d26c620195086..ac1761bac2371 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2204,7 +2204,11 @@ def test_df_mul_series_fill_value(): df.iat[i + 1, i] = np.nan df.iat[i + 4, i] = np.nan - df_result = df[["A", "B", "C", "D"]].mul(df["E"], axis=0, fill_value=5) - df_expected = df[["A", "B", "C", "D"]].mul(df["E"].fillna(5), axis=0) + df_a = df.iloc[:, :-1] + df_b = df.iloc[:, -1] + nan_mask = 
df_a.isna().astype(int).mul(df_b.isna().astype(int), axis=0).astype(bool) + + df_result = df_a.mul(df_b, axis=0, fill_value=5) + df_expected = (df_a.fillna(5).mul(df_b.fillna(5), axis=0)).mask(nan_mask, np.nan) tm.assert_frame_equal(df_result, df_expected) From 54217613679dd995add40607af7270b834cfcb5e Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 17:21:49 -0500 Subject: [PATCH 03/23] Removed test cases which expect an error from fill_value --- pandas/tests/frame/test_arithmetic.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index ac1761bac2371..7401656194d01 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -626,12 +626,6 @@ def test_arith_flex_frame_corner(self, float_frame): expected = float_frame.sort_index() * np.nan tm.assert_frame_equal(result, expected) - with pytest.raises(NotImplementedError, match="fill_value"): - float_frame.add(float_frame.iloc[0], fill_value=3) - - with pytest.raises(NotImplementedError, match="fill_value"): - float_frame.add(float_frame.iloc[0], axis="index", fill_value=3) - @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"]) def test_arith_flex_series_ops(self, simple_frame, op): # after arithmetic refactor, add truediv here @@ -665,19 +659,6 @@ def test_arith_flex_series_broadcasting(self, any_real_numpy_dtype): result = df.div(df[0], axis="index") tm.assert_frame_equal(result, expected) - def test_arith_flex_zero_len_raises(self): - # GH 19522 passing fill_value to frame flex arith methods should - # raise even in the zero-length special cases - ser_len0 = Series([], dtype=object) - df_len0 = DataFrame(columns=["A", "B"]) - df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - - with pytest.raises(NotImplementedError, match="fill_value"): - df.add(ser_len0, fill_value="E") - - with pytest.raises(NotImplementedError, match="fill_value"): - 
df_len0.sub(df["A"], axis=None, fill_value=3) - def test_flex_add_scalar_fill_value(self): # GH#12723 dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float") From 32a0f777a886fbd000cebc791a3dd37cce57e273 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 17:46:37 -0500 Subject: [PATCH 04/23] Updated test case to include other operators which included fill_value --- pandas/tests/frame/test_arithmetic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 7401656194d01..249467cec516b 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2175,7 +2175,8 @@ def test_mixed_col_index_dtype(string_dtype_no_object): tm.assert_frame_equal(result, expected) -def test_df_mul_series_fill_value(): +@pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "mod", "truediv", "pow"]) +def test_df_series_fill_value(op): # GH 61581 data = np.arange(50).reshape(10, 5) columns = list("ABCDE") @@ -2189,7 +2190,9 @@ def test_df_mul_series_fill_value(): df_b = df.iloc[:, -1] nan_mask = df_a.isna().astype(int).mul(df_b.isna().astype(int), axis=0).astype(bool) - df_result = df_a.mul(df_b, axis=0, fill_value=5) - df_expected = (df_a.fillna(5).mul(df_b.fillna(5), axis=0)).mask(nan_mask, np.nan) + df_result = getattr(df_a, op)(df_b, axis=0, fill_value=5) + df_expected = getattr(df_a.fillna(5), op)(df_b.fillna(5), axis=0).mask( + nan_mask, np.nan + ) tm.assert_frame_equal(df_result, df_expected) From 42a8b7611cfa6245ed3a78cfc69f2106a1c9cf0a Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 10 Jul 2025 17:48:36 -0500 Subject: [PATCH 05/23] Removed restriction on using fill_value with series Updated docs --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d721213dc38e7..11378b3e5ab07 
100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -998,6 +998,7 @@ MultiIndex - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) - Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`) +- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` raising ``NotImplementedError`` when ``fill_value`` is passed (:issue:`61581`) - Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`) - Bug in :meth:`DataFrame.__setitem__` where column alignment logic would reindex the assigned value with an empty index, incorrectly setting all values to ``NaN``.(:issue:`61841`) - Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` where reindexing :class:`Index` to a :class:`MultiIndex` would incorrectly set all values to ``NaN``.(:issue:`60923`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 91f5cd1679a61..e80fe47feeec7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8498,11 +8498,6 @@ def _flex_arith_method( if self._should_reindex_frame_op(other, op, axis, fill_value, level): return self._arith_method_with_reindex(other, op) - if isinstance(other, Series) and fill_value is not None: - # TODO: We could allow this in cases where we end up going - # through the DataFrame path - raise NotImplementedError(f"fill_value {fill_value} not supported.") - other = ops.maybe_prepare_scalar_for_op(other, self.shape) self, other = self._align_for_op(other, axis, flex=True, level=level) From 654c2f3bd3a059d295435fbba3e6fbdabc5e44f2 Mon Sep 17 00:00:00 2001 From: eicchen Date: Tue, 15 Jul 2025 14:13:20 -0500 Subject: [PATCH 06/23] Included PR suggestions, added separate dtype test (WIP) ---
pandas/tests/frame/test_arithmetic.py | 51 +++++++++++++++++++++------ 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 249467cec516b..0f04b8ed8f0b4 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2176,23 +2176,54 @@ def test_mixed_col_index_dtype(string_dtype_no_object): @pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "mod", "truediv", "pow"]) -def test_df_series_fill_value(op): +def test_df_fill_value_operations(op): # GH 61581 - data = np.arange(50).reshape(10, 5) + input_data = np.arange(50).reshape(10, 5) + fill_val = 5 columns = list("ABCDE") - df = DataFrame(data, columns=columns) + df = DataFrame(input_data, columns=columns) for i in range(5): df.iat[i, i] = np.nan df.iat[i + 1, i] = np.nan df.iat[i + 4, i] = np.nan - df_a = df.iloc[:, :-1] - df_b = df.iloc[:, -1] - nan_mask = df_a.isna().astype(int).mul(df_b.isna().astype(int), axis=0).astype(bool) + df_base = df.iloc[:, :-1] + df_mult = df.iloc[:, -1] + mask = df.isna().values + mask = mask[:, :-1] & mask[:, [-1]] - df_result = getattr(df_a, op)(df_b, axis=0, fill_value=5) - df_expected = getattr(df_a.fillna(5), op)(df_b.fillna(5), axis=0).mask( - nan_mask, np.nan - ) + df_result = getattr(df_base, op)(df_mult, axis=0, fill_value=fill_val) + df_expected = getattr(df_base.fillna(fill_val), op)( + df_mult.fillna(fill_val), axis=0 + ).mask(mask, np.nan) tm.assert_frame_equal(df_result, df_expected) + + +# ! 
Currently implementing +# @pytest.mark.parametrize("input_data, fill_val", +# [ +# (np.arange(50).reshape(10, 5), 5), #Numpy +# (pd.array(np.random.choice([True, False], size=(10, 5)), +# dtype="boolean"), True), +# ] +# ) +# def test_df_fill_value_dtype(input_data, fill_val): +# # GH 61581 +# columns = list("ABCDE") +# df = DataFrame(input_data, columns=columns) +# for i in range(5): +# df.iat[i, i] = np.nan +# df.iat[i + 1, i] = np.nan +# df.iat[i + 4, i] = np.nan + +# df_base = df.iloc[:, :-1] +# df_mult = df.iloc[:, -1] +# mask = df.isna().values +# mask = mask[:, :-1] & mask[:, [-1]] + +# df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) +# df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), +# axis=0)).mask(mask, np.nan) + +# tm.assert_frame_equal(df_result, df_expected) From a360dafd697668930c25b9d78824c3270812b7a3 Mon Sep 17 00:00:00 2001 From: eicchen Date: Tue, 15 Jul 2025 21:50:27 -0500 Subject: [PATCH 07/23] temp files --- test.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ test2.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 test.py create mode 100644 test2.py diff --git a/test.py b/test.py new file mode 100644 index 0000000000000..be4c57afe3aa7 --- /dev/null +++ b/test.py @@ -0,0 +1,60 @@ +# mypy: ignore-errors +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +def print_side_by_side(df1, df2): + # Convert to string and split into lines + df1_str = df1.to_string(index=False).split("\n") + df2_str = df2.to_string(index=False).split("\n") + + # Pad lines to the same length for alignment + max_len_1 = max(len(line) for line in df1_str) + padded_df1 = [line.ljust(max_len_1) for line in df1_str] + + # Print side-by-side + print("Result".ljust(max_len_1) + " | Expected") + for line1, line2 in zip(padded_df1, df2_str): + print(f"{line1} | {line2}") + + +# data = np.arange(50).reshape(10, 5) +# fill_val = 5 + +# data = 
pd.array(np.random.choice([True, False], size=(10, 5)), dtype="boolean") +# fill_val = True + +data = np.arange(50).reshape(10, 5) +# data_mult = pd.array([i for i in range(10)], dtype=tm.SIGNED_INT_NUMPY_DTYPES[0]) +data_mult = pd.array(list(range(10)), dtype=tm.SIGNED_INT_EA_DTYPES[0]) +fill_val = 5 + +# print(tm.ALL_INT_DTYPES) +# print(tm.SIGNED_INT_EA_DTYPES) +# tm.SIGNED_INT_NUMPY_DTYPES[0] +print(type(data_mult)) + +# TODO masking not working with EA with dim > 1 +# NOTE currently trying to get EA testing set up + +columns = list("ABCDE") +df_base = pd.DataFrame(data, columns=columns) +for i in range(5): + df_base.iat[i, i] = np.nan + df_base.iat[i + 1, i] = np.nan + df_base.iat[i + 4, i] = np.nan + +mask = df_base.isna().values + +data_mult_re = data_mult.reshape(10, 1) +mask = mask[:, :-1] & data_mult_re + +df_result = df_base.mul(data_mult, axis=0, fill_value=fill_val) +print(df_result) +# df_expected = (df_base.fillna(fill_val).mul(data_mult.fillna(fill_val), +# axis=0)).mask(mask, np.nan) + +# print_side_by_side(df_result, df_expected) +# # tm.assert_frame_equal(df_result, df_expected) diff --git a/test2.py b/test2.py new file mode 100644 index 0000000000000..a060a8229f1b4 --- /dev/null +++ b/test2.py @@ -0,0 +1,52 @@ +# mypy: ignore-errors +import numpy as np + +import pandas as pd + + +def print_side_by_side(df1, df2): + # Convert to string and split into lines + df1_str = df1.to_string(index=False).split("\n") + df2_str = df2.to_string(index=False).split("\n") + + # Pad lines to the same length for alignment + max_len_1 = max(len(line) for line in df1_str) + padded_df1 = [line.ljust(max_len_1) for line in df1_str] + + # Print side-by-side + print("Result".ljust(max_len_1) + " | Expected") + for line1, line2 in zip(padded_df1, df2_str): + print(f"{line1} | {line2}") + + +data = np.arange(50).reshape(10, 5) +fill_val = 5 + +# data = pd.array(np.random.choice([True, False], size=(10, 5)), dtype="boolean") +# fill_val = True + +# data = pd.array([i for i 
in range(50)], dtype="int") +# fill_val = 5 + +print(type(data)) + +columns = list("ABCDE") +df = pd.DataFrame(data, columns=columns) +for i in range(5): + df.iat[i, i] = np.nan + df.iat[i + 1, i] = np.nan + df.iat[i + 4, i] = np.nan + +df_base = df.iloc[:, :-1] +df_mult = df.iloc[:, [-1]] + +mask = df.isna().values +mask = mask[:, :-1] & mask[:, [-1]] + +df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) +df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), axis=0)).mask( + mask, np.nan +) + +print_side_by_side(df_result, df_expected) +# tm.assert_frame_equal(df_result, df_expected) From e72c1285421425efc91b6c7d8f6e656632542415 Mon Sep 17 00:00:00 2001 From: eicchen Date: Mon, 18 Aug 2025 16:46:07 -0500 Subject: [PATCH 08/23] Added test case to test EA and NUMPY dtypes --- pandas/tests/frame/test_arithmetic.py | 60 +++++++++++++++------------ 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 0f04b8ed8f0b4..353d3fe9324ab 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2200,30 +2200,36 @@ def test_df_fill_value_operations(op): tm.assert_frame_equal(df_result, df_expected) -# ! 
Currently implementing -# @pytest.mark.parametrize("input_data, fill_val", -# [ -# (np.arange(50).reshape(10, 5), 5), #Numpy -# (pd.array(np.random.choice([True, False], size=(10, 5)), -# dtype="boolean"), True), -# ] -# ) -# def test_df_fill_value_dtype(input_data, fill_val): -# # GH 61581 -# columns = list("ABCDE") -# df = DataFrame(input_data, columns=columns) -# for i in range(5): -# df.iat[i, i] = np.nan -# df.iat[i + 1, i] = np.nan -# df.iat[i + 4, i] = np.nan - -# df_base = df.iloc[:, :-1] -# df_mult = df.iloc[:, -1] -# mask = df.isna().values -# mask = mask[:, :-1] & mask[:, [-1]] - -# df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) -# df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), -# axis=0)).mask(mask, np.nan) - -# tm.assert_frame_equal(df_result, df_expected) +dt_params = [ + (tm.ALL_INT_NUMPY_DTYPES, 5), + (tm.ALL_INT_EA_DTYPES, 5), + (tm.FLOAT_NUMPY_DTYPES, 4.9), + (tm.FLOAT_EA_DTYPES, 4.9), +] + +dt_param_flat = [(dt, val) for lst, val in dt_params for dt in lst] + + +@pytest.mark.parametrize("data_type, fill_val", dt_param_flat) +def test_df_fill_value_dtype(data_type, fill_val): + # GH 61581 + base_data = np.arange(50).reshape(10, 5) + df_data = pd.array(base_data, dtype=data_type) + columns = list("ABCDE") + df = DataFrame(df_data, columns=columns) + for i in range(5): + df.iat[i, i] = np.nan + df.iat[i + 1, i] = pd.NA + df.iat[i + 4, i] = pd.NA + + df_base = df.iloc[:, :-1] + df_mult = df.iloc[:, -1] + mask = df.isna().values + mask = mask[:, :-1] & mask[:, [-1]] + + df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) + df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), axis=0)).mask( + mask, np.nan + ) + + tm.assert_frame_equal(df_result, df_expected) From 51b08989591d5a90f301db57647f6e3499cfd75f Mon Sep 17 00:00:00 2001 From: eicchen Date: Wed, 20 Aug 2025 20:20:37 -0500 Subject: [PATCH 09/23] addressed changes brought up in PR, converted test cases to not use non-1D EAs --- 
pandas/core/frame.py | 7 +- pandas/tests/frame/test_arithmetic.py | 96 +++++++++++++-------------- test.py | 60 ----------------- test2.py | 52 --------------- 4 files changed, 48 insertions(+), 167 deletions(-) delete mode 100644 test.py delete mode 100644 test2.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e80fe47feeec7..60889a1b25065 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8469,12 +8469,7 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): """ rvalues = series._values if not isinstance(rvalues, np.ndarray): - # TODO(EA2D): no need to special-case with 2D EAs - if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"): - # We can losslessly+cheaply cast to ndarray - rvalues = np.asarray(rvalues) - else: - return series + rvalues = np.asarray(rvalues) if axis == 0: rvalues = rvalues.reshape(-1, 1) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 353d3fe9324ab..228d62878fc38 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2175,61 +2175,59 @@ def test_mixed_col_index_dtype(string_dtype_no_object): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "mod", "truediv", "pow"]) -def test_df_fill_value_operations(op): - # GH 61581 - input_data = np.arange(50).reshape(10, 5) - fill_val = 5 - columns = list("ABCDE") - df = DataFrame(input_data, columns=columns) - for i in range(5): - df.iat[i, i] = np.nan - df.iat[i + 1, i] = np.nan - df.iat[i + 4, i] = np.nan - - df_base = df.iloc[:, :-1] - df_mult = df.iloc[:, -1] - mask = df.isna().values - mask = mask[:, :-1] & mask[:, [-1]] - - df_result = getattr(df_base, op)(df_mult, axis=0, fill_value=fill_val) - df_expected = getattr(df_base.fillna(fill_val), op)( - df_mult.fillna(fill_val), axis=0 - ).mask(mask, np.nan) - - tm.assert_frame_equal(df_result, df_expected) - - dt_params = [ - (tm.ALL_INT_NUMPY_DTYPES, 5), 
- (tm.ALL_INT_EA_DTYPES, 5), - (tm.FLOAT_NUMPY_DTYPES, 4.9), - (tm.FLOAT_EA_DTYPES, 4.9), + (tm.ALL_INT_NUMPY_DTYPES[0], 5), + (tm.ALL_INT_EA_DTYPES[0], 5), + (tm.FLOAT_NUMPY_DTYPES[0], 4.9), + (tm.FLOAT_EA_DTYPES[0], 4.9), ] -dt_param_flat = [(dt, val) for lst, val in dt_params for dt in lst] +axes = [0, 1] -@pytest.mark.parametrize("data_type, fill_val", dt_param_flat) -def test_df_fill_value_dtype(data_type, fill_val): +@pytest.mark.parametrize( + "data_type,fill_val, axis", + [(dt, val, axis) for axis in axes for dt, val in dt_params], +) +def test_df_fill_value_dtype(data_type, fill_val, axis): # GH 61581 - base_data = np.arange(50).reshape(10, 5) - df_data = pd.array(base_data, dtype=data_type) + base_data = np.arange(25).reshape(5, 5) + mult_list = [1, np.nan, 5, np.nan, 3] + np_int_flag = 0 + + try: + mult_data = pd.array(mult_list, dtype=data_type) + except ValueError as e: + # Numpy int type cannot represent NaN, it will end up here + if "cannot convert float NaN to integer" in str(e): + mult_data = np.asarray(mult_list) + np_int_flag = 1 + columns = list("ABCDE") - df = DataFrame(df_data, columns=columns) - for i in range(5): - df.iat[i, i] = np.nan - df.iat[i + 1, i] = pd.NA - df.iat[i + 4, i] = pd.NA - - df_base = df.iloc[:, :-1] - df_mult = df.iloc[:, -1] - mask = df.isna().values - mask = mask[:, :-1] & mask[:, [-1]] - - df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) - df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), axis=0)).mask( - mask, np.nan - ) + df = DataFrame(base_data, columns=columns) + + for i in range(df.shape[0]): + try: + df.iat[i, i] = np.nan + df.iat[i + 1, i] = pd.NA + df.iat[i + 3, i] = pd.NA + except IndexError: + pass + + mult_mat = np.broadcast_to(mult_data, df.shape) + if axis == 0: + mask = np.isnan(mult_mat).T + else: + mask = np.isnan(mult_mat) + mask = df.isna().values & mask + + df_result = df.mul(mult_data, axis=axis, fill_value=fill_val) + if np_int_flag == 1: + mult_np = 
np.nan_to_num(mult_data, nan=fill_val) + df_expected = (df.fillna(fill_val).mul(mult_np, axis=axis)).mask(mask, np.nan) + else: + df_expected = ( + df.fillna(fill_val).mul(mult_data.fillna(fill_val), axis=axis) + ).mask(mask, np.nan) tm.assert_frame_equal(df_result, df_expected) diff --git a/test.py b/test.py deleted file mode 100644 index be4c57afe3aa7..0000000000000 --- a/test.py +++ /dev/null @@ -1,60 +0,0 @@ -# mypy: ignore-errors -import numpy as np - -import pandas as pd -import pandas._testing as tm - - -def print_side_by_side(df1, df2): - # Convert to string and split into lines - df1_str = df1.to_string(index=False).split("\n") - df2_str = df2.to_string(index=False).split("\n") - - # Pad lines to the same length for alignment - max_len_1 = max(len(line) for line in df1_str) - padded_df1 = [line.ljust(max_len_1) for line in df1_str] - - # Print side-by-side - print("Result".ljust(max_len_1) + " | Expected") - for line1, line2 in zip(padded_df1, df2_str): - print(f"{line1} | {line2}") - - -# data = np.arange(50).reshape(10, 5) -# fill_val = 5 - -# data = pd.array(np.random.choice([True, False], size=(10, 5)), dtype="boolean") -# fill_val = True - -data = np.arange(50).reshape(10, 5) -# data_mult = pd.array([i for i in range(10)], dtype=tm.SIGNED_INT_NUMPY_DTYPES[0]) -data_mult = pd.array(list(range(10)), dtype=tm.SIGNED_INT_EA_DTYPES[0]) -fill_val = 5 - -# print(tm.ALL_INT_DTYPES) -# print(tm.SIGNED_INT_EA_DTYPES) -# tm.SIGNED_INT_NUMPY_DTYPES[0] -print(type(data_mult)) - -# TODO masking not working with EA with dim > 1 -# NOTE currently trying to get EA testing set up - -columns = list("ABCDE") -df_base = pd.DataFrame(data, columns=columns) -for i in range(5): - df_base.iat[i, i] = np.nan - df_base.iat[i + 1, i] = np.nan - df_base.iat[i + 4, i] = np.nan - -mask = df_base.isna().values - -data_mult_re = data_mult.reshape(10, 1) -mask = mask[:, :-1] & data_mult_re - -df_result = df_base.mul(data_mult, axis=0, fill_value=fill_val) -print(df_result) -# 
df_expected = (df_base.fillna(fill_val).mul(data_mult.fillna(fill_val), -# axis=0)).mask(mask, np.nan) - -# print_side_by_side(df_result, df_expected) -# # tm.assert_frame_equal(df_result, df_expected) diff --git a/test2.py b/test2.py deleted file mode 100644 index a060a8229f1b4..0000000000000 --- a/test2.py +++ /dev/null @@ -1,52 +0,0 @@ -# mypy: ignore-errors -import numpy as np - -import pandas as pd - - -def print_side_by_side(df1, df2): - # Convert to string and split into lines - df1_str = df1.to_string(index=False).split("\n") - df2_str = df2.to_string(index=False).split("\n") - - # Pad lines to the same length for alignment - max_len_1 = max(len(line) for line in df1_str) - padded_df1 = [line.ljust(max_len_1) for line in df1_str] - - # Print side-by-side - print("Result".ljust(max_len_1) + " | Expected") - for line1, line2 in zip(padded_df1, df2_str): - print(f"{line1} | {line2}") - - -data = np.arange(50).reshape(10, 5) -fill_val = 5 - -# data = pd.array(np.random.choice([True, False], size=(10, 5)), dtype="boolean") -# fill_val = True - -# data = pd.array([i for i in range(50)], dtype="int") -# fill_val = 5 - -print(type(data)) - -columns = list("ABCDE") -df = pd.DataFrame(data, columns=columns) -for i in range(5): - df.iat[i, i] = np.nan - df.iat[i + 1, i] = np.nan - df.iat[i + 4, i] = np.nan - -df_base = df.iloc[:, :-1] -df_mult = df.iloc[:, [-1]] - -mask = df.isna().values -mask = mask[:, :-1] & mask[:, [-1]] - -df_result = df_base.mul(df_mult, axis=0, fill_value=fill_val) -df_expected = (df_base.fillna(fill_val).mul(df_mult.fillna(fill_val), axis=0)).mask( - mask, np.nan -) - -print_side_by_side(df_result, df_expected) -# tm.assert_frame_equal(df_result, df_expected) From a230b57a0175e114f6105557bab2ec53191ab233 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 21 Aug 2025 01:58:01 -0500 Subject: [PATCH 10/23] Limit np conversion to IntegerArray and FloatArray --- pandas/core/frame.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 60889a1b25065..e198402043a72 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -125,6 +125,10 @@ notna, ) +from pandas.arrays import ( + FloatingArray, + IntegerArray, +) from pandas.core import ( algorithms, common as com, @@ -8469,7 +8473,12 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): """ rvalues = series._values if not isinstance(rvalues, np.ndarray): - rvalues = np.asarray(rvalues) + if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]") or isinstance( + rvalues, (IntegerArray, FloatingArray) + ): + rvalues = np.asarray(rvalues) + else: + return series if axis == 0: rvalues = rvalues.reshape(-1, 1) From 320705730ba6b9585a724d6a3eb227e0afbad4b5 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 21 Aug 2025 13:16:12 -0500 Subject: [PATCH 11/23] Updated EA catch method in _maybe_align_series_as_frame --- pandas/core/frame.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e198402043a72..6c63687620fab 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -125,10 +125,6 @@ notna, ) -from pandas.arrays import ( - FloatingArray, - IntegerArray, -) from pandas.core import ( algorithms, common as com, @@ -8472,13 +8468,23 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): blockwise. 
""" rvalues = series._values - if not isinstance(rvalues, np.ndarray): - if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]") or isinstance( - rvalues, (IntegerArray, FloatingArray) - ): - rvalues = np.asarray(rvalues) + if isinstance(rvalues, PeriodArray): + return series + if not isinstance(rvalues, (np.ndarray,)) and rvalues.dtype not in ( + "datetime64[ns]", + "timedelta64[ns]", + ): + if axis == 0: + df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues)) else: - return series + nrows = self.shape[0] + df = DataFrame( + {i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])}, + dtype=rvalues.dtype, + ) + df.index = self.index + df.columns = self.columns + return df if axis == 0: rvalues = rvalues.reshape(-1, 1) From eb753a2b79e52d04a852582a5e8058f2c17b1684 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 21 Aug 2025 15:39:10 -0500 Subject: [PATCH 12/23] Addressed errors from changes in som tests --- pandas/core/frame.py | 2 +- pandas/tests/arithmetic/test_period.py | 8 ++------ pandas/tests/arrays/string_/test_string.py | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6c63687620fab..d4f1d5dd0c7ef 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8470,7 +8470,7 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): rvalues = series._values if isinstance(rvalues, PeriodArray): return series - if not isinstance(rvalues, (np.ndarray,)) and rvalues.dtype not in ( + if not isinstance(rvalues, np.ndarray) and rvalues.dtype not in ( "datetime64[ns]", "timedelta64[ns]", ): diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 67762e0b89c73..8c6825eb6b567 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1361,12 +1361,8 @@ def test_period_add_timestamp_raises(self, box_with_array): arr + ts with pytest.raises(TypeError, match=msg): ts + arr - if 
box_with_array is pd.DataFrame: - # TODO: before implementing resolution-inference we got the same - # message with DataFrame and non-DataFrame. Why did that change? - msg = "cannot add PeriodArray and Timestamp" - else: - msg = "cannot add PeriodArray and DatetimeArray" + msg = "cannot add PeriodArray and DatetimeArray" + print(box_with_array) with pytest.raises(TypeError, match=msg): arr + Series([ts]) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index f10ebda94dc6a..1cf32ea62ed4a 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -249,7 +249,7 @@ def test_mul(dtype): tm.assert_extension_array_equal(result, expected) -@pytest.mark.xfail(reason="GH-28527") +# @pytest.mark.xfail(reason="GH-28527") def test_add_strings(dtype): arr = pd.array(["a", "b", "c", "d"], dtype=dtype) df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object) From 550dddeb9497fbb451cecce43701cd32c9a5bee9 Mon Sep 17 00:00:00 2001 From: eicchen Date: Thu, 21 Aug 2025 16:34:59 -0500 Subject: [PATCH 13/23] removed comment and errant print statement --- pandas/tests/arithmetic/test_period.py | 1 - pandas/tests/arrays/string_/test_string.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 8c6825eb6b567..48bbcc81f8dfd 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1362,7 +1362,6 @@ def test_period_add_timestamp_raises(self, box_with_array): with pytest.raises(TypeError, match=msg): ts + arr msg = "cannot add PeriodArray and DatetimeArray" - print(box_with_array) with pytest.raises(TypeError, match=msg): arr + Series([ts]) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 1cf32ea62ed4a..01db9a9b9e2e2 100644 --- 
a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -249,7 +249,6 @@ def test_mul(dtype): tm.assert_extension_array_equal(result, expected) -# @pytest.mark.xfail(reason="GH-28527") def test_add_strings(dtype): arr = pd.array(["a", "b", "c", "d"], dtype=dtype) df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object) From 23afb07025ec633da733025b1be7f946ad88b6d5 Mon Sep 17 00:00:00 2001 From: eicchen Date: Sat, 23 Aug 2025 11:35:56 -0500 Subject: [PATCH 14/23] Commented out test_add_frame's xfail to test CI --- pandas/tests/arrays/string_/test_string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 01db9a9b9e2e2..68130ad25ee07 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -263,7 +263,7 @@ def test_add_strings(dtype): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(reason="GH-28527") +# @pytest.mark.xfail(reason="GH-28527") def test_add_frame(dtype): arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype) df = pd.DataFrame([["x", np.nan, "y", np.nan]]) From 81f4f185db5c1e8c21c9b72933d14046477efb3f Mon Sep 17 00:00:00 2001 From: eicchen Date: Mon, 25 Aug 2025 13:20:21 -0500 Subject: [PATCH 15/23] Allows frames to be added to strings, with modifications to tests that catch for invalid messages --- pandas/core/arrays/arrow/array.py | 6 ++-- .../tests/arrays/boolean/test_arithmetic.py | 2 +- .../tests/arrays/floating/test_arithmetic.py | 34 +++++++++++++++++-- .../tests/arrays/integer/test_arithmetic.py | 16 +++++++++ pandas/tests/arrays/string_/test_string.py | 8 +++-- 5 files changed, 58 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 2eed608908440..7b0121fb2c687 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -890,7 +890,9 @@ def 
_op_method_error_message(self, other, op) -> str: def _evaluate_op_method(self, other, op, arrow_funcs) -> Self: pa_type = self._pa_array.type other_original = other - other = self._box_pa(other) + other_NA = self._box_pa(other) + # pyarrow gets upset if you try to join a NullArray + other = other_NA.cast(pa_type) if ( pa.types.is_string(pa_type) @@ -911,7 +913,7 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self: return self._from_pyarrow_array(result) elif op in [operator.mul, roperator.rmul]: binary = self._pa_array - integral = other + integral = other_NA if not pa.types.is_integer(integral.type): raise TypeError("Can only string multiply by an integer.") pa_integral = pc.if_else(pc.less(integral, 0), 0, integral) diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py index 312dfb72e0950..555e69dc82589 100644 --- a/pandas/tests/arrays/boolean/test_arithmetic.py +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -118,7 +118,7 @@ def test_error_invalid_values(data, all_arithmetic_operators): ops(pd.Timestamp("20180101")) # invalid array-likes - if op not in ("__mul__", "__rmul__"): + if op not in ("__mul__", "__rmul__", "__add__", "__radd__"): # TODO(extension) numpy's mul with object array sees booleans as numbers msg = "|".join( [ diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index 777099e76fc73..7eec2db003909 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -152,8 +152,38 @@ def test_error_invalid_values(data, all_arithmetic_operators): ops(pd.Timestamp("20180101")) # invalid array-likes - with pytest.raises(TypeError, match=msg): - ops(pd.Series("foo", index=s.index)) + str_ser = pd.Series("foo", index=s.index) + if all_arithmetic_operators in [ + "__add__", + "__radd__", + ]: + res = ops(str_ser) + if all_arithmetic_operators == "__radd__": + 
data_expected = [] + for i in data: + if pd.isna(i): + data_expected.append(i) + elif i.is_integer(): + data_expected.append("foo" + str(int(i))) + else: + data_expected.append("foo" + str(i)) + + expected = pd.Series(data_expected, index=s.index) + else: + data_expected = [] + for i in data: + if pd.isna(i): + data_expected.append(i) + elif i.is_integer(): + data_expected.append(str(int(i)) + "foo") + else: + data_expected.append(str(i) + "foo") + + expected = pd.Series(data_expected, index=s.index) + tm.assert_series_equal(res, expected) + else: + with pytest.raises(TypeError, match=msg): + ops(str_ser) msg = "|".join( [ diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index aeceb9b8a3cb1..623b70092f424 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -197,6 +197,22 @@ def test_error_invalid_values(data, all_arithmetic_operators): # assert_almost_equal stricter, but the expected with pd.NA seems # more-correct than np.nan here. 
tm.assert_series_equal(res, expected) + elif all_arithmetic_operators in [ + "__add__", + "__radd__", + ]: + res = ops(str_ser) + if all_arithmetic_operators == "__radd__": + expected = pd.Series( + [np.nan if pd.isna(x) == 1 else "foo" + str(x) for x in data], + index=s.index, + ) + else: + expected = pd.Series( + [np.nan if pd.isna(x) == 1 else str(x) + "foo" for x in data], + index=s.index, + ) + tm.assert_series_equal(res, expected) else: with tm.external_error_raised(TypeError): ops(str_ser) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 68130ad25ee07..660c661851b4b 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -263,20 +263,22 @@ def test_add_strings(dtype): tm.assert_frame_equal(result, expected) -# @pytest.mark.xfail(reason="GH-28527") def test_add_frame(dtype): arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype) df = pd.DataFrame([["x", np.nan, "y", np.nan]]) assert arr.__add__(df) is NotImplemented + # TODO + # pyarrow returns a different dtype despite the values being the same + # could be addressed this PR if needed result = arr + df expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_dtype=False) result = df + arr expected = pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_dtype=False) def test_comparison_methods_scalar(comparison_op, dtype): From 4a9f4db8848adb01ab0509975c2bbb4fed34df97 Mon Sep 17 00:00:00 2001 From: eicchen Date: Tue, 26 Aug 2025 13:38:18 -0500 Subject: [PATCH 16/23] Moved type conversion within add and radd if statement, removed datearray and timedelta catch --- pandas/core/arrays/arrow/array.py | 13 +++++++++---- pandas/core/frame.py | 5 +---- 2 files changed, 10 insertions(+), 8 deletions(-) diff 
--git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 7b0121fb2c687..53aa3213c0034 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -890,9 +890,7 @@ def _op_method_error_message(self, other, op) -> str: def _evaluate_op_method(self, other, op, arrow_funcs) -> Self: pa_type = self._pa_array.type other_original = other - other_NA = self._box_pa(other) - # pyarrow gets upset if you try to join a NullArray - other = other_NA.cast(pa_type) + other = self._box_pa(other) if ( pa.types.is_string(pa_type) @@ -900,6 +898,13 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self: or pa.types.is_binary(pa_type) ): if op in [operator.add, roperator.radd]: + # pyarrow gets upset if you try to join a NullArray + if ( + pa.types.is_integer(other.type) + or pa.types.is_floating(other.type) + or pa.types.is_null(other.type) + ): + other = other.cast(pa_type) sep = pa.scalar("", type=pa_type) try: if op is operator.add: @@ -913,7 +918,7 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self: return self._from_pyarrow_array(result) elif op in [operator.mul, roperator.rmul]: binary = self._pa_array - integral = other_NA + integral = other if not pa.types.is_integer(integral.type): raise TypeError("Can only string multiply by an integer.") pa_integral = pc.if_else(pc.less(integral, 0), 0, integral) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d4f1d5dd0c7ef..daa98360f7c23 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8470,10 +8470,7 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): rvalues = series._values if isinstance(rvalues, PeriodArray): return series - if not isinstance(rvalues, np.ndarray) and rvalues.dtype not in ( - "datetime64[ns]", - "timedelta64[ns]", - ): + if not isinstance(rvalues, np.ndarray): if axis == 0: df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues)) else: From 76df45256bb26a26ba25dde5446809712ba33b57 Mon 
Sep 17 00:00:00 2001 From: eicchen Date: Wed, 27 Aug 2025 12:38:12 -0500 Subject: [PATCH 17/23] Removed PeriodArray special casing and modified test case --- pandas/core/frame.py | 2 -- pandas/tests/arithmetic/test_period.py | 11 +++-------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index daa98360f7c23..3f2087cfc1e6c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8468,8 +8468,6 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): blockwise. """ rvalues = series._values - if isinstance(rvalues, PeriodArray): - return series if not isinstance(rvalues, np.ndarray): if axis == 0: df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues)) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 48bbcc81f8dfd..573d791daaecb 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1371,16 +1371,11 @@ def test_period_add_timestamp_raises(self, box_with_array): with pytest.raises(TypeError, match=msg): pd.Index([ts]) + arr - if box_with_array is pd.DataFrame: - msg = "cannot add PeriodArray and DatetimeArray" - else: - msg = r"unsupported operand type\(s\) for \+: 'Period' and 'DatetimeArray" + msg = "cannot add PeriodArray and DatetimeArray" + with pytest.raises(TypeError, match=msg): arr + pd.DataFrame([ts]) - if box_with_array is pd.DataFrame: - msg = "cannot add PeriodArray and DatetimeArray" - else: - msg = r"unsupported operand type\(s\) for \+: 'DatetimeArray' and 'Period'" + with pytest.raises(TypeError, match=msg): pd.DataFrame([ts]) + arr From 17664e2a3b8c305e74a53fc4ea3f7e8117c280c1 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Sep 2025 07:44:20 -0700 Subject: [PATCH 18/23] ENH: fill_value in frame+series flex ops --- pandas/core/frame.py | 38 ++++++++++++++------------ pandas/tests/arithmetic/test_period.py | 1 - pandas/tests/frame/test_arithmetic.py | 13 +++++++++ 3 
files changed, 33 insertions(+), 19 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3f2087cfc1e6c..524240ba3ac66 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8468,7 +8468,23 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): blockwise. """ rvalues = series._values - if not isinstance(rvalues, np.ndarray): + if lib.is_np_dtype(rvalues.dtype): + # We can losslessly+cheaply cast to ndarray + # i.e. ndarray or dt64[naive], td64 + # TODO(EA2D): no need to special case with 2D EAs + rvalues = np.asarray(rvalues) + + if axis == 0: + rvalues = rvalues.reshape(-1, 1) + else: + rvalues = rvalues.reshape(1, -1) + + rvalues = np.broadcast_to(rvalues, self.shape) + # pass dtype to avoid doing inference + df = self._constructor(rvalues, dtype=rvalues.dtype) + + else: + # GH#61581 if axis == 0: df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues)) else: @@ -8477,23 +8493,9 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt): {i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])}, dtype=rvalues.dtype, ) - df.index = self.index - df.columns = self.columns - return df - - if axis == 0: - rvalues = rvalues.reshape(-1, 1) - else: - rvalues = rvalues.reshape(1, -1) - - rvalues = np.broadcast_to(rvalues, self.shape) - # pass dtype to avoid doing inference - return self._constructor( - rvalues, - index=self.index, - columns=self.columns, - dtype=rvalues.dtype, - ).__finalize__(series) + df.index = self.index + df.columns = self.columns + return df.__finalize__(series) def _flex_arith_method( self, other, op, *, axis: Axis = "columns", level=None, fill_value=None diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 573d791daaecb..86f9bae51557b 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1375,7 +1375,6 @@ def test_period_add_timestamp_raises(self, box_with_array): with 
pytest.raises(TypeError, match=msg): arr + pd.DataFrame([ts]) - with pytest.raises(TypeError, match=msg): pd.DataFrame([ts]) + arr diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 228d62878fc38..48880469173cb 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -659,6 +659,19 @@ def test_arith_flex_series_broadcasting(self, any_real_numpy_dtype): result = df.div(df[0], axis="index") tm.assert_frame_equal(result, expected) + def test_arith_flex_zero_len_raises(self): + # GH 19522 passing fill_value to frame flex arith methods should + # raise even in the zero-length special cases + ser_len0 = Series([], dtype=object) + df_len0 = DataFrame(columns=["A", "B"]) + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + + msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'" + with pytest.raises(TypeError, match=msg): + df.add(ser_len0, fill_value="E") + + df_len0.sub(df["A"], axis=None, fill_value=3) + def test_flex_add_scalar_fill_value(self): # GH#12723 dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float") From 23767fe320220119ad7befd7cfa0a18be7b1ca7a Mon Sep 17 00:00:00 2001 From: eicchen Date: Tue, 23 Sep 2025 20:54:21 -0500 Subject: [PATCH 19/23] Fixed issue by adding a conversion clause to _cmp_method in string_.py --- pandas/core/arrays/string_.py | 23 +++++++++++++++---- .../tests/arrays/floating/test_arithmetic.py | 1 + pandas/tests/arrays/string_/test_string.py | 16 ++++++++++--- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 7a61a252d86a6..730b6a8eed47c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -45,6 +45,7 @@ from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, + is_float_dtype, is_integer_dtype, is_object_dtype, is_string_dtype, @@ -1110,10 +1111,24 @@ def _cmp_method(self, other, op): if op.__name__ in 
ops.ARITHMETIC_BINOPS: result = np.empty_like(self._ndarray, dtype="object") result[mask] = self.dtype.na_value - result[valid] = op(self._ndarray[valid], other) - if isinstance(other, Path): - # GH#61940 - return result + try: + result[valid] = op(self._ndarray[valid], other) + if isinstance(other, Path): + # GH#61940 + return result + except TypeError: + if is_array_like(other): + if is_float_dtype(other.dtype): + # Shorten whole numbers to be ints to match pyarrow behavior + other = [ + str(int(x)) if x.is_integer() else str(x) for x in other + ] + else: + other = other.astype(str) + result[valid] = op(self._ndarray[valid], other) + else: + raise + return self._from_backing_data(result) else: # logical diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index 7eec2db003909..d01c2c1f1e6f6 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -144,6 +144,7 @@ def test_error_invalid_values(data, all_arithmetic_operators): "not implemented", "not supported for dtype", "Can only string multiply by an integer", + "can't multiply sequence by non-int of type 'str'", ] ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 660c661851b4b..3bc3579c646b3 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -269,9 +269,6 @@ def test_add_frame(dtype): assert arr.__add__(df) is NotImplemented - # TODO - # pyarrow returns a different dtype despite the values being the same - # could be addressed this PR if needed result = arr + df expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -281,6 +278,19 @@ def test_add_frame(dtype): tm.assert_frame_equal(result, expected, check_dtype=False) +def test_add_frame_mixed_type(dtype): + arr 
= pd.array(["a", "bc", 3, np.nan], dtype=dtype) + df = pd.DataFrame([[1, 2, 3.3, 4]]) + + result = arr + df + expected = pd.DataFrame([["a1", "bc2", "33.3", np.nan]]).astype(dtype) + tm.assert_frame_equal(result, expected, check_dtype=False) + + result = df + arr + expected = pd.DataFrame([["1a", "2bc", "3.33", np.nan]]).astype(dtype) + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_comparison_methods_scalar(comparison_op, dtype): op_name = f"__{comparison_op.__name__}__" a = pd.array(["a", None, "c"], dtype=dtype) From 5e698717da5f06cd5bda4f03cf683d9ecc5d24ac Mon Sep 17 00:00:00 2001 From: eicchen Date: Mon, 29 Sep 2025 03:42:59 -0500 Subject: [PATCH 20/23] Added testcases for StringArray addition and fixes --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/arrays/arrow/array.py | 44 +++++++++---- pandas/core/arrays/string_.py | 26 ++++---- pandas/tests/arrays/string_/test_string.py | 76 ++++++++++++++++++++-- pandas/tests/extension/base/ops.py | 3 + 5 files changed, 123 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 11378b3e5ab07..13ec91343f644 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -200,6 +200,7 @@ Other enhancements - :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`) - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) +- :class:`StringDtype` now supports addition to Series/DataFrame with floats, ints, and strings (:issue:`61581`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as 
python dictionaries (:issue:`56842`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`) @@ -227,7 +228,6 @@ Other enhancements - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) -- .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 53aa3213c0034..322f118da6d10 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -890,7 +890,17 @@ def _op_method_error_message(self, other, op) -> str: def _evaluate_op_method(self, other, op, arrow_funcs) -> Self: pa_type = self._pa_array.type other_original = other - other = self._box_pa(other) + try: + other = self._box_pa(other) + except pa.lib.ArrowTypeError: + # was expecting time dtype but received non-temporal dtype (time offset) + from pandas.core.tools.timedeltas import to_timedelta + + other = self._box_pa(to_timedelta(other)) + except ValueError as err: + raise TypeError( + "Incompatible type when converting to PyArrow dtype for operation." 
+ ) from err if ( pa.types.is_string(pa_type) @@ -903,19 +913,31 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self: pa.types.is_integer(other.type) or pa.types.is_floating(other.type) or pa.types.is_null(other.type) + or pa.types.is_string(other.type) + or pa.types.is_large_string(other.type) + or pa.types.is_binary(other.type) ): other = other.cast(pa_type) - sep = pa.scalar("", type=pa_type) - try: - if op is operator.add: - result = pc.binary_join_element_wise(self._pa_array, other, sep) - elif op is roperator.radd: - result = pc.binary_join_element_wise(other, self._pa_array, sep) - except pa.ArrowNotImplementedError as err: + sep = pa.scalar("", type=pa_type) + try: + if op is operator.add: + result = pc.binary_join_element_wise( + self._pa_array, other, sep + ) + elif op is roperator.radd: + result = pc.binary_join_element_wise( + other, self._pa_array, sep + ) + except pa.ArrowNotImplementedError as err: + raise TypeError( + self._op_method_error_message(other_original, op) + ) from err + return self._from_pyarrow_array(result) + else: raise TypeError( - self._op_method_error_message(other_original, op) - ) from err - return self._from_pyarrow_array(result) + "Can only add string arrays to dtypes " + "null, int, float, str, and binary." 
+ ) elif op in [operator.mul, roperator.rmul]: binary = self._pa_array integral = other diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 730b6a8eed47c..7665182587e25 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1111,23 +1111,27 @@ def _cmp_method(self, other, op): if op.__name__ in ops.ARITHMETIC_BINOPS: result = np.empty_like(self._ndarray, dtype="object") result[mask] = self.dtype.na_value - try: - result[valid] = op(self._ndarray[valid], other) - if isinstance(other, Path): - # GH#61940 - return result - except TypeError: - if is_array_like(other): - if is_float_dtype(other.dtype): - # Shorten whole numbers to be ints to match pyarrow behavior + if op.__name__ in ["add", "radd"]: + if isinstance(other, str) or is_string_dtype(other): + pass + elif is_float_dtype(other) or is_integer_dtype(other): + if is_float_dtype(other): + # Shorten whole number floats to match pyarrow behavior other = [ str(int(x)) if x.is_integer() else str(x) for x in other ] else: other = other.astype(str) - result[valid] = op(self._ndarray[valid], other) else: - raise + raise TypeError( + f"Only supports op({op.__name__}) between StringArray and " + "dtypes int, float, and str." 
+ ) + + result[valid] = op(self._ndarray[valid], other) + if isinstance(other, Path): + # GH#61940 + return result return self._from_backing_data(result) else: diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 3bc3579c646b3..20f1e32d00ebe 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -4,6 +4,7 @@ """ import operator +from re import escape import numpy as np import pytest @@ -249,6 +250,32 @@ def test_mul(dtype): tm.assert_extension_array_equal(result, expected) +def test_add_series(dtype): + arr = pd.array(["a", "b", "c", "d"], dtype=dtype) + df = pd.Series(["t", "y", "v", "w"], dtype=object) + + result = arr + df + expected = pd.Series(["at", "by", "cv", "dw"]).astype(dtype) + tm.assert_series_equal(result, expected) + + result = df + arr + expected = pd.Series(["ta", "yb", "vc", "wd"]).astype(dtype) + tm.assert_series_equal(result, expected) + + +def test_add_series_float(dtype): + arr = pd.array(["a", "b", "c", "d"], dtype=dtype) + df = pd.Series([1, 2.0, 3.5, 4]) + + result = arr + df + expected = pd.Series(["a1", "b2", "c3.5", "d4"]).astype(dtype) + tm.assert_series_equal(result, expected) + + result = df + arr + expected = pd.Series(["1a", "2b", "3.5c", "4d"]).astype(dtype) + tm.assert_series_equal(result, expected) + + def test_add_strings(dtype): arr = pd.array(["a", "b", "c", "d"], dtype=dtype) df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object) @@ -278,19 +305,58 @@ def test_add_frame(dtype): tm.assert_frame_equal(result, expected, check_dtype=False) -def test_add_frame_mixed_type(dtype): - arr = pd.array(["a", "bc", 3, np.nan], dtype=dtype) - df = pd.DataFrame([[1, 2, 3.3, 4]]) +def test_add_frame_int(dtype): + arr = pd.array(["a", "b", "c", np.nan], dtype=dtype) + df = pd.DataFrame([[1, np.nan, 3, np.nan]]) result = arr + df - expected = pd.DataFrame([["a1", "bc2", "33.3", np.nan]]).astype(dtype) + expected = pd.DataFrame([["a1", 
np.nan, "c3", np.nan]]).astype(dtype) tm.assert_frame_equal(result, expected, check_dtype=False) result = df + arr - expected = pd.DataFrame([["1a", "2bc", "3.33", np.nan]]).astype(dtype) + expected = pd.DataFrame([["1a", np.nan, "3c", np.nan]]).astype(dtype) tm.assert_frame_equal(result, expected, check_dtype=False) +@pytest.mark.parametrize( + "invalid", + [ + pd.Timedelta(hours=31), + pd.Timestamp("2021-01-01"), + np.datetime64("NaT", "ns"), + pd.NaT, + True, + pd.Period("2025-09"), + pd.Categorical(["test"]), + pd.offsets.Minute(3), + pd.Interval(1, 2, closed="right"), + ], +) +def test_add_frame_invalid(dtype, invalid): + arr = pd.array(["a", np.nan], dtype=dtype) + df = pd.DataFrame([[invalid, invalid]]) + + if dtype.storage == "pyarrow": + if invalid == pd.Categorical(["test"]): + msg = ( + "Incompatible type found when converting " + "to PyArrow dtype for operation." + ) + else: + msg = ( + "Can only add string arrays to dtypes " + "null, int, float, str, and binary." + ) + with pytest.raises(TypeError, match=msg): + arr + df + else: + msg = escape( + "Only supports op(add) between StringArray and dtypes int, float, and str." 
+ ) + with pytest.raises(TypeError, match=msg): + arr + df + + def test_comparison_methods_scalar(comparison_op, dtype): op_name = f"__{comparison_op.__name__}__" a = pd.array(["a", None, "c"], dtype=dtype) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 583435b674ba1..888beb3bb5354 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -152,6 +152,9 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators ser = pd.Series(data) + if op_name in ["__add__", "__radd__"]: + pytest.mark.xfail(reason="Failed: DID NOT RAISE ") + self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) def test_divmod(self, data): From 5380aba59d085aafecc98949f146a4d76df43135 Mon Sep 17 00:00:00 2001 From: eicchen Date: Mon, 29 Sep 2025 04:22:48 -0500 Subject: [PATCH 21/23] Fixed regex modified during pre-commit --- pandas/tests/arrays/string_/test_string.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 20f1e32d00ebe..65c2715600e56 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -338,10 +338,7 @@ def test_add_frame_invalid(dtype, invalid): if dtype.storage == "pyarrow": if invalid == pd.Categorical(["test"]): - msg = ( - "Incompatible type found when converting " - "to PyArrow dtype for operation." - ) + msg = "Incompatible type when converting to PyArrow dtype for operation." 
else: msg = ( "Can only add string arrays to dtypes " From 01e49591f47bad7142cd05608455d420ed6f3d20 Mon Sep 17 00:00:00 2001 From: eicchen Date: Mon, 29 Sep 2025 13:10:20 -0500 Subject: [PATCH 22/23] Added FUTURE_INFER_STRING catch, edited documentation --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/arrays/floating/test_arithmetic.py | 12 ++++++++---- pandas/tests/arrays/integer/test_arithmetic.py | 12 ++++++++---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 13ec91343f644..7cebde784b4ca 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -200,7 +200,7 @@ Other enhancements - :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`) - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) -- :class:`StringDtype` now supports addition to Series/DataFrame with floats, ints, and strings (:issue:`61581`) +- :class:`StringDtype` now supports addition to Series/DataFrame with floats, ints, and strings(Only for future string) (:issue:`61581`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`) diff --git 
a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index d01c2c1f1e6f6..d14e507a79643 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -154,10 +154,14 @@ def test_error_invalid_values(data, all_arithmetic_operators): # invalid array-likes str_ser = pd.Series("foo", index=s.index) - if all_arithmetic_operators in [ - "__add__", - "__radd__", - ]: + if ( + all_arithmetic_operators + in [ + "__add__", + "__radd__", + ] + and pd.options.future.infer_string + ): res = ops(str_ser) if all_arithmetic_operators == "__radd__": data_expected = [] diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index 623b70092f424..9aece48f2ea38 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -197,10 +197,14 @@ def test_error_invalid_values(data, all_arithmetic_operators): # assert_almost_equal stricter, but the expected with pd.NA seems # more-correct than np.nan here. 
tm.assert_series_equal(res, expected) - elif all_arithmetic_operators in [ - "__add__", - "__radd__", - ]: + elif ( + all_arithmetic_operators + in [ + "__add__", + "__radd__", + ] + and pd.options.future.infer_string + ): res = ops(str_ser) if all_arithmetic_operators == "__radd__": expected = pd.Series( From cfb4e25637bcda1d87859bb8429200d645093398 Mon Sep 17 00:00:00 2001 From: eicchen Date: Mon, 29 Sep 2025 18:45:32 -0500 Subject: [PATCH 23/23] Edited documentation due to docstring error --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7cebde784b4ca..13ec91343f644 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -200,7 +200,7 @@ Other enhancements - :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`) - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) -- :class:`StringDtype` now supports addition to Series/DataFrame with floats, ints, and strings(Only for future string) (:issue:`61581`) +- :class:`StringDtype` now supports addition to Series/DataFrame with floats, ints, and strings (:issue:`61581`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) - :meth:`DataFrame.apply` supports using third-party 
execution engines like the Bodo.ai JIT compiler (:issue:`60668`)