Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
99ae672
ENH: fill_value in frame+series flex ops
jbrockmendel Sep 11, 2025
4e77fb7
Updated version of 62317
eicchen Oct 1, 2025
eb12b34
WIP frame addition with fill_value test
eicchen Oct 5, 2025
7e23b65
Completed flex frame fill_value testcase
eicchen Oct 5, 2025
4617108
Removed type-casting shenanigans in array.py
eicchen Oct 5, 2025
bca56fe
Removed float addition test, reintroduced check for float type nulls
eicchen Oct 6, 2025
7273396
Edited pyarrow catch to be more specific
eicchen Oct 6, 2025
4493e08
Updated fill_value test case, Updated whatsnew
eicchen Oct 8, 2025
406cd15
Applied PR feedback
eicchen Oct 13, 2025
ad9614b
Applied various suggestions from jbrock to testcases
eicchen Oct 18, 2025
5a64e5c
Merge branch 'main' into BUG-#61581-DataFrame.mul
eicchen Oct 20, 2025
73d168b
Updated catch for str[python[ in test_add_frame
eicchen Oct 20, 2025
13ed0d6
Merge branch 'BUG-#61581-DataFrame.mul' of https://github.com/eicchen…
eicchen Oct 20, 2025
3d3a2a6
reupdated str[python] catch after git shenanigans
eicchen Oct 20, 2025
76a122e
Update python test_add_frame catch to better reflect issues across di…
eicchen Oct 21, 2025
1d3260e
I have no idea where that test came from
eicchen Oct 21, 2025
32360a8
Merge branch 'main' into BUG-#61581-DataFrame.mul
eicchen Oct 28, 2025
30d4a07
Edited original merge to be inline with #62869, added 'object' as dty…
eicchen Oct 28, 2025
771a19c
Returned accidentally removed update note, modified test_add_frame to…
eicchen Nov 3, 2025
a321daf
Removed unnecessary change in test_string
eicchen Nov 3, 2025
c79c210
Simplified test case for mul*array with fill_value
eicchen Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,10 @@ Other enhancements
- :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`)
- :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`)
- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
- :class:`StringDtype` now supports addition while maintaining element typing (:issue:`61581`)
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
- :meth:`DataFrame.add` now supports string addition with null-likes (:issue:`61581`)
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
Expand Down Expand Up @@ -998,6 +1000,7 @@ MultiIndex
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` raising ``NotImplementedError`` when the ``fill_value`` parameter is passed (:issue:`61581`)
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
- Bug in :meth:`DataFrame.__setitem__` where column alignment logic would reindex the assigned value with an empty index, incorrectly setting all values to ``NaN`` (:issue:`61841`)
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` where reindexing :class:`Index` to a :class:`MultiIndex` would incorrectly set all values to ``NaN`` (:issue:`60923`)
Expand Down
16 changes: 15 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,14 @@ def _op_method_error_message(self, other, op) -> str:
def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
pa_type = self._pa_array.type
other_original = other
other = self._box_pa(other)
try:
other = self._box_pa(other)
except (ValueError, pa.lib.ArrowTypeError) as err:
# Categorical and Interval dtype raises errors in self._box_pa
# Could be fixed in the future if needed
raise TypeError(
"Incompatible type when converting to PyArrow dtype for operation."
) from err

if (
pa.types.is_string(pa_type)
Expand All @@ -899,6 +906,13 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
):
if op in [operator.add, roperator.radd]:
sep = pa.scalar("", type=pa_type)
if (
pa.types.is_string(other.type)
or pa.types.is_large_string(other.type)
or pa.types.is_binary(other.type)
or isna(other).all()
):
other = other.cast(pa_type)
try:
if op is operator.add:
result = pc.binary_join_element_wise(self._pa_array, other, sep)
Expand Down
48 changes: 25 additions & 23 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8468,27 +8468,34 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
blockwise.
"""
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
if lib.is_np_dtype(rvalues.dtype):
# We can losslessly+cheaply cast to ndarray
# i.e. ndarray or dt64[naive], td64
# TODO(EA2D): no need to special case with 2D EAs
rvalues = np.asarray(rvalues)

if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
return series
rvalues = rvalues.reshape(1, -1)

if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
rvalues = rvalues.reshape(1, -1)
rvalues = np.broadcast_to(rvalues, self.shape)
# pass dtype to avoid doing inference
df = self._constructor(rvalues, dtype=rvalues.dtype)

rvalues = np.broadcast_to(rvalues, self.shape)
# pass dtype to avoid doing inference
return self._constructor(
rvalues,
index=self.index,
columns=self.columns,
dtype=rvalues.dtype,
).__finalize__(series)
else:
# GH#61581
if axis == 0:
df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues))
else:
nrows = self.shape[0]
df = DataFrame(
{i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])},
dtype=rvalues.dtype,
)
df.index = self.index
df.columns = self.columns
return df.__finalize__(series)

def _flex_arith_method(
self, other, op, *, axis: Axis = "columns", level=None, fill_value=None
Expand All @@ -8498,11 +8505,6 @@ def _flex_arith_method(
if self._should_reindex_frame_op(other, op, axis, fill_value, level):
return self._arith_method_with_reindex(other, op)

if isinstance(other, Series) and fill_value is not None:
# TODO: We could allow this in cases where we end up going
# through the DataFrame path
raise NotImplementedError(f"fill_value {fill_value} not supported.")

other = ops.maybe_prepare_scalar_for_op(other, self.shape)
self, other = self._align_for_op(other, axis, flex=True, level=level)

Expand Down
16 changes: 2 additions & 14 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,12 +1361,7 @@ def test_period_add_timestamp_raises(self, box_with_array):
arr + ts
with pytest.raises(TypeError, match=msg):
ts + arr
if box_with_array is pd.DataFrame:
# TODO: before implementing resolution-inference we got the same
# message with DataFrame and non-DataFrame. Why did that change?
msg = "cannot add PeriodArray and Timestamp"
else:
msg = "cannot add PeriodArray and DatetimeArray"
msg = "cannot add PeriodArray and DatetimeArray"
with pytest.raises(TypeError, match=msg):
arr + Series([ts])
with pytest.raises(TypeError, match=msg):
Expand All @@ -1376,16 +1371,9 @@ def test_period_add_timestamp_raises(self, box_with_array):
with pytest.raises(TypeError, match=msg):
pd.Index([ts]) + arr

if box_with_array is pd.DataFrame:
msg = "cannot add PeriodArray and DatetimeArray"
else:
msg = r"unsupported operand type\(s\) for \+: 'Period' and 'DatetimeArray"
msg = "cannot add PeriodArray and DatetimeArray"
with pytest.raises(TypeError, match=msg):
arr + pd.DataFrame([ts])
if box_with_array is pd.DataFrame:
msg = "cannot add PeriodArray and DatetimeArray"
else:
msg = r"unsupported operand type\(s\) for \+: 'DatetimeArray' and 'Period'"
with pytest.raises(TypeError, match=msg):
pd.DataFrame([ts]) + arr

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/boolean/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_error_invalid_values(data, all_arithmetic_operators):
ops(pd.Timestamp("20180101"))

# invalid array-likes
if op not in ("__mul__", "__rmul__"):
if op not in ("__mul__", "__rmul__", "__add__", "__radd__"):
# TODO(extension) numpy's mul with object array sees booleans as numbers
msg = "|".join(
[
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/arrays/floating/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,9 @@ def test_error_invalid_values(data, all_arithmetic_operators):
ops(pd.Timestamp("20180101"))

# invalid array-likes
str_ser = pd.Series("foo", index=s.index)
with pytest.raises(TypeError, match=msg):
ops(pd.Series("foo", index=s.index))
ops(str_ser)

msg = "|".join(
[
Expand Down
50 changes: 45 additions & 5 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,19 @@ def test_mul(dtype):
tm.assert_extension_array_equal(result, expected)


@pytest.mark.xfail(reason="GH-28527")
def test_add_series(dtype):
    """Add a string array and an object-dtype Series in both operand orders.

    The element-wise concatenation is compared against a Series cast to
    ``dtype``.
    """
    string_arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
    object_ser = pd.Series(["t", "y", "v", "w"], dtype=object)

    # Array on the left-hand side.
    tm.assert_series_equal(
        string_arr + object_ser,
        pd.Series(["at", "by", "cv", "dw"]).astype(dtype),
    )

    # Array on the right-hand side.
    tm.assert_series_equal(
        object_ser + string_arr,
        pd.Series(["ta", "yb", "vc", "wd"]).astype(dtype),
    )


def test_add_strings(dtype):
arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
Expand All @@ -264,20 +276,48 @@ def test_add_strings(dtype):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(reason="GH-28527")
def test_add_frame(dtype):
    """Add a string array and a DataFrame in both operand orders.

    ``arr.__add__(df)`` must return ``NotImplemented``.  In the expected
    frames, every position where either operand is missing is missing.
    """
    arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
    df = pd.DataFrame([["x", np.nan, "y", np.nan]])

    assert arr.__add__(df) is NotImplemented

    # Only values are compared; the result dtype is deliberately not pinned.
    result = arr + df
    expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype)
    tm.assert_frame_equal(result, expected, check_dtype=False)

    result = df + arr
    expected = pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype)
    tm.assert_frame_equal(result, expected, check_dtype=False)


@pytest.mark.parametrize(
    "invalid",
    [
        10,
        1.5,
        pd.Timedelta(hours=31),
        pd.Timestamp("2021-01-01"),
        True,
        pd.Period("2025-09"),
        pd.Categorical(["test"]),
        pd.offsets.Minute(3),
        pd.Interval(1, 2, closed="right"),
    ],
)
def test_add_frame_invalid(dtype, invalid):
    """Adding a DataFrame of non-string values to a string array raises.

    Any one of several ``TypeError`` messages is accepted.
    """
    string_arr = pd.array(["a", np.nan], dtype=dtype)
    frame = pd.DataFrame([[invalid, invalid]])

    # NOTE(review): once joined with "|", the bare "|" inside 'str|string'
    # is a top-level alternation, so that entry contributes two separate
    # alternatives rather than one grouped choice.  The patterns are kept
    # verbatim so the set of accepted messages does not change.
    accepted_messages = (
        r"can only concatenate str \(not \".+\"\) to str",
        r"unsupported operand type\(s\) for \+: '.+' and 'str'",
        r"operation 'add' not supported for dtype 'str|string' with dtype '.+'",
        "Incompatible type when converting to PyArrow dtype for operation.",
    )
    msg = "|".join(accepted_messages)

    with pytest.raises(TypeError, match=msg):
        string_arr + frame


def test_comparison_methods_scalar(comparison_op, dtype):
Expand Down
107 changes: 100 additions & 7 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,11 +626,43 @@ def test_arith_flex_frame_corner(self, float_frame):
expected = float_frame.sort_index() * np.nan
tm.assert_frame_equal(result, expected)

with pytest.raises(NotImplementedError, match="fill_value"):
float_frame.add(float_frame.iloc[0], fill_value=3)
@pytest.mark.parametrize("axis", [0, 1])
def test_arith_flex_frame_fill_value_series(self, float_frame, axis):
    """Check ``DataFrame.add(Series, fill_value=...)`` along either axis.

    Roughly 20% of ``float_frame`` is masked out, its first row is used as
    the Series operand, and the result is compared with a frame built by
    broadcasting that Series by hand, filling both sides, adding them, and
    re-masking the cells that were missing on both sides.
    """
    fill_value = 3
    rng = np.random.default_rng(60)
    mask = rng.random(float_frame.shape) < 0.2
    left = float_frame.mask(mask)
    right = left.iloc[0]

    result = left.add(right, axis=axis, fill_value=fill_value)

    if axis == 0:  # axis = index, vertical
        pad_num = abs(result.shape[0] - len(right))
        mult_num = result.shape[1]
        right_pad = np.pad(
            right, (0, pad_num), mode="constant", constant_values=(np.nan)
        )
        right_df = DataFrame(
            [right_pad] * mult_num, columns=result.index, index=result.columns
        ).T

        left = left.reindex_like(result)

    else:  # axis = columns, horizontal
        pad_num = abs(result.shape[1] - len(right))
        mult_num = result.shape[0]
        right_pad = np.pad(
            right, (0, pad_num), mode="constant", constant_values=(np.nan)
        )
        right_df = DataFrame(
            [right_pad] * mult_num, index=result.index, columns=result.columns
        )

    # A cell stays missing only when it is missing on both sides.  Track that
    # explicitly instead of looking for the sentinel sum
    # ``fill_value + fill_value``, which would also hide a genuine result
    # that happens to equal that number.
    both_missing = left.isna().to_numpy() & right_df.isna().to_numpy()

    left_filled = left.fillna(fill_value)
    right_filled = right_df.fillna(fill_value)
    expected = right_filled + left_filled
    expected = expected.mask(both_missing, pd.NA)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"])
def test_arith_flex_series_ops(self, simple_frame, op):
Expand Down Expand Up @@ -672,11 +704,21 @@ def test_arith_flex_zero_len_raises(self):
df_len0 = DataFrame(columns=["A", "B"])
df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

with pytest.raises(NotImplementedError, match="fill_value"):
msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'"
with pytest.raises(TypeError, match=msg):
df.add(ser_len0, fill_value="E")

with pytest.raises(NotImplementedError, match="fill_value"):
df_len0.sub(df["A"], axis=None, fill_value=3)
result = df_len0.sub(df, axis=None, fill_value=3)
expected = DataFrame([[2, 1], [0, -1]], columns=["A", "B"])
tm.assert_frame_equal(result, expected, check_dtype=False)

result = df_len0.sub(df["A"], axis=0, fill_value=3)
expected = DataFrame([[2, 2], [0, 0]], columns=["A", "B"])
tm.assert_frame_equal(result, expected, check_dtype=False)

result = df_len0.sub(df["A"], axis=1, fill_value=3)
expected = DataFrame([], columns=["A", "B", 0, 1])
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_flex_add_scalar_fill_value(self):
# GH#12723
Expand Down Expand Up @@ -2192,3 +2234,54 @@ def test_mixed_col_index_dtype(string_dtype_no_object):
expected.columns = expected.columns.astype(string_dtype_no_object)

tm.assert_frame_equal(result, expected)


# (dtype, fill value) pairs: the first NumPy and extension-array integer and
# float dtypes listed in ``tm``, each paired with a fill value of that kind.
dt_params = [
    (tm.ALL_INT_NUMPY_DTYPES[0], 10),
    (tm.ALL_INT_EA_DTYPES[0], 10),
    (tm.FLOAT_NUMPY_DTYPES[0], 4.9),
    (tm.FLOAT_EA_DTYPES[0], 4.9),
]

# Axis values that the parametrization crosses with ``dt_params``.
axes = [0, 1]


@pytest.mark.parametrize(
    "data_type,fill_val, axis",
    [(dt, val, axis) for axis in axes for dt, val in dt_params],
)
def test_df_mul_array_fill_value(data_type, fill_val, axis):
    """Check ``DataFrame.mul(array, fill_value=...)`` along either axis.

    The expected frame fills both operands with ``fill_val``, multiplies
    them, and then re-masks the cells that were missing on both sides.
    """
    # GH 61581
    # NOTE(review): a reviewer remained super-skeptical that this test (or
    # test_arith_flex_frame_fill_value_series) is minimally complex for what
    # it is trying to do.
    base_data = np.arange(12).reshape(4, 3)
    df = DataFrame(base_data)
    n_rows, n_cols = df.shape
    mult_list = [np.nan, 1, 5, np.nan]
    mult_list = mult_list[: df.shape[axis]]

    if data_type in tm.ALL_INT_NUMPY_DTYPES:
        # Numpy int type cannot represent NaN
        mult_np = np.asarray(mult_list)
        mult_list = np.nan_to_num(mult_np, nan=fill_val)

    mult_data = pd.array(mult_list, dtype=data_type)

    # Put missing values on the diagonal and two rows below it.  Explicit
    # bounds select the same cells that the out-of-range IndexError used to
    # cut off.
    for i in range(min(n_rows, n_cols)):
        df.iat[i, i] = np.nan
        if i + 2 < n_rows:
            df.iat[i + 2, i] = pd.NA

    # Broadcast the array operand to the frame's shape along ``axis``.
    new_shape = (-1, 1) if axis == 0 else (1, -1)
    mult_mat = np.broadcast_to(mult_data.reshape(*new_shape), df.shape)
    mask = np.isnan(mult_mat)
    mask = df.isna().values & mask

    df_result = df.mul(mult_data, axis=axis, fill_value=fill_val)
    df_expected = (df.fillna(fill_val).mul(mult_data.fillna(fill_val), axis=axis)).mask(
        mask, np.nan
    )

    tm.assert_frame_equal(df_result, df_expected)
Loading