pandas-dev · eicchen · Sep 11, 2025 · Oct 1, 2025 · Oct 5, 2025 · Oct 5, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -200,8 +200,10 @@ Other enhancements
 - :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`)
 - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`)
 - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
+- :class:`StringDtype` now supports addition while maintaining element typing (:issue:`61581`)
 - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
 - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
+- :meth:`DataFrame.add` now supports string addition with null-likes (:issue:`61581`)
 - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
 - :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
@@ -998,6 +1000,7 @@ MultiIndex
 - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
 - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
 - Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
+- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` raising ``NotImplementedError`` when the ``fill_value`` parameter is passed (:issue:`61581`)
 - Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
 - Bug in :meth:`DataFrame.__setitem__` where column alignment logic would reindex the assigned value with an empty index, incorrectly setting all values to ``NaN``.(:issue:`61841`)
 - Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` where reindexing :class:`Index` to a :class:`MultiIndex` would incorrectly set all values to ``NaN``.(:issue:`60923`)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -890,7 +890,14 @@ def _op_method_error_message(self, other, op) -> str:
     def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
         pa_type = self._pa_array.type
         other_original = other
-        other = self._box_pa(other)
+        try:
+            other = self._box_pa(other)
+        except (ValueError, pa.lib.ArrowTypeError) as err:
+            # Categorical and Interval dtypes raise errors in self._box_pa.
+            # This could be fixed in the future if needed.
+            raise TypeError(
+                "Incompatible type when converting to PyArrow dtype for operation."
+            ) from err
 
         if (
             pa.types.is_string(pa_type)
@@ -899,6 +906,13 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
         ):
             if op in [operator.add, roperator.radd]:
                 sep = pa.scalar("", type=pa_type)
+                if (
+                    pa.types.is_string(other.type)
+                    or pa.types.is_large_string(other.type)
+                    or pa.types.is_binary(other.type)
+                    or isna(other).all()
+                ):
+                    other = other.cast(pa_type)
                 try:
                     if op is operator.add:
                         result = pc.binary_join_element_wise(self._pa_array, other, sep)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8468,27 +8468,34 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
         blockwise.
         """
         rvalues = series._values
-        if not isinstance(rvalues, np.ndarray):
-            # TODO(EA2D): no need to special-case with 2D EAs
-            if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
-                # We can losslessly+cheaply cast to ndarray
-                rvalues = np.asarray(rvalues)
+        if lib.is_np_dtype(rvalues.dtype):
+            # We can losslessly+cheaply cast to ndarray
+            # i.e. ndarray or dt64[naive], td64
+            # TODO(EA2D): no need to special case with 2D EAs
+            rvalues = np.asarray(rvalues)
+
+            if axis == 0:
+                rvalues = rvalues.reshape(-1, 1)
             else:
-                return series
+                rvalues = rvalues.reshape(1, -1)
 
-        if axis == 0:
-            rvalues = rvalues.reshape(-1, 1)
-        else:
-            rvalues = rvalues.reshape(1, -1)
+            rvalues = np.broadcast_to(rvalues, self.shape)
+            # pass dtype to avoid doing inference
+            df = self._constructor(rvalues, dtype=rvalues.dtype)
 
-        rvalues = np.broadcast_to(rvalues, self.shape)
-        # pass dtype to avoid doing inference
-        return self._constructor(
-            rvalues,
-            index=self.index,
-            columns=self.columns,
-            dtype=rvalues.dtype,
-        ).__finalize__(series)
+        else:
+            # GH#61581
+            if axis == 0:
+                df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues))
+            else:
+                nrows = self.shape[0]
+                df = DataFrame(
+                    {i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])},
+                    dtype=rvalues.dtype,
+                )
+        df.index = self.index
+        df.columns = self.columns
+        return df.__finalize__(series)
 
     def _flex_arith_method(
         self, other, op, *, axis: Axis = "columns", level=None, fill_value=None
@@ -8498,11 +8505,6 @@ def _flex_arith_method(
         if self._should_reindex_frame_op(other, op, axis, fill_value, level):
             return self._arith_method_with_reindex(other, op)
 
-        if isinstance(other, Series) and fill_value is not None:
-            # TODO: We could allow this in cases where we end up going
-            #  through the DataFrame path
-            raise NotImplementedError(f"fill_value {fill_value} not supported.")
-
         other = ops.maybe_prepare_scalar_for_op(other, self.shape)
         self, other = self._align_for_op(other, axis, flex=True, level=level)
 

diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py
@@ -1361,12 +1361,7 @@ def test_period_add_timestamp_raises(self, box_with_array):
             arr + ts
         with pytest.raises(TypeError, match=msg):
             ts + arr
-        if box_with_array is pd.DataFrame:
-            # TODO: before implementing resolution-inference we got the same
-            #  message with DataFrame and non-DataFrame.  Why did that change?
-            msg = "cannot add PeriodArray and Timestamp"
-        else:
-            msg = "cannot add PeriodArray and DatetimeArray"
+        msg = "cannot add PeriodArray and DatetimeArray"
         with pytest.raises(TypeError, match=msg):
             arr + Series([ts])
         with pytest.raises(TypeError, match=msg):
@@ -1376,16 +1371,9 @@ def test_period_add_timestamp_raises(self, box_with_array):
         with pytest.raises(TypeError, match=msg):
             pd.Index([ts]) + arr
 
-        if box_with_array is pd.DataFrame:
-            msg = "cannot add PeriodArray and DatetimeArray"
-        else:
-            msg = r"unsupported operand type\(s\) for \+: 'Period' and 'DatetimeArray"
+        msg = "cannot add PeriodArray and DatetimeArray"
         with pytest.raises(TypeError, match=msg):
             arr + pd.DataFrame([ts])
-        if box_with_array is pd.DataFrame:
-            msg = "cannot add PeriodArray and DatetimeArray"
-        else:
-            msg = r"unsupported operand type\(s\) for \+: 'DatetimeArray' and 'Period'"
         with pytest.raises(TypeError, match=msg):
             pd.DataFrame([ts]) + arr
 

diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py
@@ -118,7 +118,7 @@ def test_error_invalid_values(data, all_arithmetic_operators):
         ops(pd.Timestamp("20180101"))
 
     # invalid array-likes
-    if op not in ("__mul__", "__rmul__"):
+    if op not in ("__mul__", "__rmul__", "__add__", "__radd__"):
         # TODO(extension) numpy's mul with object array sees booleans as numbers
         msg = "|".join(
             [

diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py
@@ -152,8 +152,9 @@ def test_error_invalid_values(data, all_arithmetic_operators):
         ops(pd.Timestamp("20180101"))
 
     # invalid array-likes
+    str_ser = pd.Series("foo", index=s.index)
     with pytest.raises(TypeError, match=msg):
-        ops(pd.Series("foo", index=s.index))
+        ops(str_ser)
 
     msg = "|".join(
         [

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
@@ -249,7 +249,19 @@ def test_mul(dtype):
     tm.assert_extension_array_equal(result, expected)
 
 
-@pytest.mark.xfail(reason="GH-28527")
+def test_add_series(dtype):
+    arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
+    df = pd.Series(["t", "y", "v", "w"], dtype=object)
+
+    result = arr + df
+    expected = pd.Series(["at", "by", "cv", "dw"]).astype(dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = df + arr
+    expected = pd.Series(["ta", "yb", "vc", "wd"]).astype(dtype)
+    tm.assert_series_equal(result, expected)
+
+
 def test_add_strings(dtype):
     arr = pd.array(["a", "b", "c", "d"], dtype=dtype)
     df = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)
@@ -264,20 +276,48 @@ def test_add_strings(dtype):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(reason="GH-28527")
 def test_add_frame(dtype):
     arr = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
     df = pd.DataFrame([["x", np.nan, "y", np.nan]])
-
     assert arr.__add__(df) is NotImplemented
 
     result = arr + df
     expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype)
-    tm.assert_frame_equal(result, expected)
+    tm.assert_frame_equal(result, expected, check_dtype=False)
 
     result = df + arr
     expected = pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype)
-    tm.assert_frame_equal(result, expected)
+    tm.assert_frame_equal(result, expected, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    "invalid",
+    [
+        10,
+        1.5,
+        pd.Timedelta(hours=31),
+        pd.Timestamp("2021-01-01"),
+        True,
+        pd.Period("2025-09"),
+        pd.Categorical(["test"]),
+        pd.offsets.Minute(3),
+        pd.Interval(1, 2, closed="right"),
+    ],
+)
+def test_add_frame_invalid(dtype, invalid):
+    arr = pd.array(["a", np.nan], dtype=dtype)
+    df = pd.DataFrame([[invalid, invalid]])
+    # The alternatives are OR-ed into one regex, so an inner "|" must be grouped.
+    msg = "|".join(
+        [
+            r"can only concatenate str \(not \".+\"\) to str",
+            r"unsupported operand type\(s\) for \+: '.+' and 'str'",
+            r"operation 'add' not supported for dtype '(str|string)' with dtype '.+'",
+            r"Incompatible type when converting to PyArrow dtype for operation\.",
+        ]
+    )
+    with pytest.raises(TypeError, match=msg):
+        arr + df
 
 
 def test_comparison_methods_scalar(comparison_op, dtype):

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -626,11 +626,43 @@ def test_arith_flex_frame_corner(self, float_frame):
         expected = float_frame.sort_index() * np.nan
         tm.assert_frame_equal(result, expected)
 
-        with pytest.raises(NotImplementedError, match="fill_value"):
-            float_frame.add(float_frame.iloc[0], fill_value=3)
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_arith_flex_frame_fill_value_series(self, float_frame, axis):
+        rng = np.random.default_rng(60)
+        mask = rng.random(float_frame.shape) < 0.2
+        left = float_frame.mask(mask)
+        right = left.iloc[0]
+
+        result = left.add(right, axis=axis, fill_value=3)
+
+        if axis == 0:  # axis = index, vertical
+            pad_num = abs(result.shape[0] - len(right))
+            mult_num = result.shape[1]
+            right_pad = np.pad(
+                right, (0, pad_num), mode="constant", constant_values=(np.nan)
+            )
+            right_df = DataFrame(
+                [right_pad] * mult_num, columns=result.index, index=result.columns
+            ).T
+
+            left = left.reindex_like(result)
+
+        else:  # axis = columns, horizontal
+            pad_num = abs(result.shape[1] - len(right))
+            mult_num = result.shape[0]
+            right_pad = np.pad(
+                right, (0, pad_num), mode="constant", constant_values=(np.nan)
+            )
+            right_df = DataFrame(
+                [right_pad] * mult_num, index=result.index, columns=result.columns
+            )
 
-        with pytest.raises(NotImplementedError, match="fill_value"):
-            float_frame.add(float_frame.iloc[0], axis="index", fill_value=3)
+        left_filled = left.fillna(3)
+        right_filled = right_df.fillna(3)
+        expected = right_filled + left_filled
+        expected = expected.mask(expected == 6, pd.NA)
+
+        tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"])
     def test_arith_flex_series_ops(self, simple_frame, op):
@@ -672,11 +704,21 @@ def test_arith_flex_zero_len_raises(self):
         df_len0 = DataFrame(columns=["A", "B"])
         df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
 
-        with pytest.raises(NotImplementedError, match="fill_value"):
+        msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'"
+        with pytest.raises(TypeError, match=msg):
             df.add(ser_len0, fill_value="E")
 
-        with pytest.raises(NotImplementedError, match="fill_value"):
-            df_len0.sub(df["A"], axis=None, fill_value=3)
+        result = df_len0.sub(df, axis=None, fill_value=3)
+        expected = DataFrame([[2, 1], [0, -1]], columns=["A", "B"])
+        tm.assert_frame_equal(result, expected, check_dtype=False)
+
+        result = df_len0.sub(df["A"], axis=0, fill_value=3)
+        expected = DataFrame([[2, 2], [0, 0]], columns=["A", "B"])
+        tm.assert_frame_equal(result, expected, check_dtype=False)
+
+        result = df_len0.sub(df["A"], axis=1, fill_value=3)
+        expected = DataFrame([], columns=["A", "B", 0, 1])
+        tm.assert_frame_equal(result, expected, check_dtype=False)
 
     def test_flex_add_scalar_fill_value(self):
         # GH#12723
@@ -2192,3 +2234,54 @@ def test_mixed_col_index_dtype(string_dtype_no_object):
     expected.columns = expected.columns.astype(string_dtype_no_object)
 
     tm.assert_frame_equal(result, expected)
+
+
+dt_params = [
+    (tm.ALL_INT_NUMPY_DTYPES[0], 10),
+    (tm.ALL_INT_EA_DTYPES[0], 10),
+    (tm.FLOAT_NUMPY_DTYPES[0], 4.9),
+    (tm.FLOAT_EA_DTYPES[0], 4.9),
+]
+
+axes = [0, 1]
+
+
+@pytest.mark.parametrize(
+    "data_type,fill_val, axis",
+    [(dt, val, axis) for axis in axes for dt, val in dt_params],
+)
+def test_df_mul_array_fill_value(data_type, fill_val, axis):
+    # GH 61581
+    base_data = np.arange(12).reshape(4, 3)
+    df = DataFrame(base_data)
+    mult_list = [np.nan, 1, 5, np.nan]
+    mult_list = mult_list[: df.shape[axis]]
+
+    if data_type in tm.ALL_INT_NUMPY_DTYPES:
+        # Numpy int type cannot represent NaN
+        mult_np = np.asarray(mult_list)
+        mult_list = np.nan_to_num(mult_np, nan=fill_val)
+
+    mult_data = pd.array(mult_list, dtype=data_type)
+
+    for i in range(df.shape[0]):
+        try:
+            df.iat[i, i] = np.nan
+            df.iat[i + 2, i] = pd.NA
+        except IndexError:
+            pass
+
+    if axis == 0:
+        mult_mat = np.broadcast_to(mult_data.reshape(-1, 1), df.shape)
+        mask = np.isnan(mult_mat)
+    else:
+        mult_mat = np.broadcast_to(mult_data.reshape(1, -1), df.shape)
+        mask = np.isnan(mult_mat)
+    mask = df.isna().values & mask
+
+    df_result = df.mul(mult_data, axis=axis, fill_value=fill_val)
+    df_expected = (df.fillna(fill_val).mul(mult_data.fillna(fill_val), axis=axis)).mask(
+        mask, np.nan
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)