DOC: whatsnew entry for on_bad_lines regression fix (GH#61837)

skalwaghe-56 · skalwaghe-56 · commit 072926797029 · 2025-09-12T17:43:34.000+05:30
diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst
@@ -24,6 +24,8 @@ Bug fixes
 ^^^^^^^^^
 - Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
   with a compiled regex and custom flags (:issue:`62240`)
+- Fix regression in :func:`read_csv` when an ``on_bad_lines`` callable returns too many fields: a
+  ``ParserWarning`` is now emitted and the extra fields are truncated regardless of ``index_col`` (:issue:`61837`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_233.contributors:
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -621,12 +621,15 @@ def _check_data_length(
             empty_str_or_na = empty_str | isna(data[-1])  # type: ignore[operator]
             if len(columns) == len(data) - 1 and np.all(empty_str_or_na):
                 return
-            warnings.warn(
-                "Length of header or names does not match length of data. This leads "
-                "to a loss of data with index_col=False.",
-                ParserWarning,
-                stacklevel=find_stack_level(),
-            )
+            # Only warn when on_bad_lines is ERROR; any other setting handles bad lines itself
+            if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
+                warnings.warn(
+                    "Length of header or names does not match length of data. "
+                    "This leads "
+                    "to a loss of data with index_col=False.",
+                    ParserWarning,
+                    stacklevel=find_stack_level(),
+                )
 
     @final
     def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> SequenceT:
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -1196,21 +1196,25 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]:
                             # Truncate extra elements and warn.
                             if len(new_l) > col_len:
                                 warnings.warn(
-                                    "Header/names length != data length. "
-                                    "Extra fields dropped.",
+                                    "Length of header or names does not match length "
+                                    "of data. This leads "
+                                    "to a loss of data with index_col=False.",
                                     ParserWarning,
                                     stacklevel=find_stack_level(),
                                 )
                                 new_l = new_l[:col_len]
                             content.append(new_l)  # pyright: ignore[reportArgumentType]
+                    elif self.on_bad_lines == self.BadLineHandleMethod.ERROR:
+                        row_num = self.pos - (content_len - i + footers)
+                        bad_lines.append((row_num, actual_len))
+                        break
                     elif self.on_bad_lines in (
-                        self.BadLineHandleMethod.ERROR,
                         self.BadLineHandleMethod.WARN,
+                        self.BadLineHandleMethod.SKIP,
                     ):
                         row_num = self.pos - (content_len - i + footers)
                         bad_lines.append((row_num, actual_len))
-                        if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
-                            break
+                        # For WARN and SKIP, don't append the bad content
                     else:
                         content.append(_content)
                 else: