From dc4fd2e5c77b729ed3d356061bf5b654c5bb8fbf Mon Sep 17 00:00:00 2001 From: Ayush Bansal Date: Sun, 21 Sep 2025 19:51:27 +0530 Subject: [PATCH 1/4] add quoting_header option to pyarrow WriterOptions --- python/pyarrow/_csv.pyx | 15 ++++++++++++++- python/pyarrow/includes/libarrow.pxd | 1 + python/pyarrow/tests/test_csv.py | 13 +++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx index 62cb75fa6ea..07c1d00a11d 100644 --- a/python/pyarrow/_csv.pyx +++ b/python/pyarrow/_csv.pyx @@ -1370,7 +1370,7 @@ cdef class WriteOptions(_Weakrefable): __slots__ = () def __init__(self, *, include_header=None, batch_size=None, - delimiter=None, quoting_style=None): + delimiter=None, quoting_style=None, quoting_header=None): self.options.reset(new CCSVWriteOptions(CCSVWriteOptions.Defaults())) if include_header is not None: self.include_header = include_header @@ -1380,6 +1380,8 @@ cdef class WriteOptions(_Weakrefable): self.delimiter = delimiter if quoting_style is not None: self.quoting_style = quoting_style + if quoting_header is not None: + self.quoting_header = quoting_header @property def include_header(self): @@ -1433,6 +1435,17 @@ cdef class WriteOptions(_Weakrefable): def quoting_style(self, value): deref(self.options).quoting_style = unwrap_quoting_style(value) + @property + def quoting_header(self): + """ + Same as quoting_style, but for header column names + """ + return wrap_quoting_style(deref(self.options).quoting_header) + + @quoting_header.setter + def quoting_header(self, value): + deref(self.options).quoting_header = unwrap_quoting_style(value) + @staticmethod cdef WriteOptions wrap(CCSVWriteOptions options): out = WriteOptions() diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 39dc3a77d98..f294ee4d50b 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2147,6 +2147,7 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil: int32_t batch_size unsigned char delimiter CQuotingStyle quoting_style + CQuotingStyle quoting_header CIOContext io_context CCSVWriteOptions() diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index 2794d07e87c..d7a5555de7c 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -2002,6 +2002,19 @@ def test_write_quoting_style(): assert buf.getvalue() == res buf.seek(0) +def test_write_quoting_header(): + t = pa.Table.from_arrays([[1, 2, None], ["a", None, "c"]], ["c1", "c2"]) + buf = io.BytesIO() + for write_options, res in [ + (WriteOptions(quoting_header='none'), b'c1,c2\n1,"a"\n2,\n,"c"\n'), + (WriteOptions(), b'"c1","c2"\n1,"a"\n2,\n,"c"\n'), + (WriteOptions(quoting_header='all_valid'), + b'"c1","c2"\n1,"a"\n2,\n,"c"\n'), + ]: + with CSVWriter(buf, t.schema, write_options=write_options) as writer: + writer.write_table(t) + assert buf.getvalue() == res + buf.seek(0) def test_read_csv_reference_cycle(): # ARROW-13187 From a86da1ae8684dc7a358fa4d959f1461b0f8a2cc7 Mon Sep 17 00:00:00 2001 From: Ayush Bansal Date: Sun, 21 Sep 2025 20:24:40 +0530 Subject: [PATCH 2/4] linted --- python/pyarrow/tests/test_csv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index d7a5555de7c..f510c6dbe23 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -2002,6 +2002,7 @@ def test_write_quoting_style(): assert buf.getvalue() == res buf.seek(0) + def test_write_quoting_header(): t = pa.Table.from_arrays([[1, 2, None], ["a", None, "c"]], ["c1", "c2"]) buf = io.BytesIO() @@ -2016,6 +2017,7 @@ def test_write_quoting_header(): assert buf.getvalue() == res buf.seek(0) + def test_read_csv_reference_cycle(): # ARROW-13187 def inner(): From b4892a2086512015ef5bd3c9b8802f452df1a2d4 Mon Sep 17 00:00:00 2001 From: Ayush Bansal Date: Mon, 22 Sep 2025 20:29:14 +0530 Subject: [PATCH 3/4] documentation improvements --- python/pyarrow/_csv.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx index 07c1d00a11d..ed084358d17 100644 --- a/python/pyarrow/_csv.pyx +++ b/python/pyarrow/_csv.pyx @@ -1364,6 +1364,8 @@ cdef class WriteOptions(_Weakrefable): - "none": do not enclose any values in quotes; values containing special characters (such as quotes, cell delimiters or line endings) will raise an error. + quoting_header : str, optional (default "needed") + Same as quoting_style, but for header column names. Accepts same values. """ # Avoid mistakingly creating attributes @@ -1438,7 +1440,8 @@ cdef class WriteOptions(_Weakrefable): @property def quoting_header(self): """ - Same as quoting_style, but for header column names + Same as quoting_style, but for header column names. + Note : both "needed" and "all_valid" have the same effect of quoting all column names. """ return wrap_quoting_style(deref(self.options).quoting_header) From ae575250f54be927069624fb5150f9675f728592 Mon Sep 17 00:00:00 2001 From: Ayush Bansal Date: Mon, 22 Sep 2025 22:19:50 +0530 Subject: [PATCH 4/4] add cppdoc note to WriteOptions docstring too --- python/pyarrow/_csv.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx index ed084358d17..ed9d20beb6b 100644 --- a/python/pyarrow/_csv.pyx +++ b/python/pyarrow/_csv.pyx @@ -1366,6 +1366,7 @@ cdef class WriteOptions(_Weakrefable): will raise an error. quoting_header : str, optional (default "needed") Same as quoting_style, but for header column names. Accepts same values. + Note : both "needed" and "all_valid" have the same effect of quoting all column names. """ # Avoid mistakingly creating attributes