Skip to content

Commit 37c87db

Browse files
GH-47575: [Python] add quoting_header option to pyarrow WriteOptions (#47610)
### Rationale for this change

Expose the CSV writer option `quoting_header` in pyarrow. Addresses #47575.

### What changes are included in this PR?

Cython changes for parsing the `quoting_header` option, in a manner similar to `quoting_style`.

### Are these changes tested?

Yes, a unit test was added in `test_csv.py`.

### Are there any user-facing changes?

Adds a `quoting_header` option (a QuotingStyle value) to `WriteOptions` in pyarrow.

* GitHub Issue: #47575

Authored-by: Ayush Bansal <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
1 parent bf4fc65 commit 37c87db

File tree

3 files changed

+34
-1
lines changed

3 files changed

+34
-1
lines changed

python/pyarrow/_csv.pyx

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1364,13 +1364,16 @@ cdef class WriteOptions(_Weakrefable):
13641364
- "none": do not enclose any values in quotes; values containing
13651365
special characters (such as quotes, cell delimiters or line endings)
13661366
will raise an error.
1367+
quoting_header : str, optional (default "needed")
1368+
Same as quoting_style, but for header column names. Accepts the same values.
1369+
Note: both "needed" and "all_valid" have the same effect of quoting all column names.
13671370
"""
13681371

13691372
# Avoid mistakenly creating attributes
13701373
__slots__ = ()
13711374

13721375
def __init__(self, *, include_header=None, batch_size=None,
1373-
delimiter=None, quoting_style=None):
1376+
delimiter=None, quoting_style=None, quoting_header=None):
13741377
self.options.reset(new CCSVWriteOptions(CCSVWriteOptions.Defaults()))
13751378
if include_header is not None:
13761379
self.include_header = include_header
@@ -1380,6 +1383,8 @@ cdef class WriteOptions(_Weakrefable):
13801383
self.delimiter = delimiter
13811384
if quoting_style is not None:
13821385
self.quoting_style = quoting_style
1386+
if quoting_header is not None:
1387+
self.quoting_header = quoting_header
13831388

13841389
@property
13851390
def include_header(self):
@@ -1433,6 +1438,18 @@ cdef class WriteOptions(_Weakrefable):
14331438
def quoting_style(self, value):
14341439
deref(self.options).quoting_style = unwrap_quoting_style(value)
14351440

1441+
@property
1442+
def quoting_header(self):
1443+
"""
1444+
Same as quoting_style, but for header column names.
1445+
Note: both "needed" and "all_valid" have the same effect of quoting all column names.
1446+
"""
1447+
return wrap_quoting_style(deref(self.options).quoting_header)
1448+
1449+
@quoting_header.setter
1450+
def quoting_header(self, value):
1451+
deref(self.options).quoting_header = unwrap_quoting_style(value)
1452+
14361453
@staticmethod
14371454
cdef WriteOptions wrap(CCSVWriteOptions options):
14381455
out = WriteOptions()

python/pyarrow/includes/libarrow.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2147,6 +2147,7 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
21472147
int32_t batch_size
21482148
unsigned char delimiter
21492149
CQuotingStyle quoting_style
2150+
CQuotingStyle quoting_header
21502151
CIOContext io_context
21512152

21522153
CCSVWriteOptions()

python/pyarrow/tests/test_csv.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,6 +2003,21 @@ def test_write_quoting_style():
20032003
buf.seek(0)
20042004

20052005

2006+
def test_write_quoting_header():
2007+
t = pa.Table.from_arrays([[1, 2, None], ["a", None, "c"]], ["c1", "c2"])
2008+
buf = io.BytesIO()
2009+
for write_options, res in [
2010+
(WriteOptions(quoting_header='none'), b'c1,c2\n1,"a"\n2,\n,"c"\n'),
2011+
(WriteOptions(), b'"c1","c2"\n1,"a"\n2,\n,"c"\n'),
2012+
(WriteOptions(quoting_header='all_valid'),
2013+
b'"c1","c2"\n1,"a"\n2,\n,"c"\n'),
2014+
]:
2015+
with CSVWriter(buf, t.schema, write_options=write_options) as writer:
2016+
writer.write_table(t)
2017+
assert buf.getvalue() == res
2018+
buf.seek(0)
2019+
2020+
20062021
def test_read_csv_reference_cycle():
20072022
# ARROW-13187
20082023
def inner():

0 commit comments

Comments
 (0)