From b713ab7f29605bf86d5bd68e7db9c8d02140ec96 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Thu, 5 Oct 2023 17:16:09 +0200 Subject: [PATCH] GH-37145: [Python] support boolean columns with bitsize 1 in from_dataframe (#37975) ### Rationale for this change Bit-packed booleans are currently not supported in the `from_dataframe` function of the Dataframe Interchange Protocol. Note: We currently represent booleans in the pyarrow implementation as `uint8`, which will also need to be changed in a follow-up PR (see https://github.com/data-apis/dataframe-api/issues/227). ### What changes are included in this PR? This PR adds support for bit-packed booleans when consuming a dataframe interchange object. ### Are these changes tested? Currently only locally. * Closes: #37145 Lead-authored-by: AlenkaF Co-authored-by: Alenka Frim Signed-off-by: AlenkaF --- python/pyarrow/interchange/from_dataframe.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/interchange/from_dataframe.py b/python/pyarrow/interchange/from_dataframe.py index d653054e910a6..e97e91e44fb52 100644 --- a/python/pyarrow/interchange/from_dataframe.py +++ b/python/pyarrow/interchange/from_dataframe.py @@ -54,7 +54,8 @@ DtypeKind.FLOAT: {16: pa.float16(), 32: pa.float32(), 64: pa.float64()}, - DtypeKind.BOOL: {8: pa.uint8()}, + DtypeKind.BOOL: {1: pa.bool_(), + 8: pa.uint8()}, DtypeKind.STRING: {8: pa.string()}, } @@ -232,19 +233,23 @@ def bool_column_to_array( ------- pa.Array """ - if not allow_copy: + buffers = col.get_buffers() + size = buffers["data"][1][1] + + # If booleans are byte-packed a copy to bit-packed will be made + if size == 8 and not allow_copy: raise RuntimeError( "Boolean column will be casted from uint8 and a copy " "is required which is forbidden by allow_copy=False" ) - buffers = col.get_buffers() data_type = col.dtype data = buffers_to_array(buffers, data_type, col.size(), col.describe_null, col.offset) - data = pc.cast(data, pa.bool_()) + if size 
== 8: + data = pc.cast(data, pa.bool_()) return data