From 7af2ad47b096766dda69ee84470d7ac34e3df163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Dec 2024 09:37:34 +0100 Subject: [PATCH] GH-44795: [C++] Use arrow::util::span on arrow::util::bitmap_builders_utilities instead of std::vector (#44796) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change `arrow::util::span` (a backport of C++20 `std::span`) is more generally applicable than `std::vector`, so any public API currently accepting a vector const-ref argument should instead accept a span argument. ### What changes are included in this PR? `arrow::util::BytesToBits` accepts `arrow::util::span` instead of `std::vector` ### Are these changes tested? Yes, existing C++ tests via CI ### Are there any user-facing changes? Yes, from `Result> BytesToBits(const std::vector&, MemoryPool* pool)` to `Result> BytesToBits(util::span bytes, MemoryPool* pool)` * GitHub Issue: #44795 Authored-by: Raúl Cumplido Signed-off-by: Antoine Pitrou --- cpp/src/arrow/array/array_list_test.cc | 3 ++- cpp/src/arrow/ipc/json_simple_test.cc | 3 ++- cpp/src/arrow/util/bitmap_builders.cc | 4 ++-- cpp/src/arrow/util/bitmap_builders.h | 3 ++- cpp/src/parquet/column_writer_test.cc | 4 ++-- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc index 3d18d5f967b72..226f5fc4649af 100644 --- a/cpp/src/arrow/array/array_list_test.cc +++ b/cpp/src/arrow/array/array_list_test.cc @@ -1186,7 +1186,8 @@ TEST_F(TestMapArray, BuildingStringToInt) { std::vector offsets = {0, 2, 2, 3, 3}; auto expected_keys = ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])"); auto expected_values = ArrayFromJSON(int32(), "[0, null, 8]"); - ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, internal::BytesToBits({1, 0, 1, 1})); + ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, + internal::BytesToBits(std::vector({1, 0, 1, 1}))); MapArray expected(type, 4, Buffer::Wrap(offsets), expected_keys, expected_values, expected_null_bitmap, 1); diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc index 7a45f0906639a..31312f1ac6948 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/ipc/json_simple_test.cc @@ -857,7 +857,8 @@ TEST(TestMap, StringToInteger) { ASSERT_OK_AND_ASSIGN(auto expected_keys, ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])")); ASSERT_OK_AND_ASSIGN(auto expected_values, ArrayFromJSON(int32(), "[0, null, 8]")); - ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, BytesToBits({1, 0, 1, 1})); + ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, + BytesToBits(std::vector({1, 0, 1, 1}))); auto expected = std::make_shared(type, 4, Buffer::Wrap(offsets), expected_keys, expected_values, expected_null_bitmap, 1); diff --git a/cpp/src/arrow/util/bitmap_builders.cc b/cpp/src/arrow/util/bitmap_builders.cc index c5cf3d2bc72b5..000dda718d0da 100644 --- a/cpp/src/arrow/util/bitmap_builders.cc +++ b/cpp/src/arrow/util/bitmap_builders.cc @@ -33,7 +33,7 @@ namespace internal { namespace { -void FillBitsFromBytes(const std::vector& bytes, uint8_t* bits) { +void FillBitsFromBytes(util::span bytes, uint8_t* bits) { for (size_t i = 0; i < bytes.size(); ++i) { if (bytes[i] > 0) { bit_util::SetBit(bits, i); @@ -43,7 +43,7 @@ void FillBitsFromBytes(const std::vector& bytes, uint8_t* bits) { } // namespace -Result> BytesToBits(const std::vector& bytes, +Result> BytesToBits(util::span bytes, MemoryPool* pool) { int64_t bit_length = bit_util::BytesForBits(bytes.size()); diff --git a/cpp/src/arrow/util/bitmap_builders.h b/cpp/src/arrow/util/bitmap_builders.h index 5bd2ad4414083..4bf2edfdcbd69 100644 --- a/cpp/src/arrow/util/bitmap_builders.h +++ b/cpp/src/arrow/util/bitmap_builders.h @@ -23,6 +23,7 @@ #include "arrow/result.h" #include "arrow/type_fwd.h" +#include "arrow/util/span.h" #include "arrow/util/visibility.h" namespace arrow { @@ -36,7 +37,7 @@ Result> BitmapAllButOne(MemoryPool* pool, int64_t length /// \brief Convert vector of bytes to bitmap buffer ARROW_EXPORT -Result> BytesToBits(const std::vector&, +Result> BytesToBits(util::span bytes, MemoryPool* pool = default_memory_pool()); } // namespace internal diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index d2b3aa0dff003..25446aefd6814 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1001,8 +1001,8 @@ TEST(TestColumnWriter, RepeatedListsUpdateSpacedBug) { auto values_data = reinterpret_cast(values_buffer->data()); std::shared_ptr valid_bits; - ASSERT_OK_AND_ASSIGN(valid_bits, ::arrow::internal::BytesToBits( - {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1})); + std::vector bitmap_bytes = {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1}; + ASSERT_OK_AND_ASSIGN(valid_bits, ::arrow::internal::BytesToBits(bitmap_bytes)); // valgrind will warn about out of bounds access into def_levels_data typed_writer->WriteBatchSpaced(14, def_levels.data(), rep_levels.data(),