Skip to content

Commit

Permalink
GH-44795: [C++] Use arrow::util::span on arrow::util::bitmap_builders…
Browse files Browse the repository at this point in the history
…_utilities instead of std::vector (#44796)

### Rationale for this change

`arrow::util::span` (a backport of C++20 `std::span`) is more generally applicable than `std::vector`, so any public API currently accepting a vector const-ref argument should instead accept a span argument.

### What changes are included in this PR?

`arrow::util::BytesToBits` accepts `arrow::util::span`  instead of `std::vector`

### Are these changes tested?

Yes, existing C++ tests via CI

### Are there any user-facing changes?

Yes,  from `Result<std::shared_ptr<Buffer>> BytesToBits(const std::vector<uint8_t>&, MemoryPool* pool)` to `Result<std::shared_ptr<Buffer>> BytesToBits(util::span<const uint8_t> bytes, MemoryPool* pool)`
* GitHub Issue: #44795

Authored-by: Raúl Cumplido <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
raulcd authored Dec 18, 2024
1 parent 1e5d6e5 commit 7af2ad4
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 7 deletions.
3 changes: 2 additions & 1 deletion cpp/src/arrow/array/array_list_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,8 @@ TEST_F(TestMapArray, BuildingStringToInt) {
std::vector<int32_t> offsets = {0, 2, 2, 3, 3};
auto expected_keys = ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])");
auto expected_values = ArrayFromJSON(int32(), "[0, null, 8]");
ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, internal::BytesToBits({1, 0, 1, 1}));
ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap,
internal::BytesToBits(std::vector<uint8_t>({1, 0, 1, 1})));
MapArray expected(type, 4, Buffer::Wrap(offsets), expected_keys, expected_values,
expected_null_bitmap, 1);

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/ipc/json_simple_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,8 @@ TEST(TestMap, StringToInteger) {
ASSERT_OK_AND_ASSIGN(auto expected_keys,
ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])"));
ASSERT_OK_AND_ASSIGN(auto expected_values, ArrayFromJSON(int32(), "[0, null, 8]"));
ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, BytesToBits({1, 0, 1, 1}));
ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap,
BytesToBits(std::vector<uint8_t>({1, 0, 1, 1})));
auto expected =
std::make_shared<MapArray>(type, 4, Buffer::Wrap(offsets), expected_keys,
expected_values, expected_null_bitmap, 1);
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/util/bitmap_builders.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace internal {

namespace {

void FillBitsFromBytes(const std::vector<uint8_t>& bytes, uint8_t* bits) {
void FillBitsFromBytes(util::span<const uint8_t> bytes, uint8_t* bits) {
for (size_t i = 0; i < bytes.size(); ++i) {
if (bytes[i] > 0) {
bit_util::SetBit(bits, i);
Expand All @@ -43,7 +43,7 @@ void FillBitsFromBytes(const std::vector<uint8_t>& bytes, uint8_t* bits) {

} // namespace

Result<std::shared_ptr<Buffer>> BytesToBits(const std::vector<uint8_t>& bytes,
Result<std::shared_ptr<Buffer>> BytesToBits(util::span<const uint8_t> bytes,
MemoryPool* pool) {
int64_t bit_length = bit_util::BytesForBits(bytes.size());

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/util/bitmap_builders.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/span.h"
#include "arrow/util/visibility.h"

namespace arrow {
Expand All @@ -36,7 +37,7 @@ Result<std::shared_ptr<Buffer>> BitmapAllButOne(MemoryPool* pool, int64_t length

/// \brief Convert vector of bytes to bitmap buffer
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> BytesToBits(const std::vector<uint8_t>&,
Result<std::shared_ptr<Buffer>> BytesToBits(util::span<const uint8_t> bytes,
MemoryPool* pool = default_memory_pool());

} // namespace internal
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/column_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1001,8 +1001,8 @@ TEST(TestColumnWriter, RepeatedListsUpdateSpacedBug) {
auto values_data = reinterpret_cast<const int32_t*>(values_buffer->data());

std::shared_ptr<Buffer> valid_bits;
ASSERT_OK_AND_ASSIGN(valid_bits, ::arrow::internal::BytesToBits(
{1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1}));
std::vector<uint8_t> bitmap_bytes = {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1};
ASSERT_OK_AND_ASSIGN(valid_bits, ::arrow::internal::BytesToBits(bitmap_bytes));

// valgrind will warn about out of bounds access into def_levels_data
typed_writer->WriteBatchSpaced(14, def_levels.data(), rep_levels.data(),
Expand Down

0 comments on commit 7af2ad4

Please sign in to comment.