Skip to content

Commit 1044022

Browse files
GH-48218: [C++][Parquet] Fix Util & Level Conversion logic on big-endian (#48219)
### Rationale for this change

This PR is intended to enable Parquet DB support on big-endian (s390x) systems. It fixes the "util & level_conversion" logic.

### What changes are included in this PR?

The fix includes changes to the following files:

- cpp/src/parquet/level_conversion_inc.h
- cpp/src/parquet/test_util.h

### Are these changes tested?

Yes. The changes were tested on the s390x architecture to make sure things work correctly. The fix was also tested on the x86 architecture to make sure no new regression is introduced.

### Are there any user-facing changes?

No.

GitHub main issue link: #48151

* GitHub Issue: #48218

Authored-by: Vishwanatha-HD <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
1 parent 55587ef commit 1044022

File tree

3 files changed

+10
-7
lines changed

3 files changed

+10
-7
lines changed

cpp/src/parquet/geospatial/util_internal.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ void WKBGeometryBounder::MergeGeometry(::arrow::util::span<const uint8_t> bytes_
162162

163163
void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* src, bool record_wkb_type) {
164164
uint8_t endian = src->ReadUInt8();
165-
#if defined(ARROW_LITTLE_ENDIAN)
165+
#if ARROW_LITTLE_ENDIAN
166166
bool swap = endian != 0x01;
167167
#else
168168
bool swap = endian != 0x00;

cpp/src/parquet/level_conversion_inc.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -299,14 +299,15 @@ int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_si
299299
ARROW_DCHECK_LE(batch_size, kExtractBitsSize);
300300

301301
// Greater than level_info.def_level - 1 implies >= the def_level
302-
auto defined_bitmap = static_cast<extract_bitmap_t>(
303-
internal::GreaterThanBitmap(def_levels, batch_size, level_info.def_level - 1));
302+
auto defined_bitmap = static_cast<extract_bitmap_t>(::arrow::bit_util::FromLittleEndian(
303+
internal::GreaterThanBitmap(def_levels, batch_size, level_info.def_level - 1)));
304304

305305
if (has_repeated_parent) {
306306
// Greater than level_info.repeated_ancestor_def_level - 1 implies >= the
307307
// repeated_ancestor_def_level
308-
auto present_bitmap = static_cast<extract_bitmap_t>(internal::GreaterThanBitmap(
309-
def_levels, batch_size, level_info.repeated_ancestor_def_level - 1));
308+
auto present_bitmap = static_cast<extract_bitmap_t>(
309+
::arrow::bit_util::FromLittleEndian(internal::GreaterThanBitmap(
310+
def_levels, batch_size, level_info.repeated_ancestor_def_level - 1)));
310311
auto selected_bits = ExtractBits(defined_bitmap, present_bitmap);
311312
int64_t selected_count = ::arrow::bit_util::PopCount(present_bitmap);
312313
if (ARROW_PREDICT_FALSE(selected_count > upper_bound_remaining)) {

cpp/src/parquet/test_util.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "arrow/extension_type.h"
3434
#include "arrow/io/memory.h"
3535
#include "arrow/testing/util.h"
36+
#include "arrow/util/endian.h"
3637
#include "arrow/util/float16.h"
3738

3839
#include "parquet/column_page.h"
@@ -319,8 +320,9 @@ class DataPageBuilder {
319320
encoder.Encode(static_cast<int>(levels.size()), levels.data());
320321

321322
int32_t rle_bytes = encoder.len();
323+
int32_t rle_bytes_le = ::arrow::bit_util::ToLittleEndian(rle_bytes);
322324
PARQUET_THROW_NOT_OK(
323-
sink_->Write(reinterpret_cast<const uint8_t*>(&rle_bytes), sizeof(int32_t)));
325+
sink_->Write(reinterpret_cast<const uint8_t*>(&rle_bytes_le), sizeof(int32_t)));
324326
PARQUET_THROW_NOT_OK(sink_->Write(encode_buffer.data(), rle_bytes));
325327
}
326328
};
@@ -835,7 +837,7 @@ inline void GenerateData<FLBA>(int num_values, FLBA* out, std::vector<uint8_t>*
835837
// ----------------------------------------------------------------------
836838
// Test utility functions for geometry
837839

838-
#if defined(ARROW_LITTLE_ENDIAN)
840+
#if ARROW_LITTLE_ENDIAN
839841
static constexpr uint8_t kWkbNativeEndianness = 0x01;
840842
#else
841843
static constexpr uint8_t kWkbNativeEndianness = 0x00;

0 commit comments

Comments
 (0)