Skip to content

Commit 7dbe358

Browse files
GH-48210 Fix Bloom Filter logic to enable Parquet DB support on s390x
1 parent 2fb2f79 commit 7dbe358

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

cpp/src/parquet/bloom_filter.cc

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525

2626
#include "generated/parquet_types.h"
2727

28+
#include "arrow/util/endian.h"
2829
#include "parquet/bloom_filter.h"
2930
#include "parquet/exception.h"
3031
#include "parquet/thrift_internal.h"
@@ -203,13 +204,14 @@ bool BlockSplitBloomFilter::FindHash(uint64_t hash) const {
203204
const uint32_t bucket_index =
204205
static_cast<uint32_t>(((hash >> 32) * (num_bytes_ / kBytesPerFilterBlock)) >> 32);
205206
const uint32_t key = static_cast<uint32_t>(hash);
206-
const uint32_t* bitset32 = reinterpret_cast<const uint32_t*>(data_->data());
207+
const uint32_t* raw_bitset32 = reinterpret_cast<const uint32_t*>(data_->data());
207208

208209
for (int i = 0; i < kBitsSetPerBlock; ++i) {
210+
const uint32_t bitset_word = ::arrow::bit_util::FromLittleEndian(
211+
raw_bitset32[kBitsSetPerBlock * bucket_index + i]);
209212
// Calculate mask for key in the given bitset.
210213
const uint32_t mask = UINT32_C(0x1) << ((key * SALT[i]) >> 27);
211-
if (ARROW_PREDICT_FALSE(0 ==
212-
(bitset32[kBitsSetPerBlock * bucket_index + i] & mask))) {
214+
if (ARROW_PREDICT_FALSE(0 == (bitset_word & mask))) {
213215
return false;
214216
}
215217
}
@@ -220,12 +222,16 @@ void BlockSplitBloomFilter::InsertHashImpl(uint64_t hash) {
220222
const uint32_t bucket_index =
221223
static_cast<uint32_t>(((hash >> 32) * (num_bytes_ / kBytesPerFilterBlock)) >> 32);
222224
const uint32_t key = static_cast<uint32_t>(hash);
223-
uint32_t* bitset32 = reinterpret_cast<uint32_t*>(data_->mutable_data());
225+
uint32_t* raw_bitset32 = reinterpret_cast<uint32_t*>(data_->mutable_data());
224226

225227
for (int i = 0; i < kBitsSetPerBlock; i++) {
228+
const int word_index = bucket_index * kBitsSetPerBlock + i;
229+
uint32_t bitset_word = ::arrow::bit_util::FromLittleEndian(raw_bitset32[word_index]);
230+
226231
// Calculate mask for key in the given bitset.
227232
const uint32_t mask = UINT32_C(0x1) << ((key * SALT[i]) >> 27);
228-
bitset32[bucket_index * kBitsSetPerBlock + i] |= mask;
233+
bitset_word |= mask;
234+
raw_bitset32[word_index] = ::arrow::bit_util::ToLittleEndian(bitset_word);
229235
}
230236
}
231237

0 commit comments

Comments
 (0)