2525
2626#include " generated/parquet_types.h"
2727
28+ #include " arrow/util/endian.h"
2829#include " parquet/bloom_filter.h"
2930#include " parquet/exception.h"
3031#include " parquet/thrift_internal.h"
@@ -203,13 +204,14 @@ bool BlockSplitBloomFilter::FindHash(uint64_t hash) const {
203204 const uint32_t bucket_index =
204205 static_cast <uint32_t >(((hash >> 32 ) * (num_bytes_ / kBytesPerFilterBlock )) >> 32 );
205206 const uint32_t key = static_cast <uint32_t >(hash);
206- const uint32_t * bitset32 = reinterpret_cast <const uint32_t *>(data_->data ());
207+ const uint32_t * raw_bitset32 = reinterpret_cast <const uint32_t *>(data_->data ());
207208
208209 for (int i = 0 ; i < kBitsSetPerBlock ; ++i) {
210+ const uint32_t bitset_word = ::arrow::bit_util::FromLittleEndian (
211+ raw_bitset32[kBitsSetPerBlock * bucket_index + i]);
209212 // Calculate mask for key in the given bitset.
210213 const uint32_t mask = UINT32_C (0x1 ) << ((key * SALT[i]) >> 27 );
211- if (ARROW_PREDICT_FALSE (0 ==
212- (bitset32[kBitsSetPerBlock * bucket_index + i] & mask))) {
214+ if (ARROW_PREDICT_FALSE (0 == (bitset_word & mask))) {
213215 return false ;
214216 }
215217 }
@@ -220,12 +222,16 @@ void BlockSplitBloomFilter::InsertHashImpl(uint64_t hash) {
220222 const uint32_t bucket_index =
221223 static_cast <uint32_t >(((hash >> 32 ) * (num_bytes_ / kBytesPerFilterBlock )) >> 32 );
222224 const uint32_t key = static_cast <uint32_t >(hash);
223- uint32_t * bitset32 = reinterpret_cast <uint32_t *>(data_->mutable_data ());
225+ uint32_t * raw_bitset32 = reinterpret_cast <uint32_t *>(data_->mutable_data ());
224226
225227 for (int i = 0 ; i < kBitsSetPerBlock ; i++) {
228+ const int word_index = bucket_index * kBitsSetPerBlock + i;
229+ uint32_t bitset_word = ::arrow::bit_util::FromLittleEndian (raw_bitset32[word_index]);
230+
226231 // Calculate mask for key in the given bitset.
227232 const uint32_t mask = UINT32_C (0x1 ) << ((key * SALT[i]) >> 27 );
228- bitset32[bucket_index * kBitsSetPerBlock + i] |= mask;
233+ bitset_word |= mask;
234+ raw_bitset32[word_index] = ::arrow::bit_util::ToLittleEndian (bitset_word);
229235 }
230236}
231237
0 commit comments