Skip to content

Commit a37f5a3

Browse files
GH-48151: [C++][Parquet] Fix arrow-ipc-message-internal-test & arrow-acero-hash-join-node-test failures
1 parent 7cd2f2a commit a37f5a3

File tree

2 files changed

+52
-5
lines changed

2 files changed

+52
-5
lines changed

cpp/src/arrow/acero/hash_join.cc

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "arrow/compute/row/row_encoder_internal.h"
3232
#include "arrow/util/logging_internal.h"
3333
#include "arrow/util/tracing_internal.h"
34+
#include "arrow/util/endian.h"
3435

3536
namespace arrow {
3637

@@ -306,12 +307,33 @@ class HashJoinBasicImpl : public HashJoinImpl {
306307

307308
size_t num_probed_rows = match.size() + no_match.size();
308309
if (mask.is_scalar()) {
309-
const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
310-
if (mask_scalar.is_valid && mask_scalar.value) {
311-
// All rows passed, nothing left to do
312-
return Status::OK();
310+
#if ARROW_LITTLE_ENDIAN
311+
const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
312+
if (mask_scalar.is_valid && mask_scalar.value) {
313+
// All rows passed, nothing left to do
314+
return Status::OK();
315+
#else
316+
// Check if the scalar is a BooleanScalar before casting
317+
if (mask.scalar()->type->id() == Type::BOOL) {
318+
const auto& mask_scalar = mask.scalar_as<BooleanScalar>();
319+
if (mask_scalar.is_valid && mask_scalar.value) {
320+
// All rows passed, nothing left to do
321+
return Status::OK();
322+
} else {
323+
// Nothing passed, no_match becomes everything
324+
no_match.resize(num_probed_rows);
325+
std::iota(no_match.begin(), no_match.end(), 0);
326+
match_left.clear();
327+
match_right.clear();
328+
match.clear();
329+
return Status::OK();
330+
}
331+
#endif
313332
} else {
314-
// Nothing passed, no_match becomes everything
333+
// On Little-endian systems: Nothing passed, no_match becomes everything
334+
// On Big-endian systems:
335+
// If it's not a BooleanScalar (e.g., NullScalar), treat as false
336+
// This handles cases like literal(NullScalar()) in filter expressions
315337
no_match.resize(num_probed_rows);
316338
std::iota(no_match.begin(), no_match.end(), 0);
317339
match_left.clear();

cpp/src/arrow/ipc/message_internal_test.cc

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "arrow/ipc/options.h"
2626
#include "arrow/testing/gtest_util.h"
2727
#include "arrow/util/key_value_metadata.h"
28+
#include "arrow/util/endian.h"
2829

2930
namespace arrow::ipc::internal {
3031

@@ -55,6 +56,7 @@ TEST(TestMessageInternal, TestByteIdentical) {
5556
ASSERT_OK(
5657
WriteSchemaMessage(*schema, mapper, IpcWriteOptions::Defaults(), &out_buffer));
5758

59+
#if ARROW_LITTLE_ENDIAN
5860
// This is example output from macOS+ARM+LLVM
5961
const uint8_t expected[] = {
6062
0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x0E, 0x00, 0x06, 0x00, 0x05, 0x00,
@@ -74,6 +76,29 @@ TEST(TestMessageInternal, TestByteIdentical) {
7476
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x66, 0x30, 0x00, 0x00, 0x08, 0x00,
7577
0x0C, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
7678
0x40, 0x00, 0x00, 0x00};
79+
#else
80+
// On big-endian systems, 4 bytes are appended to indicate a BE system. Hence the
81+
// total size is 4 bytes larger than on LE systems.
82+
// This is example output from Linux+s390x+GCC
83+
const uint8_t expected[] = {
84+
0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x0E, 0x00, 0x06, 0x00, 0x05, 0x00,
85+
0x08, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
86+
0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x06, 0x00, 0x08, 0x00, 0x0C, 0x00, 0x0A, 0x00,
87+
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
88+
0x02, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xD8, 0xFF,
89+
0xFF, 0xFF, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00,
90+
0x6B, 0x65, 0x79, 0x5F, 0x32, 0x5F, 0x76, 0x61, 0x6C, 0x75, 0x65, 0x00, 0x05, 0x00,
91+
0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x32, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0C, 0x00,
92+
0x04, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00,
93+
0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x5F, 0x76, 0x61,
94+
0x6C, 0x75, 0x65, 0x00, 0x05, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x00,
95+
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x14, 0x00,
96+
0x08, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00,
97+
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00,
98+
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x66, 0x30,
99+
0x00, 0x00, 0x08, 0x00, 0x0C, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00,
100+
0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00};
101+
#endif
77102
Buffer expected_buffer(expected, sizeof(expected));
78103

79104
AssertBufferEqual(expected_buffer, *out_buffer);

0 commit comments

Comments
 (0)