Skip to content

Commit 14656f5

Browse files
authored
feat(small): Display NullEquality in join executor's EXPLAIN output (#17664)
* Clarify null-equal explain expectations * Format null equality display strings * fix test * review: more concise message * review: more concise message
1 parent 75c7720 commit 14656f5

File tree

7 files changed

+87
-18
lines changed

7 files changed

+87
-18
lines changed

datafusion/core/tests/physical_optimizer/projection_pushdown.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1281,7 +1281,7 @@ fn test_hash_join_after_projection() -> Result<()> {
12811281
&JoinType::Inner,
12821282
None,
12831283
PartitionMode::Auto,
1284-
NullEquality::NullEqualsNull,
1284+
NullEquality::NullEqualsNothing,
12851285
)?);
12861286
let projection: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new(
12871287
vec![

datafusion/core/tests/physical_optimizer/test_utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ pub fn hash_join_exec(
236236
join_type,
237237
None,
238238
PartitionMode::Partitioned,
239-
NullEquality::NullEqualsNull,
239+
NullEquality::NullEqualsNothing,
240240
)?))
241241
}
242242

datafusion/physical-plan/src/joins/hash_join/exec.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,12 @@ impl DisplayAs for HashJoinExec {
725725
} else {
726726
"".to_string()
727727
};
728+
let display_null_equality =
729+
if matches!(self.null_equality(), NullEquality::NullEqualsNull) {
730+
", NullsEqual: true"
731+
} else {
732+
""
733+
};
728734
let on = self
729735
.on
730736
.iter()
@@ -733,8 +739,13 @@ impl DisplayAs for HashJoinExec {
733739
.join(", ");
734740
write!(
735741
f,
736-
"HashJoinExec: mode={:?}, join_type={:?}, on=[{}]{}{}",
737-
self.mode, self.join_type, on, display_filter, display_projections,
742+
"HashJoinExec: mode={:?}, join_type={:?}, on=[{}]{}{}{}",
743+
self.mode,
744+
self.join_type,
745+
on,
746+
display_filter,
747+
display_projections,
748+
display_null_equality,
738749
)
739750
}
740751
DisplayFormatType::TreeRender => {
@@ -753,6 +764,10 @@ impl DisplayAs for HashJoinExec {
753764

754765
writeln!(f, "on={on}")?;
755766

767+
if matches!(self.null_equality(), NullEquality::NullEqualsNull) {
768+
writeln!(f, "NullsEqual: true")?;
769+
}
770+
756771
if let Some(filter) = self.filter.as_ref() {
757772
writeln!(f, "filter={filter}")?;
758773
}

datafusion/physical-plan/src/joins/sort_merge_join/exec.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -351,15 +351,22 @@ impl DisplayAs for SortMergeJoinExec {
351351
.map(|(c1, c2)| format!("({c1}, {c2})"))
352352
.collect::<Vec<String>>()
353353
.join(", ");
354+
let display_null_equality =
355+
if matches!(self.null_equality(), NullEquality::NullEqualsNull) {
356+
", NullsEqual: true"
357+
} else {
358+
""
359+
};
354360
write!(
355361
f,
356-
"SortMergeJoin: join_type={:?}, on=[{}]{}",
362+
"SortMergeJoin: join_type={:?}, on=[{}]{}{}",
357363
self.join_type,
358364
on,
359365
self.filter.as_ref().map_or("".to_string(), |f| format!(
360366
", filter={}",
361367
f.expression()
362-
))
368+
)),
369+
display_null_equality,
363370
)
364371
}
365372
DisplayFormatType::TreeRender => {
@@ -375,7 +382,13 @@ impl DisplayAs for SortMergeJoinExec {
375382
if self.join_type() != JoinType::Inner {
376383
writeln!(f, "join_type={:?}", self.join_type)?;
377384
}
378-
writeln!(f, "on={on}")
385+
writeln!(f, "on={on}")?;
386+
387+
if matches!(self.null_equality(), NullEquality::NullEqualsNull) {
388+
writeln!(f, "NullsEqual: true")?;
389+
}
390+
391+
Ok(())
379392
}
380393
}
381394
}

datafusion/sqllogictest/test_files/create_external_table.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,4 +302,4 @@ CREATE EXTERNAL TABLE release.bar STORED AS parquet LOCATION '../../parquet-test
302302
statement error DataFusion error: SQL error: ParserError\("'IF NOT EXISTS' cannot coexist with 'REPLACE'"\)
303303
CREATE OR REPLACE EXTERNAL TABLE IF NOT EXISTS t_conflict(c1 int)
304304
STORED AS CSV
305-
LOCATION 'foo.csv';
305+
LOCATION 'foo.csv';

datafusion/sqllogictest/test_files/join_is_not_distinct_from.slt

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,51 @@ logical_plan
8181
physical_plan
8282
01)ProjectionExec: expr=[id@0 as t1_id, id@2 as t2_id, val@1 as val, val@3 as val]
8383
02)--CoalesceBatchesExec: target_batch_size=8192
84-
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@1, val@1)]
84+
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@1, val@1)], NullsEqual: true
8585
04)------DataSourceExec: partitions=1, partition_sizes=[1]
8686
05)------DataSourceExec: partitions=1, partition_sizes=[1]
8787

88+
statement ok
89+
set datafusion.explain.format = "tree";
90+
91+
# Tree explain should highlight null equality semantics
92+
query TT
93+
EXPLAIN SELECT t1.id AS t1_id, t2.id AS t2_id, t1.val, t2.val
94+
FROM t1
95+
JOIN t2 ON t1.val IS NOT DISTINCT FROM t2.val
96+
----
97+
physical_plan
98+
01)┌───────────────────────────┐
99+
02)│ ProjectionExec │
100+
03)│ -------------------- │
101+
04)│ t1_id: id │
102+
05)│ t2_id: id │
103+
06)│ val: val │
104+
07)└─────────────┬─────────────┘
105+
08)┌─────────────┴─────────────┐
106+
09)│ CoalesceBatchesExec │
107+
10)│ -------------------- │
108+
11)│ target_batch_size: │
109+
12)│ 8192 │
110+
13)└─────────────┬─────────────┘
111+
14)┌─────────────┴─────────────┐
112+
15)│ HashJoinExec │
113+
16)│ -------------------- │
114+
17)│ NullsEqual: true ├──────────────┐
115+
18)│ │ │
116+
19)│ on: (val = val) │ │
117+
20)└─────────────┬─────────────┘ │
118+
21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
119+
22)│ DataSourceExec ││ DataSourceExec │
120+
23)│ -------------------- ││ -------------------- │
121+
24)│ bytes: 288 ││ bytes: 288 │
122+
25)│ format: memory ││ format: memory │
123+
26)│ rows: 1 ││ rows: 1 │
124+
27)└───────────────────────────┘└───────────────────────────┘
125+
126+
statement ok
127+
set datafusion.explain.format = "indent";
128+
88129
# For nested expression comparision, it should still able to be converted to Hash Join
89130
query IIII rowsort
90131
SELECT t1.id AS t1_id, t2.id AS t2_id, t1.val, t2.val
@@ -108,7 +149,7 @@ logical_plan
108149
physical_plan
109150
01)ProjectionExec: expr=[id@0 as t1_id, id@2 as t2_id, val@1 as val, val@3 as val]
110151
02)--CoalesceBatchesExec: target_batch_size=8192
111-
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1.val + Int64(1)@2, t2.val + Int64(1)@2)], projection=[id@0, val@1, id@3, val@4]
152+
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1.val + Int64(1)@2, t2.val + Int64(1)@2)], projection=[id@0, val@1, id@3, val@4], NullsEqual: true
112153
04)------CoalescePartitionsExec
113154
05)--------ProjectionExec: expr=[id@0 as id, val@1 as val, CAST(val@1 AS Int64) + 1 as t1.val + Int64(1)]
114155
06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
@@ -139,7 +180,7 @@ logical_plan
139180
physical_plan
140181
01)ProjectionExec: expr=[id@0 as t1_id, id@2 as t2_id, val@1 as val, val@3 as val]
141182
02)--CoalesceBatchesExec: target_batch_size=8192
142-
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1.val + Int64(1)@2, t2.val + Int64(1)@2)], filter=CAST(val@0 AS Int64) % 3 IS DISTINCT FROM CAST(val@1 AS Int64) % 3, projection=[id@0, val@1, id@3, val@4]
183+
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1.val + Int64(1)@2, t2.val + Int64(1)@2)], filter=CAST(val@0 AS Int64) % 3 IS DISTINCT FROM CAST(val@1 AS Int64) % 3, projection=[id@0, val@1, id@3, val@4], NullsEqual: true
143184
04)------ProjectionExec: expr=[id@0 as id, val@1 as val, CAST(val@1 AS Int64) + 1 as t1.val + Int64(1)]
144185
05)--------DataSourceExec: partitions=1, partition_sizes=[1]
145186
06)------ProjectionExec: expr=[id@0 as id, val@1 as val, CAST(val@1 AS Int64) + 1 as t2.val + Int64(1)]
@@ -201,11 +242,11 @@ logical_plan
201242
physical_plan
202243
01)ProjectionExec: expr=[id@0 as t1_id, id@2 as t2_id, val@1 as val, val@3 as val]
203244
02)--CoalesceBatchesExec: target_batch_size=8192
204-
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@0, val@1)], projection=[id@1, val@2, id@3, val@4]
245+
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@0, val@1)], projection=[id@1, val@2, id@3, val@4], NullsEqual: true
205246
04)------DataSourceExec: partitions=1, partition_sizes=[1]
206247
05)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
207248
06)--------CoalesceBatchesExec: target_batch_size=8192
208-
07)----------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@1, val@1)]
249+
07)----------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@1, val@1)], NullsEqual: true
209250
08)------------DataSourceExec: partitions=1, partition_sizes=[1]
210251
09)------------DataSourceExec: partitions=1, partition_sizes=[1]
211252

@@ -246,7 +287,7 @@ JOIN t4 ON (t3.val1 IS NOT DISTINCT FROM t4.val1) AND (t3.val2 IS NOT DISTINCT F
246287
01)ProjectionExec: expr=[id@0 as t3_id, id@3 as t4_id, val1@1 as val1, val1@4 as val1, val2@2 as val2, val2@5 as val2]
247288
02)--CoalesceBatchesExec: target_batch_size=8192
248289
02)--Inner Join: t3.val1 = t4.val1, t3.val2 = t4.val2
249-
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val1@1, val1@1), (val2@2, val2@2)]
290+
03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val1@1, val1@1), (val2@2, val2@2)], NullsEqual: true
250291
03)----TableScan: t3 projection=[id, val1, val2]
251292
04)------DataSourceExec: partitions=1, partition_sizes=[1]
252293
04)----TableScan: t4 projection=[id, val1, val2]

datafusion/sqllogictest/test_files/union.slt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ logical_plan
308308
physical_plan
309309
01)UnionExec
310310
02)--CoalesceBatchesExec: target_batch_size=2
311-
03)----HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(id@0, CAST(t2.id AS Int32)@2), (name@1, name@1)]
311+
03)----HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(id@0, CAST(t2.id AS Int32)@2), (name@1, name@1)], NullsEqual: true
312312
04)------CoalescePartitionsExec
313313
05)--------AggregateExec: mode=FinalPartitioned, gby=[id@0 as id, name@1 as name], aggr=[]
314314
06)----------CoalesceBatchesExec: target_batch_size=2
@@ -321,7 +321,7 @@ physical_plan
321321
13)----------DataSourceExec: partitions=1, partition_sizes=[1]
322322
14)--ProjectionExec: expr=[CAST(id@0 AS Int32) as id, name@1 as name]
323323
15)----CoalesceBatchesExec: target_batch_size=2
324-
16)------HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(CAST(t2.id AS Int32)@2, id@0), (name@1, name@1)], projection=[id@0, name@1]
324+
16)------HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(CAST(t2.id AS Int32)@2, id@0), (name@1, name@1)], projection=[id@0, name@1], NullsEqual: true
325325
17)--------CoalescePartitionsExec
326326
18)----------ProjectionExec: expr=[id@0 as id, name@1 as name, CAST(id@0 AS Int32) as CAST(t2.id AS Int32)]
327327
19)------------AggregateExec: mode=FinalPartitioned, gby=[id@0 as id, name@1 as name], aggr=[]
@@ -378,7 +378,7 @@ logical_plan
378378
physical_plan
379379
01)UnionExec
380380
02)--CoalesceBatchesExec: target_batch_size=2
381-
03)----HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(name@0, name@0)]
381+
03)----HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(name@0, name@0)], NullsEqual: true
382382
04)------CoalescePartitionsExec
383383
05)--------AggregateExec: mode=FinalPartitioned, gby=[name@0 as name], aggr=[]
384384
06)----------CoalesceBatchesExec: target_batch_size=2
@@ -389,7 +389,7 @@ physical_plan
389389
11)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
390390
12)--------DataSourceExec: partitions=1, partition_sizes=[1]
391391
13)--CoalesceBatchesExec: target_batch_size=2
392-
14)----HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(name@0, name@0)]
392+
14)----HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(name@0, name@0)], NullsEqual: true
393393
15)------CoalescePartitionsExec
394394
16)--------AggregateExec: mode=FinalPartitioned, gby=[name@0 as name], aggr=[]
395395
17)----------CoalesceBatchesExec: target_batch_size=2

0 commit comments

Comments
 (0)