@@ -81,10 +81,51 @@ logical_plan
8181physical_plan
828201)ProjectionExec: expr=[id@0 as t1_id, id@2 as t2_id, val@1 as val, val@3 as val]
838302)--CoalesceBatchesExec: target_batch_size=8192
84- 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@1, val@1)]
84+ 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@1, val@1)], NullsEqual: true
858504)------DataSourceExec: partitions=1, partition_sizes=[1]
868605)------DataSourceExec: partitions=1, partition_sizes=[1]
8787
88+ statement ok
89+ set datafusion.explain.format = "tree";
90+
91+ # Tree explain should highlight null equality semantics
92+ query TT
93+ EXPLAIN SELECT t1.id AS t1_id, t2.id AS t2_id, t1.val, t2.val
94+ FROM t1
95+ JOIN t2 ON t1.val IS NOT DISTINCT FROM t2.val
96+ ----
97+ physical_plan
98+ 01)┌───────────────────────────┐
99+ 02)│ ProjectionExec │
100+ 03)│ -------------------- │
101+ 04)│ t1_id: id │
102+ 05)│ t2_id: id │
103+ 06)│ val: val │
104+ 07)└─────────────┬─────────────┘
105+ 08)┌─────────────┴─────────────┐
106+ 09)│ CoalesceBatchesExec │
107+ 10)│ -------------------- │
108+ 11)│ target_batch_size: │
109+ 12)│ 8192 │
110+ 13)└─────────────┬─────────────┘
111+ 14)┌─────────────┴─────────────┐
112+ 15)│ HashJoinExec │
113+ 16)│ -------------------- │
114+ 17)│ NullsEqual: true ├──────────────┐
115+ 18)│ │ │
116+ 19)│ on: (val = val) │ │
117+ 20)└─────────────┬─────────────┘ │
118+ 21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
119+ 22)│ DataSourceExec ││ DataSourceExec │
120+ 23)│ -------------------- ││ -------------------- │
121+ 24)│ bytes: 288 ││ bytes: 288 │
122+ 25)│ format: memory ││ format: memory │
123+ 26)│ rows: 1 ││ rows: 1 │
124+ 27)└───────────────────────────┘└───────────────────────────┘
125+
126+ statement ok
127+ set datafusion.explain.format = "indent";
128+
88129# For nested expression comparison, it should still be able to be converted to Hash Join
89130query IIII rowsort
90131SELECT t1.id AS t1_id, t2.id AS t2_id, t1.val, t2.val
@@ -108,7 +149,7 @@ logical_plan
108149physical_plan
10915001)ProjectionExec: expr=[id@0 as t1_id, id@2 as t2_id, val@1 as val, val@3 as val]
11015102)--CoalesceBatchesExec: target_batch_size=8192
111- 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1.val + Int64(1)@2, t2.val + Int64(1)@2)], projection=[id@0, val@1, id@3, val@4]
152+ 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1.val + Int64(1)@2, t2.val + Int64(1)@2)], projection=[id@0, val@1, id@3, val@4], NullsEqual: true
11215304)------CoalescePartitionsExec
11315405)--------ProjectionExec: expr=[id@0 as id, val@1 as val, CAST(val@1 AS Int64) + 1 as t1.val + Int64(1)]
11415506)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
@@ -139,7 +180,7 @@ logical_plan
139180physical_plan
14018101)ProjectionExec: expr=[id@0 as t1_id, id@2 as t2_id, val@1 as val, val@3 as val]
14118202)--CoalesceBatchesExec: target_batch_size=8192
142- 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1.val + Int64(1)@2, t2.val + Int64(1)@2)], filter=CAST(val@0 AS Int64) % 3 IS DISTINCT FROM CAST(val@1 AS Int64) % 3, projection=[id@0, val@1, id@3, val@4]
183+ 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1.val + Int64(1)@2, t2.val + Int64(1)@2)], filter=CAST(val@0 AS Int64) % 3 IS DISTINCT FROM CAST(val@1 AS Int64) % 3, projection=[id@0, val@1, id@3, val@4], NullsEqual: true
14318404)------ProjectionExec: expr=[id@0 as id, val@1 as val, CAST(val@1 AS Int64) + 1 as t1.val + Int64(1)]
14418505)--------DataSourceExec: partitions=1, partition_sizes=[1]
14518606)------ProjectionExec: expr=[id@0 as id, val@1 as val, CAST(val@1 AS Int64) + 1 as t2.val + Int64(1)]
@@ -201,11 +242,11 @@ logical_plan
201242physical_plan
20224301)ProjectionExec: expr=[id@0 as t1_id, id@2 as t2_id, val@1 as val, val@3 as val]
20324402)--CoalesceBatchesExec: target_batch_size=8192
204- 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@0, val@1)], projection=[id@1, val@2, id@3, val@4]
245+ 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@0, val@1)], projection=[id@1, val@2, id@3, val@4], NullsEqual: true
20524604)------DataSourceExec: partitions=1, partition_sizes=[1]
20624705)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
20724806)--------CoalesceBatchesExec: target_batch_size=8192
208- 07)----------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@1, val@1)]
249+ 07)----------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val@1, val@1)], NullsEqual: true
20925008)------------DataSourceExec: partitions=1, partition_sizes=[1]
21025109)------------DataSourceExec: partitions=1, partition_sizes=[1]
211252
@@ -246,7 +287,7 @@ JOIN t4 ON (t3.val1 IS NOT DISTINCT FROM t4.val1) AND (t3.val2 IS NOT DISTINCT F
24628701)ProjectionExec: expr=[id@0 as t3_id, id@3 as t4_id, val1@1 as val1, val1@4 as val1, val2@2 as val2, val2@5 as val2]
24728802)--CoalesceBatchesExec: target_batch_size=8192
24828902)--Inner Join: t3.val1 = t4.val1, t3.val2 = t4.val2
249- 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val1@1, val1@1), (val2@2, val2@2)]
290+ 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(val1@1, val1@1), (val2@2, val2@2)], NullsEqual: true
25029103)----TableScan: t3 projection=[id, val1, val2]
25129204)------DataSourceExec: partitions=1, partition_sizes=[1]
25229304)----TableScan: t4 projection=[id, val1, val2]
0 commit comments