From dc4cb9b2c2350fc74968acfa6644be25b615cbce Mon Sep 17 00:00:00 2001 From: Arenatlx <314806019@qq.com> Date: Thu, 2 Jan 2025 15:10:51 +0800 Subject: [PATCH] planner: leverage stats collection rule to get operator num (#58635) ref pingcap/tidb#51664 --- pkg/planner/cascades/memo/memo.go | 10 +- .../core/casetest/cascades/memo_test.go | 16 ++- .../cascades/testdata/cascades_suite_out.json | 117 ++++++++++++------ .../core/collect_column_stats_usage.go | 9 +- .../core/collect_column_stats_usage_test.go | 4 +- pkg/planner/core/rule_collect_plan_stats.go | 5 +- pkg/sessionctx/stmtctx/stmtctx.go | 3 + 7 files changed, 111 insertions(+), 53 deletions(-) diff --git a/pkg/planner/cascades/memo/memo.go b/pkg/planner/cascades/memo/memo.go index 0f6a9c8a46fd7..59da150eb9c9e 100644 --- a/pkg/planner/cascades/memo/memo.go +++ b/pkg/planner/cascades/memo/memo.go @@ -48,14 +48,18 @@ type Memo struct { } // NewMemo creates a new memo. -func NewMemo() *Memo { +func NewMemo(caps ...uint64) *Memo { + // default capacity is 4; an explicit capacity passed as the first argument overrides it. + capacity := uint64(4) + if len(caps) > 0 { + capacity = caps[0] + } return &Memo{ groupIDGen: &GroupIDGenerator{id: 0}, groups: list.New(), groupID2Group: make(map[GroupID]*list.Element), hash2GlobalGroupExpr: hashmap.New[*GroupExpression, *GroupExpression]( - // todo: feel the operator count at the prev normalization rule. 
- 4, + capacity, func(a, b *GroupExpression) bool { return a.Equals(b) }, diff --git a/pkg/planner/core/casetest/cascades/memo_test.go b/pkg/planner/core/casetest/cascades/memo_test.go index 09bf65337e03c..fe1f868b7926c 100644 --- a/pkg/planner/core/casetest/cascades/memo_test.go +++ b/pkg/planner/core/casetest/cascades/memo_test.go @@ -50,8 +50,9 @@ func TestDeriveStats(t *testing.T) { p := parser.New() var input []string var output []struct { - SQL string - Str []string + SQL string + Str []string + OpNum uint64 } statsSuiteData := GetCascadesSuiteData() statsSuiteData.LoadTestCases(t, &input, &output) @@ -72,7 +73,7 @@ func TestDeriveStats(t *testing.T) { lp := p.(base.LogicalPlan) // after stats derive is done, which means the up-down propagation of group ndv is done, in bottom-up building phase // of memo, we don't have to expect the upper operator's group cols passing down anymore. - mm := memo.NewMemo() + mm := memo.NewMemo(lp.SCtx().GetSessionVars().StmtCtx.OperatorNum) _, err = mm.Init(lp) require.Nil(t, err) // check the stats state in memo group. @@ -117,6 +118,7 @@ func TestDeriveStats(t *testing.T) { testdata.OnRecord(func() { output[i].SQL = tt output[i].Str = strs + output[i].OpNum = lp.SCtx().GetSessionVars().StmtCtx.OperatorNum }) require.Equal(t, output[i].Str, strs, "case i:"+strconv.Itoa(i)+" "+tt) } @@ -142,8 +144,9 @@ func TestGroupNDVCols(t *testing.T) { p := parser.New() var input []string var output []struct { - SQL string - Str []string + SQL string + Str []string + OpNum uint64 } statsSuiteData := GetCascadesSuiteData() statsSuiteData.LoadTestCases(t, &input, &output) @@ -163,7 +166,7 @@ func TestGroupNDVCols(t *testing.T) { lp := p.(base.LogicalPlan) // after stats derive is done, which means the up-down propagation of group ndv is done, in bottom-up building phase // of memo, we don't have to expect the upper operator's group cols passing down anymore. 
- mm := memo.NewMemo() + mm := memo.NewMemo(lp.SCtx().GetSessionVars().StmtCtx.OperatorNum) mm.Init(lp) // check the stats state in memo group. b := &bytes.Buffer{} @@ -207,6 +210,7 @@ func TestGroupNDVCols(t *testing.T) { testdata.OnRecord(func() { output[i].SQL = tt output[i].Str = strs + output[i].OpNum = lp.SCtx().GetSessionVars().StmtCtx.OperatorNum }) require.Equal(t, output[i].Str, strs, "case i:"+strconv.Itoa(i)+" "+tt) } diff --git a/pkg/planner/core/casetest/cascades/testdata/cascades_suite_out.json b/pkg/planner/core/casetest/cascades/testdata/cascades_suite_out.json index dba0440108677..c9545c4afdec9 100644 --- a/pkg/planner/core/casetest/cascades/testdata/cascades_suite_out.json +++ b/pkg/planner/core/casetest/cascades/testdata/cascades_suite_out.json @@ -8,7 +8,8 @@ "GID:1, GE:DataSource_1{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_2{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_3{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 3 }, { "SQL": "select * from t1, t2 where t1.a = t2.a and t1.b = t2.b", @@ -17,7 +18,8 @@ "GID:2, GE:DataSource_5{}, logic prop:{stats:{count 10, ColNDVs map[4:3 5:3], GroupNDVs [{[4 5] 9}]}, schema:{Column: [test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Join_9{GID:1, GID:2}, logic prop:{stats:{count 5.555555555555555, ColNDVs map[1:2 2:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Projection_8{GID:3}, logic prop:{stats:{count 5.555555555555555, ColNDVs map[1:2 2:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 4 }, { 
"SQL": "select count(1) from t1 where a > 0 group by a, b", @@ -25,7 +27,8 @@ "GID:1, GE:DataSource_10{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_12{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_13{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 3 }, { "SQL": "select count(1) from t1 where b > 0 group by a, b", @@ -33,7 +36,8 @@ "GID:1, GE:DataSource_14{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_16{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_17{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 3 }, { "SQL": "select count(1) from t1 where cos(a) > 0 group by a, b", @@ -41,7 +45,8 @@ "GID:1, GE:DataSource_18{}, logic prop:{stats:{count 4, ColNDVs map[1:1.6 2:1.6], GroupNDVs [{[1 2] 3.2}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_20{GID:1}, logic prop:{stats:{count 3.2, ColNDVs map[4:3.2], GroupNDVs [{[1 2] 3.2}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_21{GID:2}, logic prop:{stats:{count 3.2, ColNDVs map[4:3.2], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 3 }, { "SQL": "select count(c3) from (select a as c1, b as c2, a+1 as c3 from t1) as tmp group by c2, c1", @@ -49,7 +54,8 @@ "GID:1, GE:DataSource_22{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, 
schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_24{GID:1}, logic prop:{stats:{count 4, ColNDVs map[5:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#5] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_25{GID:2}, logic prop:{stats:{count 4, ColNDVs map[5:4], GroupNDVs []}, schema:{Column: [Column#5] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 4 }, { "SQL": "select count(c3) from (select a+b as c1, b as c2, a+1 as c3 from t1) as tmp group by c2, c1", @@ -57,7 +63,8 @@ "GID:1, GE:DataSource_26{}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_28{GID:1}, logic prop:{stats:{count 2, ColNDVs map[6:2], GroupNDVs []}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_29{GID:2}, logic prop:{stats:{count 2, ColNDVs map[6:2], GroupNDVs []}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 4 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b > (select t2.b from t2 where t2.a = t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b", @@ -68,7 +75,8 @@ "GID:4, GE:Apply_37{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 7:5 8:5], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Aggregation_38{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:6, GE:Projection_39{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 7 }, { "SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b > (select t2.b from t2 where t2.a = t1.a)) tmp group by tmp.a, tmp.b", @@ -80,7 +88,8 @@ "GID:5, GE:Apply_46{GID:1, GID:4}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 4:5 5:5], 
GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:6, GE:Aggregation_48{GID:5}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:7, GE:Projection_49{GID:6}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 7 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b in (select t2.b from t2 where t2.a = t1.a limit 3)) as cmp from t1) tmp group by tmp.a, tmp.b", @@ -91,7 +100,8 @@ "GID:4, GE:Apply_58{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Aggregation_59{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:6, GE:Projection_60{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 6 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b not in (select t2.b from t2 where t2.a = t1.a limit 3)) as cmp from t1) tmp group by tmp.a, tmp.b", @@ -102,7 +112,8 @@ "GID:4, GE:Apply_70{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Aggregation_71{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:6, GE:Projection_72{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 6 }, { "SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b in (select t2.b from t2 where t2.a = t1.a 
limit 3)) tmp group by tmp.a, tmp.b", @@ -113,7 +124,8 @@ "GID:4, GE:Apply_81{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Aggregation_83{GID:4}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:6, GE:Projection_84{GID:5}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 6 }, { "SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b not in (select t2.b from t2 where t2.a = t1.a limit 3)) tmp group by tmp.a, tmp.b", @@ -124,7 +136,8 @@ "GID:4, GE:Apply_93{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Aggregation_95{GID:4}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:6, GE:Projection_96{GID:5}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 6 }, { "SQL": "select count(1) from t1, t2 where t1.a = t2.a group by t1.a, t1.b", @@ -134,7 +147,8 @@ "GID:3, GE:Join_105{GID:1, GID:2}, logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 2:2 4:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_103{GID:3}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_104{GID:4}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(1) from t1 left join t2 on t1.a = t2.a group by t1.a, t1.b", @@ -144,7 +158,8 @@ "GID:3, GE:Join_111{GID:1, GID:2}, 
logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 2:2 4:3], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_109{GID:3}, logic prop:{stats:{count 4, ColNDVs map[7:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_110{GID:4}, logic prop:{stats:{count 4, ColNDVs map[7:4], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(1) from t1 left join t2 on t1.a = t2.a group by t2.a, t2.b", @@ -154,7 +169,8 @@ "GID:3, GE:Join_117{GID:1, GID:2}, logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_115{GID:3}, logic prop:{stats:{count 3, ColNDVs map[7:3], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_116{GID:4}, logic prop:{stats:{count 3, ColNDVs map[7:3], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(1) from t1 right join t2 on t1.a = t2.a group by t1.a, t1.b", @@ -164,7 +180,8 @@ "GID:3, GE:Join_123{GID:1, GID:2}, logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 2:2 4:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_121{GID:3}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_122{GID:4}, logic prop:{stats:{count 2, ColNDVs map[7:2], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(1) from t1 right join t2 on t1.a = t2.a group by t2.a, t2.b", @@ -174,7 +191,8 @@ "GID:3, GE:Join_129{GID:1, GID:2}, logic prop:{stats:{count 16.666666666666668, ColNDVs map[1:2 4:3 5:3], GroupNDVs 
[{[4 5] 9}]}, schema:{Column: [test.t1.a,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_127{GID:3}, logic prop:{stats:{count 9, ColNDVs map[7:9], GroupNDVs [{[4 5] 9}]}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_128{GID:4}, logic prop:{stats:{count 9, ColNDVs map[7:9], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b in (select t2.b from t2 where t2.a > t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b", @@ -184,7 +202,8 @@ "GID:3, GE:Join_136{GID:1, GID:2}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_137{GID:3}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_138{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b not in (select t2.b from t2 where t2.a > t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b", @@ -194,7 +213,8 @@ "GID:3, GE:Join_145{GID:1, GID:2}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_146{GID:3}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_147{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b in (select t2.b from t2 where t2.a > t1.a)) tmp group by 
tmp.a, tmp.b", @@ -204,7 +224,8 @@ "GID:3, GE:Join_153{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:1.6 2:1.6], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_155{GID:3}, logic prop:{stats:{count 1.6, ColNDVs map[7:1.6], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_156{GID:4}, logic prop:{stats:{count 1.6, ColNDVs map[7:1.6], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 where t1.b not in (select t2.b from t2 where t2.a > t1.a)) tmp group by tmp.a, tmp.b", @@ -214,7 +235,8 @@ "GID:3, GE:Join_162{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:1.6 2:1.6], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_164{GID:3}, logic prop:{stats:{count 1.6, ColNDVs map[7:1.6], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_165{GID:4}, logic prop:{stats:{count 1.6, ColNDVs map[7:1.6], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select * from t1 left join (select t2.a as a, t2.b as b, count(1) as cnt from t2 group by t2.a, t2.b) as tmp on t1.a = tmp.a and t1.b = tmp.b", @@ -224,7 +246,8 @@ "GID:3, GE:Aggregation_168{GID:2}, logic prop:{stats:{count 9, ColNDVs map[4:9 5:9 7:9], GroupNDVs [{[4 5] 9}]}, schema:{Column: [Column#7,test.t2.a,test.t2.b] PKOrUK: [[test.t2.a,test.t2.b]] NullableUK: []}}", "GID:4, GE:Join_172{GID:1, GID:3}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 4:5 5:5 7:5], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#7,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_171{GID:4}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 4:5 5:5 7:5], GroupNDVs [{[1 2] 4}]}, schema:{Column: 
[test.t1.a,test.t1.b,test.t2.a,test.t2.b,Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(1) from (select t1.a as a, t1.b as b from t1 limit 3) tmp group by tmp.a, tmp.b", @@ -233,7 +256,8 @@ "GID:2, GE:Limit_179{GID:1}, logic prop:{stats:{count 3, ColNDVs map[1:2 2:2], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Aggregation_176{GID:2}, logic prop:{stats:{count 2, ColNDVs map[4:2], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Projection_177{GID:3}, logic prop:{stats:{count 2, ColNDVs map[4:2], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 4 }, { "SQL": "select count(tmp.a_sum) from (select t1.a as a, t1.b as b, sum(a) over() as a_sum from t1) tmp group by tmp.a, tmp.b", @@ -242,7 +266,8 @@ "GID:2, GE:Window_183{GID:1}, logic prop:{stats:{count 5, ColNDVs map[1:2 2:2 5:5], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#5] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Aggregation_185{GID:2}, logic prop:{stats:{count 4, ColNDVs map[6:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Projection_186{GID:3}, logic prop:{stats:{count 4, ColNDVs map[6:4], GroupNDVs []}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 4 } ] }, @@ -255,7 +280,8 @@ "GID:1, GE:DataSource_2{}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_3{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_4{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 3 }, { "SQL": "select * from t1, t2 where t1.a = t2.a and t1.b = t2.b", @@ -264,7 
+290,8 @@ "GID:2, GE:DataSource_6{}, logic prop:{stats:{count 9, ColNDVs map[4:3 5:3], GroupNDVs [{[4 5] 9}]}, schema:{Column: [test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Join_10{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Projection_9{GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 4:3 5:3], GroupNDVs []}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 4 }, { "SQL": "select count(1) from t1 where a > 0 group by a, b", @@ -272,7 +299,8 @@ "GID:1, GE:DataSource_11{}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_13{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_14{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 3 }, { "SQL": "select count(1) from t1 where b > 0 group by a, b", @@ -280,7 +308,8 @@ "GID:1, GE:DataSource_15{}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_17{GID:1}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_18{GID:2}, logic prop:{stats:{count 4, ColNDVs map[4:4], GroupNDVs []}, schema:{Column: [Column#4] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 3 }, { "SQL": "select count(c3) from (select a as c1, b as c2, a+1 as c3 from t1) as tmp group by c2, c1", @@ -288,7 +317,8 @@ "GID:1, GE:DataSource_19{}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2], GroupNDVs [{[1 2] 4}]}, 
schema:{Column: [test.t1.a,test.t1.b] PKOrUK: [] NullableUK: []}}", "GID:2, GE:Aggregation_21{GID:1}, logic prop:{stats:{count 4, ColNDVs map[5:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#5] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Projection_22{GID:2}, logic prop:{stats:{count 4, ColNDVs map[5:4], GroupNDVs []}, schema:{Column: [Column#5] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 4 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b > (select t2.b from t2 where t2.a = t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b", @@ -299,7 +329,8 @@ "GID:4, GE:Apply_30{GID:1, GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 7:4 8:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Aggregation_31{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:6, GE:Projection_32{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 7 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b in (select t2.b from t2 where t2.a = t1.a limit 3)) as cmp from t1) tmp group by tmp.a, tmp.b", @@ -310,7 +341,8 @@ "GID:4, GE:Apply_40{GID:1, GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Aggregation_41{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:6, GE:Projection_42{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 6 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b not in (select t2.b from t2 where t2.a = t1.a limit 3)) as cmp from t1) tmp 
group by tmp.a, tmp.b", @@ -321,7 +353,8 @@ "GID:4, GE:Apply_52{GID:1, GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Aggregation_53{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:6, GE:Projection_54{GID:5}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 6 }, { "SQL": "select count(1) from t1 left join t2 on t1.a = t2.a group by t1.a, t1.b", @@ -331,7 +364,8 @@ "GID:3, GE:Join_62{GID:1, GID:2}, logic prop:{stats:{count 12, ColNDVs map[1:2 2:2 4:3], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,test.t2.a] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_60{GID:3}, logic prop:{stats:{count 4, ColNDVs map[7:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_61{GID:4}, logic prop:{stats:{count 4, ColNDVs map[7:4], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(1) from t1 right join t2 on t1.a = t2.a group by t2.a, t2.b", @@ -341,7 +375,8 @@ "GID:3, GE:Join_68{GID:1, GID:2}, logic prop:{stats:{count 12, ColNDVs map[1:2 4:3 5:3], GroupNDVs [{[4 5] 9}]}, schema:{Column: [test.t1.a,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_66{GID:3}, logic prop:{stats:{count 9, ColNDVs map[7:9], GroupNDVs [{[4 5] 9}]}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_67{GID:4}, logic prop:{stats:{count 9, ColNDVs map[7:9], GroupNDVs []}, schema:{Column: [Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b in (select t2.b from t2 where t2.a > t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b", 
@@ -351,7 +386,8 @@ "GID:3, GE:Join_75{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_76{GID:3}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_77{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(tmp.cmp) from (select t1.a as a, t1.b as b, (t1.b not in (select t2.b from t2 where t2.a > t1.a)) as cmp from t1) tmp group by tmp.a, tmp.b", @@ -361,7 +397,8 @@ "GID:3, GE:Join_84{GID:1, GID:2}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 10:2], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#10] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Aggregation_85{GID:3}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_86{GID:4}, logic prop:{stats:{count 4, ColNDVs map[11:4], GroupNDVs []}, schema:{Column: [Column#11] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select * from t1 left join (select t2.a as a, t2.b as b, count(1) as cnt from t2 group by t2.a, t2.b) as tmp on t1.a = tmp.a and t1.b = tmp.b", @@ -371,7 +408,8 @@ "GID:3, GE:Aggregation_89{GID:2}, logic prop:{stats:{count 9, ColNDVs map[4:9 5:9 7:9], GroupNDVs [{[4 5] 9}]}, schema:{Column: [Column#7,test.t2.a,test.t2.b] PKOrUK: [[test.t2.a,test.t2.b]] NullableUK: []}}", "GID:4, GE:Join_93{GID:1, GID:3}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 4:4 5:4 7:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#7,test.t2.a,test.t2.b] PKOrUK: [] NullableUK: []}}", "GID:5, GE:Projection_92{GID:4}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 4:4 5:4 7:4], GroupNDVs [{[1 2] 4}]}, 
schema:{Column: [test.t1.a,test.t1.b,test.t2.a,test.t2.b,Column#7] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 5 }, { "SQL": "select count(tmp.a_sum) from (select t1.a as a, t1.b as b, sum(a) over() as a_sum from t1) tmp group by tmp.a, tmp.b", @@ -380,7 +418,8 @@ "GID:2, GE:Window_97{GID:1}, logic prop:{stats:{count 4, ColNDVs map[1:2 2:2 5:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [test.t1.a,test.t1.b,Column#5] PKOrUK: [] NullableUK: []}}", "GID:3, GE:Aggregation_99{GID:2}, logic prop:{stats:{count 4, ColNDVs map[6:4], GroupNDVs [{[1 2] 4}]}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}", "GID:4, GE:Projection_100{GID:3}, logic prop:{stats:{count 4, ColNDVs map[6:4], GroupNDVs []}, schema:{Column: [Column#6] PKOrUK: [] NullableUK: []}}" - ] + ], + "OpNum": 4 } ] } diff --git a/pkg/planner/core/collect_column_stats_usage.go b/pkg/planner/core/collect_column_stats_usage.go index 45b9d9163be5b..60c9d0c96b355 100644 --- a/pkg/planner/core/collect_column_stats_usage.go +++ b/pkg/planner/core/collect_column_stats_usage.go @@ -58,6 +58,9 @@ type columnStatsUsageCollector struct { // tblID2PartitionIDs is used for tables with static pruning mode. // Note that we've no longer suggested to use static pruning mode. tblID2PartitionIDs map[int64][]int64 + + // operatorNum is the number of operators in the logical plan. + operatorNum uint64 } func newColumnStatsUsageCollector(histNeeded bool, enabledPlanCapture bool) *columnStatsUsageCollector { @@ -304,6 +307,7 @@ func (c *columnStatsUsageCollector) collectFromPlan(askedColGroups [][]*expressi c.updateColMap(col, []*expression.Column{x.SeedSchema.Columns[i]}) } } + c.operatorNum++ } // CollectColumnStatsUsage collects column stats usage from logical plan. @@ -312,17 +316,18 @@ func (c *columnStatsUsageCollector) collectFromPlan(askedColGroups [][]*expressi // First return value: predicate columns // Second return value: the visited table IDs(For partition table, we only record its global meta ID. 
The meta ID of each partition will be recorded in tblID2PartitionIDs) // Third return value: the visited partition IDs. Used for static partition pruning. -// Forth return value: the recorded asked column group for each datasource table, which will require collecting composite index for it's group ndv info. +// Fourth return value: the number of operators in the logical plan. // TODO: remove the third return value when the static partition pruning is totally deprecated. func CollectColumnStatsUsage(lp base.LogicalPlan, histNeeded bool) ( map[model.TableItemID]bool, *intset.FastIntSet, map[int64][]int64, + uint64, ) { collector := newColumnStatsUsageCollector(histNeeded, lp.SCtx().GetSessionVars().IsPlanReplayerCaptureEnabled()) collector.collectFromPlan(nil, lp) if collector.collectVisitedTable { recordTableRuntimeStats(lp.SCtx(), collector.visitedtbls) } - return collector.predicateCols, collector.visitedPhysTblIDs, collector.tblID2PartitionIDs + return collector.predicateCols, collector.visitedPhysTblIDs, collector.tblID2PartitionIDs, collector.operatorNum } diff --git a/pkg/planner/core/collect_column_stats_usage_test.go b/pkg/planner/core/collect_column_stats_usage_test.go index 95442ff609413..bf5194ef2789b 100644 --- a/pkg/planner/core/collect_column_stats_usage_test.go +++ b/pkg/planner/core/collect_column_stats_usage_test.go @@ -80,7 +80,7 @@ func getStatsLoadItem(t *testing.T, is infoschema.InfoSchema, item model.StatsLo } func checkColumnStatsUsageForPredicates(t *testing.T, is infoschema.InfoSchema, lp base.LogicalPlan, expected []string, comment string) { - tblColIDs, _, _ := CollectColumnStatsUsage(lp, false) + tblColIDs, _, _, _ := CollectColumnStatsUsage(lp, false) cols := make([]string, 0, len(tblColIDs)) for tblColID := range tblColIDs { col := getColumnName(t, is, tblColID, comment) @@ -91,7 +91,7 @@ func checkColumnStatsUsageForPredicates(t *testing.T, is infoschema.InfoSchema, } func checkColumnStatsUsageForStatsLoad(t *testing.T, is
infoschema.InfoSchema, lp base.LogicalPlan, expectedCols []string, expectedParts map[string][]string, comment string) { - predicateCols, _, expandedPartitions := CollectColumnStatsUsage(lp, true) + predicateCols, _, expandedPartitions, _ := CollectColumnStatsUsage(lp, true) loadItems := make([]model.StatsLoadItem, 0, len(predicateCols)) for tblColID, fullLoad := range predicateCols { loadItems = append(loadItems, model.StatsLoadItem{TableItemID: tblColID, FullLoad: fullLoad}) diff --git a/pkg/planner/core/rule_collect_plan_stats.go b/pkg/planner/core/rule_collect_plan_stats.go index d31dfa60a045f..ed169ef322d59 100644 --- a/pkg/planner/core/rule_collect_plan_stats.go +++ b/pkg/planner/core/rule_collect_plan_stats.go @@ -45,7 +45,10 @@ func (c *CollectPredicateColumnsPoint) Optimize(_ context.Context, plan base.Log } syncWait := plan.SCtx().GetSessionVars().StatsLoadSyncWait.Load() histNeeded := syncWait > 0 - predicateColumns, visitedPhysTblIDs, tid2pids := CollectColumnStatsUsage(plan, histNeeded) + predicateColumns, visitedPhysTblIDs, tid2pids, opNum := CollectColumnStatsUsage(plan, histNeeded) + // opNum is collected by the common stats-load rule; later rules may still eliminate some operators (e.g. projections), + // so opNum is only an estimate, but it is accurate enough to size the memo hashmap's initial capacity. + plan.SCtx().GetSessionVars().StmtCtx.OperatorNum = opNum if len(predicateColumns) > 0 { plan.SCtx().UpdateColStatsUsage(maps.Keys(predicateColumns)) } diff --git a/pkg/sessionctx/stmtctx/stmtctx.go b/pkg/sessionctx/stmtctx/stmtctx.go index 9b544620649f5..ed49a643b21a5 100644 --- a/pkg/sessionctx/stmtctx/stmtctx.go +++ b/pkg/sessionctx/stmtctx/stmtctx.go @@ -436,6 +436,9 @@ type StatementContext struct { // and the `for share` execution is enabled by `tidb_enable_noop_functions`, no locks should be // acquired in this case. ForShareLockEnabledByNoop bool + + // OperatorNum is used to record the number of operators in the current logical plan.
+ OperatorNum uint64 } // DefaultStmtErrLevels is the default error levels for statement