Skip to content

Commit ad31c99

Browse files
terry1purcellti-chi-bot
authored andcommitted
This is an automated cherry-pick of pingcap#56631
Signed-off-by: ti-chi-bot <[email protected]>
1 parent 4af46a5 commit ad31c99

File tree

8 files changed

+3123
-7
lines changed

8 files changed

+3123
-7
lines changed

cmd/explaintest/r/explain_complex.result

+20
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ UNIQUE KEY org_employee_position_pk (hotel_id,user_id,position_id)
246246
set tidb_cost_model_version=2;
247247
explain format = 'brief' SELECT d.id, d.ctx, d.name, d.left_value, d.right_value, d.depth, d.leader_id, d.status, d.created_on, d.updated_on FROM org_department AS d LEFT JOIN org_position AS p ON p.department_id = d.id AND p.status = 1000 LEFT JOIN org_employee_position AS ep ON ep.position_id = p.id AND ep.status = 1000 WHERE (d.ctx = 1 AND (ep.user_id = 62 OR d.id = 20 OR d.id = 20) AND d.status = 1000) GROUP BY d.id ORDER BY d.left_value;
248248
id estRows task access object operator info
249+
<<<<<<< HEAD:cmd/explaintest/r/explain_complex.result
249250
Sort 1.00 root test.org_department.left_value
250251
└─HashAgg 1.00 root group by:test.org_department.id, funcs:firstrow(test.org_department.id)->test.org_department.id, funcs:firstrow(test.org_department.ctx)->test.org_department.ctx, funcs:firstrow(test.org_department.name)->test.org_department.name, funcs:firstrow(test.org_department.left_value)->test.org_department.left_value, funcs:firstrow(test.org_department.right_value)->test.org_department.right_value, funcs:firstrow(test.org_department.depth)->test.org_department.depth, funcs:firstrow(test.org_department.leader_id)->test.org_department.leader_id, funcs:firstrow(test.org_department.status)->test.org_department.status, funcs:firstrow(test.org_department.created_on)->test.org_department.created_on, funcs:firstrow(test.org_department.updated_on)->test.org_department.updated_on
251252
└─Selection 0.01 root or(eq(test.org_employee_position.user_id, 62), or(eq(test.org_department.id, 20), eq(test.org_department.id, 20)))
@@ -263,6 +264,25 @@ Sort 1.00 root test.org_department.left_value
263264
└─TableReader(Probe) 9.99 root data:Selection
264265
└─Selection 9.99 cop[tikv] eq(test.org_employee_position.status, 1000), not(isnull(test.org_employee_position.position_id))
265266
└─TableFullScan 10000.00 cop[tikv] table:ep keep order:false, stats:pseudo
267+
=======
268+
Sort 1.00 root explain_complex.org_department.left_value
269+
└─HashAgg 1.00 root group by:explain_complex.org_department.id, funcs:firstrow(explain_complex.org_department.id)->explain_complex.org_department.id, funcs:firstrow(explain_complex.org_department.ctx)->explain_complex.org_department.ctx, funcs:firstrow(explain_complex.org_department.name)->explain_complex.org_department.name, funcs:firstrow(explain_complex.org_department.left_value)->explain_complex.org_department.left_value, funcs:firstrow(explain_complex.org_department.right_value)->explain_complex.org_department.right_value, funcs:firstrow(explain_complex.org_department.depth)->explain_complex.org_department.depth, funcs:firstrow(explain_complex.org_department.leader_id)->explain_complex.org_department.leader_id, funcs:firstrow(explain_complex.org_department.status)->explain_complex.org_department.status, funcs:firstrow(explain_complex.org_department.created_on)->explain_complex.org_department.created_on, funcs:firstrow(explain_complex.org_department.updated_on)->explain_complex.org_department.updated_on
270+
└─Selection 0.01 root or(eq(explain_complex.org_employee_position.user_id, 62), or(eq(explain_complex.org_department.id, 20), eq(explain_complex.org_department.id, 20)))
271+
└─HashJoin 0.02 root left outer join, equal:[eq(explain_complex.org_position.id, explain_complex.org_employee_position.position_id)]
272+
├─TableReader(Build) 9.99 root data:Selection
273+
│ └─Selection 9.99 cop[tikv] eq(explain_complex.org_employee_position.status, 1000), not(isnull(explain_complex.org_employee_position.position_id))
274+
│ └─TableFullScan 10000.00 cop[tikv] table:ep keep order:false, stats:pseudo
275+
└─IndexJoin(Probe) 0.01 root left outer join, inner:IndexLookUp, outer key:explain_complex.org_department.id, inner key:explain_complex.org_position.department_id, equal cond:eq(explain_complex.org_department.id, explain_complex.org_position.department_id)
276+
├─IndexLookUp(Build) 0.01 root
277+
│ ├─IndexRangeScan(Build) 10.00 cop[tikv] table:d, index:org_department_ctx_index(ctx) range:[1,1], keep order:false, stats:pseudo
278+
│ └─Selection(Probe) 0.01 cop[tikv] eq(explain_complex.org_department.status, 1000)
279+
│ └─TableRowIDScan 10.00 cop[tikv] table:d keep order:false, stats:pseudo
280+
└─IndexLookUp(Probe) 0.01 root
281+
├─Selection(Build) 12.50 cop[tikv] not(isnull(explain_complex.org_position.department_id))
282+
│ └─IndexRangeScan 12.51 cop[tikv] table:p, index:org_position_department_id_index(department_id) range: decided by [eq(explain_complex.org_position.department_id, explain_complex.org_department.id)], keep order:false, stats:pseudo
283+
└─Selection(Probe) 0.01 cop[tikv] eq(explain_complex.org_position.status, 1000)
284+
└─TableRowIDScan 12.50 cop[tikv] table:p keep order:false, stats:pseudo
285+
>>>>>>> 8fde2d6fa2b (planner: set min for high risk plan steps (#56631)):tests/integrationtest/r/explain_complex.result
266286
set tidb_cost_model_version=1;
267287
create table test.Tab_A (id int primary key,bid int,cid int,name varchar(20),type varchar(20),num int,amt decimal(11,2));
268288
create table test.Tab_B (id int primary key,name varchar(20));

pkg/planner/cardinality/row_size.go

+189
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
// Copyright 2023 PingCAP, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package cardinality
16+
17+
import (
18+
"math"
19+
20+
"github.com/pingcap/tidb/pkg/expression"
21+
"github.com/pingcap/tidb/pkg/kv"
22+
"github.com/pingcap/tidb/pkg/parser/mysql"
23+
"github.com/pingcap/tidb/pkg/planner/planctx"
24+
"github.com/pingcap/tidb/pkg/statistics"
25+
"github.com/pingcap/tidb/pkg/tablecodec"
26+
"github.com/pingcap/tidb/pkg/util/chunk"
27+
)
28+
29+
// pseudoColSize is the per-column size used by GetAvgRowSize when the stats
// collection is pseudo or empty, or when a column has no usable size statistics.
const pseudoColSize = 8.0
30+
31+
// GetIndexAvgRowSize computes average row size for a index scan.
32+
func GetIndexAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols []*expression.Column, isUnique bool) (size float64) {
33+
size = GetAvgRowSize(ctx, coll, cols, true, true)
34+
// tablePrefix(1) + tableID(8) + indexPrefix(2) + indexID(8)
35+
// Because the cols for index scan always contain the handle, so we don't add the rowID here.
36+
size += 19
37+
if !isUnique {
38+
// add the len("_")
39+
size++
40+
}
41+
return
42+
}
43+
44+
// GetTableAvgRowSize computes average row size for a table scan, exclude the index key-value pairs.
45+
func GetTableAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols []*expression.Column, storeType kv.StoreType, handleInCols bool) (size float64) {
46+
size = GetAvgRowSize(ctx, coll, cols, false, true)
47+
switch storeType {
48+
case kv.TiKV:
49+
size += tablecodec.RecordRowKeyLen
50+
// The `cols` for TiKV always contain the row_id, so prefix row size subtract its length.
51+
size -= 8
52+
case kv.TiFlash:
53+
if !handleInCols {
54+
size += 8 /* row_id length */
55+
}
56+
}
57+
// Avoid errors related to size less than zero
58+
size = max(0, size)
59+
return
60+
}
61+
62+
// GetAvgRowSize computes average row size for given columns.
63+
func GetAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols []*expression.Column, isEncodedKey bool, isForScan bool) (size float64) {
64+
sessionVars := ctx.GetSessionVars()
65+
if coll.Pseudo || coll.ColNum() == 0 || coll.RealtimeCount == 0 {
66+
size = pseudoColSize * float64(len(cols))
67+
} else {
68+
for _, col := range cols {
69+
colHist := coll.GetCol(col.UniqueID)
70+
// Normally this would not happen, it is for compatibility with old version stats which
71+
// does not include TotColSize.
72+
if colHist == nil || (!colHist.IsHandle && colHist.TotColSize == 0 && (colHist.NullCount != coll.RealtimeCount)) {
73+
size += pseudoColSize
74+
continue
75+
}
76+
// We differentiate if the column is encoded as key or value, because the resulted size
77+
// is different.
78+
if sessionVars.EnableChunkRPC && !isForScan {
79+
size += AvgColSizeChunkFormat(colHist, coll.RealtimeCount)
80+
} else {
81+
size += AvgColSize(colHist, coll.RealtimeCount, isEncodedKey)
82+
}
83+
}
84+
}
85+
// Avoid errors related to size less than zero
86+
size = max(0, size)
87+
if sessionVars.EnableChunkRPC && !isForScan {
88+
// Add 1/8 byte for each column's nullBitMap byte.
89+
return size + float64(len(cols))/8
90+
}
91+
// Add 1 byte for each column's flag byte. See `encode` for details.
92+
return size + float64(len(cols))
93+
}
94+
95+
// GetAvgRowSizeDataInDiskByRows computes average row size for given columns.
96+
func GetAvgRowSizeDataInDiskByRows(coll *statistics.HistColl, cols []*expression.Column) (size float64) {
97+
if coll.Pseudo || coll.ColNum() == 0 || coll.RealtimeCount == 0 {
98+
for _, col := range cols {
99+
size += float64(chunk.EstimateTypeWidth(col.GetStaticType()))
100+
}
101+
} else {
102+
for _, col := range cols {
103+
colHist := coll.GetCol(col.UniqueID)
104+
// Normally this would not happen, it is for compatibility with old version stats which
105+
// does not include TotColSize.
106+
if colHist == nil || (!colHist.IsHandle && colHist.TotColSize == 0 && (colHist.NullCount != coll.RealtimeCount)) {
107+
size += float64(chunk.EstimateTypeWidth(col.GetStaticType()))
108+
continue
109+
}
110+
size += AvgColSizeDataInDiskByRows(colHist, coll.RealtimeCount)
111+
}
112+
}
113+
// Add 8 byte for each column's size record. See `DataInDiskByRows` for details.
114+
return max(0, size+float64(8*len(cols)))
115+
}
116+
117+
// AvgColSize is the average column size of the histogram. These sizes are derived from function `encode`
118+
// and `Datum::ConvertTo`, so we need to update them if those 2 functions are changed.
119+
func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 {
120+
if count == 0 {
121+
return 0
122+
}
123+
// Note that, if the handle column is encoded as value, instead of key, i.e,
124+
// when the handle column is in a unique index, the real column size may be
125+
// smaller than 8 because it is encoded using `EncodeVarint`. Since we don't
126+
// know the exact value size now, use 8 as approximation.
127+
if c.IsHandle {
128+
return 8
129+
}
130+
histCount := c.TotalRowCount()
131+
notNullRatio := 1.0
132+
if histCount > 0 {
133+
notNullRatio = max(0, 1.0-float64(c.NullCount)/histCount)
134+
}
135+
switch c.Histogram.Tp.GetType() {
136+
case mysql.TypeFloat, mysql.TypeDouble, mysql.TypeDuration, mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
137+
return 8 * notNullRatio
138+
case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeYear, mysql.TypeEnum, mysql.TypeBit, mysql.TypeSet:
139+
if isKey {
140+
return 8 * notNullRatio
141+
}
142+
}
143+
// Keep two decimal place.
144+
return max(0, math.Round(float64(c.TotColSize)/float64(count)*100)/100)
145+
}
146+
147+
// AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode`
148+
// and `DecodeToChunk`, so we need to update them if those 2 functions are changed.
149+
func AvgColSizeChunkFormat(c *statistics.Column, count int64) float64 {
150+
if count == 0 {
151+
return 0
152+
}
153+
fixedLen := chunk.GetFixedLen(c.Histogram.Tp)
154+
if fixedLen >= 0 {
155+
return float64(fixedLen)
156+
}
157+
// Keep two decimal place.
158+
// Add 8 bytes for unfixed-len type's offsets.
159+
// Minus Log2(avgSize) for unfixed-len type LEN.
160+
avgSize := float64(c.TotColSize) / float64(count)
161+
if avgSize < 1 {
162+
return max(0, math.Round(avgSize*100)/100) + 8
163+
}
164+
return max(0, math.Round((avgSize-math.Log2(avgSize))*100)/100) + 8
165+
}
166+
167+
// AvgColSizeDataInDiskByRows is the average column size of the histogram. These sizes are derived
168+
// from `chunk.DataInDiskByRows` so we need to update them if those 2 functions are changed.
169+
func AvgColSizeDataInDiskByRows(c *statistics.Column, count int64) float64 {
170+
if count == 0 {
171+
return 0
172+
}
173+
histCount := c.TotalRowCount()
174+
notNullRatio := 1.0
175+
if histCount > 0 {
176+
notNullRatio = 1.0 - float64(c.NullCount)/histCount
177+
}
178+
size := chunk.GetFixedLen(c.Histogram.Tp)
179+
if size >= 0 {
180+
return float64(size) * notNullRatio
181+
}
182+
// Keep two decimal place.
183+
// Minus Log2(avgSize) for unfixed-len type LEN.
184+
avgSize := float64(c.TotColSize) / float64(count)
185+
if avgSize < 1 {
186+
return max(0, math.Round((avgSize)*100)/100)
187+
}
188+
return math.Round((avgSize-math.Log2(avgSize))*100) / 100
189+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
[
2+
{
3+
"Name": "TestCollectDependingVirtualCols",
4+
"Cases": [
5+
{
6+
"TableName": "t",
7+
"InputColNames": [
8+
"a",
9+
"b"
10+
],
11+
"OutputColNames": []
12+
},
13+
{
14+
"TableName": "t",
15+
"InputColNames": [
16+
"c"
17+
],
18+
"OutputColNames": [
19+
"_v$_ic_char_0",
20+
"_v$_ic_signed_0",
21+
"_v$_ic_unsigned_0"
22+
]
23+
},
24+
{
25+
"TableName": "t",
26+
"InputColNames": [
27+
"b",
28+
"c"
29+
],
30+
"OutputColNames": [
31+
"_v$_ic_char_0",
32+
"_v$_ic_signed_0",
33+
"_v$_ic_unsigned_0"
34+
]
35+
},
36+
{
37+
"TableName": "t1",
38+
"InputColNames": [
39+
"a"
40+
],
41+
"OutputColNames": [
42+
"vab"
43+
]
44+
},
45+
{
46+
"TableName": "t1",
47+
"InputColNames": [
48+
"b"
49+
],
50+
"OutputColNames": [
51+
"_v$_ib_0",
52+
"vab",
53+
"vvc"
54+
]
55+
},
56+
{
57+
"TableName": "t1",
58+
"InputColNames": [
59+
"c"
60+
],
61+
"OutputColNames": [
62+
"_v$_icvab_0",
63+
"vc"
64+
]
65+
},
66+
{
67+
"TableName": "t1",
68+
"InputColNames": [
69+
"vab"
70+
],
71+
"OutputColNames": [
72+
"_v$_icvab_0",
73+
"_v$_ivvcvab_0",
74+
"vvabvvc"
75+
]
76+
},
77+
{
78+
"TableName": "t1",
79+
"InputColNames": [
80+
"vab",
81+
"c"
82+
],
83+
"OutputColNames": [
84+
"_v$_icvab_0",
85+
"_v$_ivvcvab_0",
86+
"vc",
87+
"vvabvvc"
88+
]
89+
},
90+
{
91+
"TableName": "t1",
92+
"InputColNames": [
93+
"vc",
94+
"c",
95+
"vvc"
96+
],
97+
"OutputColNames": [
98+
"_v$_icvab_0",
99+
"_v$_ivvcvab_0",
100+
"vvabvvc"
101+
]
102+
}
103+
]
104+
},
105+
{
106+
"Name": "TestPartialStatsInExplain",
107+
"Cases": [
108+
{
109+
"Query": "explain format = brief select * from tp where b = 10",
110+
"Result": [
111+
"TableReader 0.01 root partition:all data:Selection",
112+
"└─Selection 0.01 cop[tikv] eq(test.tp.b, 10)",
113+
" └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[b:allEvicted]"
114+
]
115+
},
116+
{
117+
"Query": "explain format = brief select * from t join tp where tp.a = 10 and t.b = tp.c",
118+
"Result": [
119+
"Projection 1.00 root test.t.a, test.t.b, test.t.c, test.tp.a, test.tp.b, test.tp.c",
120+
"└─HashJoin 1.00 root inner join, equal:[eq(test.tp.c, test.t.b)]",
121+
" ├─TableReader(Build) 1.00 root partition:p1 data:Selection",
122+
" │ └─Selection 1.00 cop[tikv] eq(test.tp.a, 10), not(isnull(test.tp.c))",
123+
" │ └─TableFullScan 6.00 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
124+
" └─TableReader(Probe) 3.00 root data:Selection",
125+
" └─Selection 3.00 cop[tikv] not(isnull(test.t.b))",
126+
" └─TableFullScan 3.00 cop[tikv] table:t keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]"
127+
]
128+
},
129+
{
130+
"Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
131+
"Result": [
132+
"HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]",
133+
"├─TableReader(Build) 1.00 root data:TableRangeScan",
134+
"│ └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]",
135+
"└─IndexJoin(Probe) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
136+
" ├─TableReader(Build) 0.33 root data:Selection",
137+
" │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))",
138+
" │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
139+
" └─IndexLookUp(Probe) 0.33 root partition:p0 ",
140+
" ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))",
141+
" │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
142+
" └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]"
143+
]
144+
}
145+
]
146+
},
147+
{
148+
"Name": "TestPlanStatsLoadForCTE",
149+
"Cases": [
150+
{
151+
"Query": "explain format= brief with cte(x, y) as (select d + 1, b from t where c > 1) select * from cte where x < 3",
152+
"Result": [
153+
"Projection 1.60 root plus(test.t.d, 1)->Column#12, test.t.b",
154+
"└─TableReader 1.60 root data:Selection",
155+
" └─Selection 1.60 cop[tikv] gt(test.t.c, 1), lt(plus(test.t.d, 1), 3)",
156+
" └─TableFullScan 3.00 cop[tikv] table:t keep order:false"
157+
]
158+
}
159+
]
160+
}
161+
]

0 commit comments

Comments
 (0)