Skip to content

Commit 8fde2d6

Browse files
authored Oct 29, 2024
planner: set min for high risk plan steps (pingcap#56631)
close pingcap#55126
1 parent 1c386db commit 8fde2d6

File tree

8 files changed

+81
-70
lines changed

8 files changed

+81
-70
lines changed
 

‎pkg/planner/cardinality/row_size.go

+12-8
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ func GetTableAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols
5454
size += 8 /* row_id length */
5555
}
5656
}
57+
// Avoid errors related to size less than zero
58+
size = max(0, size)
5759
return
5860
}
5961

@@ -80,6 +82,8 @@ func GetAvgRowSize(ctx planctx.PlanContext, coll *statistics.HistColl, cols []*e
8082
}
8183
}
8284
}
85+
// Avoid errors related to size less than zero
86+
size = max(0, size)
8387
if sessionVars.EnableChunkRPC && !isForScan {
8488
// Add 1/8 byte for each column's nullBitMap byte.
8589
return size + float64(len(cols))/8
@@ -107,7 +111,7 @@ func GetAvgRowSizeDataInDiskByRows(coll *statistics.HistColl, cols []*expression
107111
}
108112
}
109113
// Add 8 bytes for each column's size record. See `DataInDiskByRows` for details.
110-
return size + float64(8*len(cols))
114+
return max(0, size+float64(8*len(cols)))
111115
}
112116

113117
// AvgColSize is the average column size of the histogram. These sizes are derived from function `encode`
@@ -126,7 +130,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 {
126130
histCount := c.TotalRowCount()
127131
notNullRatio := 1.0
128132
if histCount > 0 {
129-
notNullRatio = 1.0 - float64(c.NullCount)/histCount
133+
notNullRatio = max(0, 1.0-float64(c.NullCount)/histCount)
130134
}
131135
switch c.Histogram.Tp.GetType() {
132136
case mysql.TypeFloat, mysql.TypeDouble, mysql.TypeDuration, mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
@@ -137,7 +141,7 @@ func AvgColSize(c *statistics.Column, count int64, isKey bool) float64 {
137141
}
138142
}
139143
// Keep two decimal places.
140-
return math.Round(float64(c.TotColSize)/float64(count)*100) / 100
144+
return max(0, math.Round(float64(c.TotColSize)/float64(count)*100)/100)
141145
}
142146

143147
// AvgColSizeChunkFormat is the average column size of the histogram. These sizes are derived from function `Encode`
@@ -147,17 +151,17 @@ func AvgColSizeChunkFormat(c *statistics.Column, count int64) float64 {
147151
return 0
148152
}
149153
fixedLen := chunk.GetFixedLen(c.Histogram.Tp)
150-
if fixedLen != -1 {
154+
if fixedLen >= 0 {
151155
return float64(fixedLen)
152156
}
153157
// Keep two decimal places.
154158
// Add 8 bytes for unfixed-len type's offsets.
155159
// Minus Log2(avgSize) for unfixed-len type LEN.
156160
avgSize := float64(c.TotColSize) / float64(count)
157161
if avgSize < 1 {
158-
return math.Round(avgSize*100)/100 + 8
162+
return max(0, math.Round(avgSize*100)/100) + 8
159163
}
160-
return math.Round((avgSize-math.Log2(avgSize))*100)/100 + 8
164+
return max(0, math.Round((avgSize-math.Log2(avgSize))*100)/100) + 8
161165
}
162166

163167
// AvgColSizeDataInDiskByRows is the average column size of the histogram. These sizes are derived
@@ -172,14 +176,14 @@ func AvgColSizeDataInDiskByRows(c *statistics.Column, count int64) float64 {
172176
notNullRatio = 1.0 - float64(c.NullCount)/histCount
173177
}
174178
size := chunk.GetFixedLen(c.Histogram.Tp)
175-
if size != -1 {
179+
if size >= 0 {
176180
return float64(size) * notNullRatio
177181
}
178182
// Keep two decimal places.
179183
// Minus Log2(avgSize) for unfixed-len type LEN.
180184
avgSize := float64(c.TotColSize) / float64(count)
181185
if avgSize < 1 {
182-
return math.Round((avgSize)*100) / 100
186+
return max(0, math.Round((avgSize)*100)/100)
183187
}
184188
return math.Round((avgSize-math.Log2(avgSize))*100) / 100
185189
}

‎pkg/planner/core/casetest/partition/testdata/partition_pruner_out.json

+6-6
Original file line numberDiff line numberDiff line change
@@ -470,12 +470,12 @@
470470
"Plan": [
471471
"Projection 0.00 root test_partition.t1.id, test_partition.t1.a, test_partition.t1.b, test_partition.t2.id, test_partition.t2.a, test_partition.t2.b",
472472
"└─HashJoin 0.00 root CARTESIAN inner join",
473-
" ├─TableReader(Build) 0.00 root partition:p1 data:Selection",
474-
" │ └─Selection 0.00 cop[tikv] eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)",
475-
" │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
476-
" └─TableReader(Probe) 0.01 root partition:p0 data:Selection",
477-
" └─Selection 0.01 cop[tikv] eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))",
478-
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
473+
" ├─TableReader(Build) 0.01 root partition:p0 data:Selection",
474+
" │ └─Selection 0.01 cop[tikv] eq(test_partition.t1.id, 7), or(eq(test_partition.t1.a, 1), and(eq(test_partition.t1.a, 3), in(test_partition.t1.b, 3, 5)))",
475+
" │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
476+
" └─TableReader(Probe) 0.00 root partition:p1 data:Selection",
477+
" └─Selection 0.00 cop[tikv] eq(test_partition.t2.b, 7), eq(test_partition.t2.id, 7), in(test_partition.t2.a, 6, 7, 8)",
478+
" └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
479479
],
480480
"IndexPlan": [
481481
"HashJoin 0.03 root CARTESIAN inner join",

‎pkg/planner/core/casetest/planstats/testdata/plan_stats_suite_out.json

+10-10
Original file line numberDiff line numberDiff line change
@@ -130,16 +130,16 @@
130130
"Query": "explain format = brief select * from t join tp partition (p0) join t2 where t.a < 10 and t.b = tp.c and t2.a > 10 and t2.a = tp.c",
131131
"Result": [
132132
"HashJoin 0.33 root inner join, equal:[eq(test.tp.c, test.t2.a)]",
133-
"├─IndexJoin(Build) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
134-
"├─TableReader(Build) 0.33 root data:Selection",
135-
"│ │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))",
136-
"│ │ └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
137-
"│ └─IndexLookUp(Probe) 0.33 root partition:p0 ",
138-
"├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))",
139-
"│ │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
140-
" └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]",
141-
"└─TableReader(Probe) 1.00 root data:TableRangeScan",
142-
" └─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]"
133+
"├─TableReader(Build) 1.00 root data:TableRangeScan",
134+
"└─TableRangeScan 1.00 cop[tikv] table:t2 range:(10,+inf], keep order:false, stats:partial[a:allEvicted]",
135+
"└─IndexJoin(Probe) 0.33 root inner join, inner:IndexLookUp, outer key:test.t.b, inner key:test.tp.c, equal cond:eq(test.t.b, test.tp.c)",
136+
" ├─TableReader(Build) 0.33 root data:Selection",
137+
" │ └─Selection 0.33 cop[tikv] gt(test.t.b, 10), not(isnull(test.t.b))",
138+
" └─TableRangeScan 1.00 cop[tikv] table:t range:[-inf,10), keep order:false, stats:partial[idx:allEvicted, a:allEvicted, b:allEvicted]",
139+
" └─IndexLookUp(Probe) 0.33 root partition:p0 ",
140+
" ├─Selection(Build) 0.33 cop[tikv] gt(test.tp.c, 10), not(isnull(test.tp.c))",
141+
" │ └─IndexRangeScan 0.50 cop[tikv] table:tp, index:ic(c) range: decided by [eq(test.tp.c, test.t.b)], keep order:false, stats:partial[c:allEvicted]",
142+
" └─TableRowIDScan(Probe) 0.33 cop[tikv] table:tp keep order:false, stats:partial[c:allEvicted]"
143143
]
144144
}
145145
]

‎pkg/planner/core/casetest/testdata/integration_suite_out.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@
165165
{
166166
"SQL": "explain format = 'verbose' select (2) in (select /*+ read_from_storage(tiflash[t1]) */ count(*) from t1) from (select t.b < (select /*+ read_from_storage(tiflash[t2]) */ t.b from t2 limit 1 ) from t3 t) t; -- we do generate the agg pushed-down plan of mpp, but cost-cmp failed",
167167
"Plan": [
168-
"HashJoin_17 3.00 32770.77 root CARTESIAN left outer semi join",
168+
"HashJoin_17 3.00 32781.07 root CARTESIAN left outer semi join",
169169
"├─Selection_22(Build) 0.80 31149.25 root eq(2, Column#18)",
170170
"│ └─StreamAgg_29 1.00 31099.35 root funcs:count(1)->Column#18",
171171
"│ └─TableReader_41 3.00 30949.65 root MppVersion: 2, data:ExchangeSender_40",

‎pkg/planner/core/plan_cost_ver1.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -1251,10 +1251,10 @@ func getCardinality(operator base.PhysicalPlan, costFlag uint64) float64 {
12511251
if actualProbeCnt == 0 {
12521252
return 0
12531253
}
1254-
return getOperatorActRows(operator) / float64(actualProbeCnt)
1254+
return max(0, getOperatorActRows(operator)/float64(actualProbeCnt))
12551255
}
12561256
rows := operator.StatsCount()
1257-
if rows == 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 {
1257+
if rows <= 0 && operator.SCtx().GetSessionVars().CostModelVersion == modelVer2 {
12581258
// 0 est-row can lead to 0 operator cost which makes plan choice unstable.
12591259
rows = 1
12601260
}

‎pkg/planner/core/plan_cost_ver2.go

+34-27
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,20 @@ func (p *PhysicalProjection) GetPlanCostVer2(taskType property.TaskType, option
103103
return p.PlanCostVer2, nil
104104
}
105105

106+
const (
107+
// MinNumRows provides a minimum to avoid underestimation. As selectivity estimation approaches
108+
// zero, all plan choices result in a low cost - making it difficult to differentiate plan choices.
109+
// A low value of 1.0 is used here for most (non-probe) accesses to reduce this risk.
110+
MinNumRows = 1.0
111+
// MinRowSize provides a minimum column length to ensure that any adjustment or calculation
112+
// in costing does not go below this value. 2.0 is used as a reasonable lowest column length.
113+
MinRowSize = 2.0
114+
// TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans
115+
TiFlashStartupRowPenalty = 10000
116+
// MaxPenaltyRowCount applies a penalty for high risk scans
117+
MaxPenaltyRowCount = 1000
118+
)
119+
106120
// GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
107121
// plan-cost = rows * log2(row-size) * scan-factor
108122
// log2(row-size) is from experiments.
@@ -112,23 +126,14 @@ func (p *PhysicalIndexScan) GetPlanCostVer2(taskType property.TaskType, option *
112126
}
113127

114128
rows := getCardinality(p, option.CostFlag)
115-
rowSize := math.Max(getAvgRowSize(p.StatsInfo(), p.schema.Columns), 2.0) // consider all index columns
129+
rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns) // consider all index columns
116130
scanFactor := getTaskScanFactorVer2(p, kv.TiKV, taskType)
117131

118132
p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
119133
p.PlanCostInit = true
120134
return p.PlanCostVer2, nil
121135
}
122136

123-
const (
124-
// MinRowSize provides a minimum to avoid underestimation
125-
MinRowSize = 2.0
126-
// TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans
127-
TiFlashStartupRowPenalty = 10000
128-
// MaxPenaltyRowCount applies a penalty for high risk scans
129-
MaxPenaltyRowCount = 1000
130-
)
131-
132137
// GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
133138
// plan-cost = rows * log2(row-size) * scan-factor
134139
// log2(row-size) is from experiments.
@@ -137,17 +142,19 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
137142
return p.PlanCostVer2, nil
138143
}
139144

140-
rows := getCardinality(p, option.CostFlag)
141-
142145
var columns []*expression.Column
143146
if p.StoreType == kv.TiKV { // Assume all columns for TiKV
144147
columns = p.tblCols
145148
} else { // TiFlash
146149
columns = p.schema.Columns
147150
}
151+
rows := getCardinality(p, option.CostFlag)
148152
rowSize := getAvgRowSize(p.StatsInfo(), columns)
149-
// Ensure rowSize has a reasonable minimum value to avoid underestimation
150-
rowSize = math.Max(rowSize, MinRowSize)
153+
// Ensure rows and rowSize have a reasonable minimum value to avoid underestimation
154+
if !p.isChildOfIndexLookUp {
155+
rows = max(MinNumRows, rows)
156+
rowSize = max(rowSize, MinRowSize)
157+
}
151158

152159
scanFactor := getTaskScanFactorVer2(p, p.StoreType, taskType)
153160
p.PlanCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
@@ -177,7 +184,7 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
177184

178185
shouldApplyPenalty := hasFullRangeScan && (preferRangeScanCondition || hasHighModifyCount || hasLowEstimate)
179186
if shouldApplyPenalty {
180-
newRowCount := math.Min(MaxPenaltyRowCount, math.Max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
187+
newRowCount := math.Min(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
181188
p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, newRowCount, rowSize, scanFactor))
182189
}
183190
}
@@ -235,7 +242,7 @@ func (p *PhysicalTableReader) GetPlanCostVer2(taskType property.TaskType, option
235242
}
236243

237244
rows := getCardinality(p.tablePlan, option.CostFlag)
238-
rowSize := getAvgRowSize(p.StatsInfo(), p.schema.Columns)
245+
rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.schema.Columns))
239246
netFactor := getTaskNetFactorVer2(p, taskType)
240247
concurrency := float64(p.SCtx().GetSessionVars().DistSQLScanConcurrency())
241248
childType := property.CopSingleReadTaskType
@@ -395,8 +402,8 @@ func (p *PhysicalSort) GetPlanCostVer2(taskType property.TaskType, option *optim
395402
return p.PlanCostVer2, nil
396403
}
397404

398-
rows := math.Max(getCardinality(p.Children()[0], option.CostFlag), 1)
399-
rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
405+
rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
406+
rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
400407
cpuFactor := getTaskCPUFactorVer2(p, taskType)
401408
memFactor := getTaskMemFactorVer2(p, taskType)
402409
diskFactor := defaultVer2Factors.TiDBDisk
@@ -443,14 +450,14 @@ func (p *PhysicalTopN) GetPlanCostVer2(taskType property.TaskType, option *optim
443450
return p.PlanCostVer2, nil
444451
}
445452

446-
rows := getCardinality(p.Children()[0], option.CostFlag)
453+
rows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
447454
n := max(1, float64(p.Count+p.Offset))
448455
if n > 10000 {
449456
// It's only used to prevent some extreme cases, e.g. `select * from t order by a limit 18446744073709551615`.
450457
// For normal cases, considering that `rows` may be under-estimated, better to keep `n` unchanged.
451458
n = min(n, rows)
452459
}
453-
rowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
460+
rowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
454461
cpuFactor := getTaskCPUFactorVer2(p, taskType)
455462
memFactor := getTaskMemFactorVer2(p, taskType)
456463

@@ -499,9 +506,9 @@ func (p *PhysicalHashAgg) GetPlanCostVer2(taskType property.TaskType, option *op
499506
return p.PlanCostVer2, nil
500507
}
501508

502-
inputRows := getCardinality(p.Children()[0], option.CostFlag)
503-
outputRows := getCardinality(p, option.CostFlag)
504-
outputRowSize := getAvgRowSize(p.StatsInfo(), p.Schema().Columns)
509+
inputRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
510+
outputRows := max(MinNumRows, getCardinality(p, option.CostFlag))
511+
outputRowSize := max(MinRowSize, getAvgRowSize(p.StatsInfo(), p.Schema().Columns))
505512
cpuFactor := getTaskCPUFactorVer2(p, taskType)
506513
memFactor := getTaskMemFactorVer2(p, taskType)
507514
concurrency := float64(p.SCtx().GetSessionVars().HashAggFinalConcurrency())
@@ -531,8 +538,8 @@ func (p *PhysicalMergeJoin) GetPlanCostVer2(taskType property.TaskType, option *
531538
return p.PlanCostVer2, nil
532539
}
533540

534-
leftRows := getCardinality(p.Children()[0], option.CostFlag)
535-
rightRows := getCardinality(p.Children()[1], option.CostFlag)
541+
leftRows := max(MinNumRows, getCardinality(p.Children()[0], option.CostFlag))
542+
rightRows := max(MinNumRows, getCardinality(p.Children()[1], option.CostFlag))
536543
cpuFactor := getTaskCPUFactorVer2(p, taskType)
537544

538545
filterCost := costusage.SumCostVer2(filterCostVer2(option, leftRows, p.LeftConditions, cpuFactor),
@@ -570,9 +577,9 @@ func (p *PhysicalHashJoin) GetPlanCostVer2(taskType property.TaskType, option *o
570577
build, probe = probe, build
571578
buildFilters, probeFilters = probeFilters, buildFilters
572579
}
573-
buildRows := getCardinality(build, option.CostFlag)
580+
buildRows := max(MinNumRows, getCardinality(build, option.CostFlag))
574581
probeRows := getCardinality(probe, option.CostFlag)
575-
buildRowSize := getAvgRowSize(build.StatsInfo(), build.Schema().Columns)
582+
buildRowSize := max(MinRowSize, getAvgRowSize(build.StatsInfo(), build.Schema().Columns))
576583
tidbConcurrency := float64(p.Concurrency)
577584
mppConcurrency := float64(3) // TODO: remove this empirical value
578585
cpuFactor := getTaskCPUFactorVer2(p, taskType)

0 commit comments

Comments
 (0)
Please sign in to comment.