@@ -103,6 +103,20 @@ func (p *PhysicalProjection) GetPlanCostVer2(taskType property.TaskType, option
103
103
return p .PlanCostVer2 , nil
104
104
}
105
105
106
+ const (
107
+ // MinNumRows provides a minimum to avoid underestimation. As selectivity estimation approaches
108
+ // zero, all plan choices result in a low cost - making it difficult to differentiate plan choices.
109
+ // A low value of 1.0 here is used for most (non-probe) accesses to reduce this risk.
110
+ MinNumRows = 1.0
111
+ // MinRowSize provides a minimum column length to ensure that any adjustment or calculation
112
+ // in costing does not go below this value. 2.0 is used as a reasonable lowest column length.
113
+ MinRowSize = 2.0
114
+ // TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans
115
+ TiFlashStartupRowPenalty = 10000
116
+ // MaxPenaltyRowCount is the upper bound on the extra row count charged as a penalty for high risk scans
117
+ MaxPenaltyRowCount = 1000
118
+ )
119
+
106
120
// GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
107
121
// plan-cost = rows * log2(row-size) * scan-factor
108
122
// log2(row-size) is from experiments.
@@ -112,23 +126,14 @@ func (p *PhysicalIndexScan) GetPlanCostVer2(taskType property.TaskType, option *
112
126
}
113
127
114
128
rows := getCardinality (p , option .CostFlag )
115
- rowSize := math . Max ( getAvgRowSize (p .StatsInfo (), p .schema .Columns ), 2.0 ) // consider all index columns
129
+ rowSize := getAvgRowSize (p .StatsInfo (), p .schema .Columns ) // consider all index columns
116
130
scanFactor := getTaskScanFactorVer2 (p , kv .TiKV , taskType )
117
131
118
132
p .PlanCostVer2 = scanCostVer2 (option , rows , rowSize , scanFactor )
119
133
p .PlanCostInit = true
120
134
return p .PlanCostVer2 , nil
121
135
}
122
136
123
- const (
124
- // MinRowSize provides a minimum to avoid underestimation
125
- MinRowSize = 2.0
126
- // TiFlashStartupRowPenalty applies a startup penalty for TiFlash scan to encourage TiKV usage for small scans
127
- TiFlashStartupRowPenalty = 10000
128
- // MaxPenaltyRowCount applies a penalty for high risk scans
129
- MaxPenaltyRowCount = 1000
130
- )
131
-
132
137
// GetPlanCostVer2 returns the plan-cost of this sub-plan, which is:
133
138
// plan-cost = rows * log2(row-size) * scan-factor
134
139
// log2(row-size) is from experiments.
@@ -137,17 +142,19 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
137
142
return p .PlanCostVer2 , nil
138
143
}
139
144
140
- rows := getCardinality (p , option .CostFlag )
141
-
142
145
var columns []* expression.Column
143
146
if p .StoreType == kv .TiKV { // Assume all columns for TiKV
144
147
columns = p .tblCols
145
148
} else { // TiFlash
146
149
columns = p .schema .Columns
147
150
}
151
+ rows := getCardinality (p , option .CostFlag )
148
152
rowSize := getAvgRowSize (p .StatsInfo (), columns )
149
- // Ensure rowSize has a reasonable minimum value to avoid underestimation
150
- rowSize = math .Max (rowSize , MinRowSize )
153
+ // Ensure rows and rowSize have a reasonable minimum value to avoid underestimation
154
+ if ! p .isChildOfIndexLookUp {
155
+ rows = max (MinNumRows , rows )
156
+ rowSize = max (rowSize , MinRowSize )
157
+ }
151
158
152
159
scanFactor := getTaskScanFactorVer2 (p , p .StoreType , taskType )
153
160
p .PlanCostVer2 = scanCostVer2 (option , rows , rowSize , scanFactor )
@@ -177,7 +184,7 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
177
184
178
185
shouldApplyPenalty := hasFullRangeScan && (preferRangeScanCondition || hasHighModifyCount || hasLowEstimate )
179
186
if shouldApplyPenalty {
180
- newRowCount := math .Min (MaxPenaltyRowCount , math . Max (float64 (tblColHists .ModifyCount ), float64 (tblColHists .RealtimeCount )))
187
+ newRowCount := math .Min (MaxPenaltyRowCount , max (float64 (tblColHists .ModifyCount ), float64 (tblColHists .RealtimeCount )))
181
188
p .PlanCostVer2 = costusage .SumCostVer2 (p .PlanCostVer2 , scanCostVer2 (option , newRowCount , rowSize , scanFactor ))
182
189
}
183
190
}
@@ -235,7 +242,7 @@ func (p *PhysicalTableReader) GetPlanCostVer2(taskType property.TaskType, option
235
242
}
236
243
237
244
rows := getCardinality (p .tablePlan , option .CostFlag )
238
- rowSize := getAvgRowSize (p .StatsInfo (), p .schema .Columns )
245
+ rowSize := max ( MinRowSize , getAvgRowSize (p .StatsInfo (), p .schema .Columns ) )
239
246
netFactor := getTaskNetFactorVer2 (p , taskType )
240
247
concurrency := float64 (p .SCtx ().GetSessionVars ().DistSQLScanConcurrency ())
241
248
childType := property .CopSingleReadTaskType
@@ -395,8 +402,8 @@ func (p *PhysicalSort) GetPlanCostVer2(taskType property.TaskType, option *optim
395
402
return p .PlanCostVer2 , nil
396
403
}
397
404
398
- rows := math . Max ( getCardinality (p .Children ()[0 ], option .CostFlag ), 1 )
399
- rowSize := getAvgRowSize (p .StatsInfo (), p .Schema ().Columns )
405
+ rows := max ( MinNumRows , getCardinality (p .Children ()[0 ], option .CostFlag ))
406
+ rowSize := max ( MinRowSize , getAvgRowSize (p .StatsInfo (), p .Schema ().Columns ) )
400
407
cpuFactor := getTaskCPUFactorVer2 (p , taskType )
401
408
memFactor := getTaskMemFactorVer2 (p , taskType )
402
409
diskFactor := defaultVer2Factors .TiDBDisk
@@ -443,14 +450,14 @@ func (p *PhysicalTopN) GetPlanCostVer2(taskType property.TaskType, option *optim
443
450
return p .PlanCostVer2 , nil
444
451
}
445
452
446
- rows := getCardinality (p .Children ()[0 ], option .CostFlag )
453
+ rows := max ( MinNumRows , getCardinality (p .Children ()[0 ], option .CostFlag ) )
447
454
n := max (1 , float64 (p .Count + p .Offset ))
448
455
if n > 10000 {
449
456
// It's only used to prevent some extreme cases, e.g. `select * from t order by a limit 18446744073709551615`.
450
457
// For normal cases, considering that `rows` may be under-estimated, better to keep `n` unchanged.
451
458
n = min (n , rows )
452
459
}
453
- rowSize := getAvgRowSize (p .StatsInfo (), p .Schema ().Columns )
460
+ rowSize := max ( MinRowSize , getAvgRowSize (p .StatsInfo (), p .Schema ().Columns ) )
454
461
cpuFactor := getTaskCPUFactorVer2 (p , taskType )
455
462
memFactor := getTaskMemFactorVer2 (p , taskType )
456
463
@@ -499,9 +506,9 @@ func (p *PhysicalHashAgg) GetPlanCostVer2(taskType property.TaskType, option *op
499
506
return p .PlanCostVer2 , nil
500
507
}
501
508
502
- inputRows := getCardinality (p .Children ()[0 ], option .CostFlag )
503
- outputRows := getCardinality (p , option .CostFlag )
504
- outputRowSize := getAvgRowSize (p .StatsInfo (), p .Schema ().Columns )
509
+ inputRows := max ( MinNumRows , getCardinality (p .Children ()[0 ], option .CostFlag ) )
510
+ outputRows := max ( MinNumRows , getCardinality (p , option .CostFlag ) )
511
+ outputRowSize := max ( MinRowSize , getAvgRowSize (p .StatsInfo (), p .Schema ().Columns ) )
505
512
cpuFactor := getTaskCPUFactorVer2 (p , taskType )
506
513
memFactor := getTaskMemFactorVer2 (p , taskType )
507
514
concurrency := float64 (p .SCtx ().GetSessionVars ().HashAggFinalConcurrency ())
@@ -531,8 +538,8 @@ func (p *PhysicalMergeJoin) GetPlanCostVer2(taskType property.TaskType, option *
531
538
return p .PlanCostVer2 , nil
532
539
}
533
540
534
- leftRows := getCardinality (p .Children ()[0 ], option .CostFlag )
535
- rightRows := getCardinality (p .Children ()[1 ], option .CostFlag )
541
+ leftRows := max ( MinNumRows , getCardinality (p .Children ()[0 ], option .CostFlag ) )
542
+ rightRows := max ( MinNumRows , getCardinality (p .Children ()[1 ], option .CostFlag ) )
536
543
cpuFactor := getTaskCPUFactorVer2 (p , taskType )
537
544
538
545
filterCost := costusage .SumCostVer2 (filterCostVer2 (option , leftRows , p .LeftConditions , cpuFactor ),
@@ -570,9 +577,9 @@ func (p *PhysicalHashJoin) GetPlanCostVer2(taskType property.TaskType, option *o
570
577
build , probe = probe , build
571
578
buildFilters , probeFilters = probeFilters , buildFilters
572
579
}
573
- buildRows := getCardinality (build , option .CostFlag )
580
+ buildRows := max ( MinNumRows , getCardinality (build , option .CostFlag ) )
574
581
probeRows := getCardinality (probe , option .CostFlag )
575
- buildRowSize := getAvgRowSize (build .StatsInfo (), build .Schema ().Columns )
582
+ buildRowSize := max ( MinRowSize , getAvgRowSize (build .StatsInfo (), build .Schema ().Columns ) )
576
583
tidbConcurrency := float64 (p .Concurrency )
577
584
mppConcurrency := float64 (3 ) // TODO: remove this empirical value
578
585
cpuFactor := getTaskCPUFactorVer2 (p , taskType )
0 commit comments