Skip to content

Commit 0d7061d

Browse files
committed
sql/opt: implement canary full statistics rollout with configurable window
This commit implements the core logic for canary statistics rollout, allowing gradual deployment of newly collected full statistics. Previously, all queries would immediately use the most recent full statistics, which could cause performance regressions if the new full statistics were inaccurate. The implementation adds a `CanaryWindowSize` field in table descriptors and catalog interfaces to define the canary period, along with logic in the statistics builder to skip "canary" statistics (the latest stats within the canary window) when not using the canary path. The cluster setting `sql.stats.canary_fraction` controls what percentage of queries use canary statistics. Release note (sql change): implement canary full statistics rollout core logic, which is configurable via the table-level storage parameter (`canary_window`) and the cluster setting `sql.stats.canary_fraction`.
1 parent 267c606 commit 0d7061d

File tree

9 files changed

+92
-7
lines changed

9 files changed

+92
-7
lines changed

pkg/sql/catalog/descriptor.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package catalog
77

88
import (
99
"context"
10+
"time"
1011

1112
"github.com/cockroachdb/cockroach/pkg/clusterversion"
1213
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
@@ -878,6 +879,9 @@ type TableDescriptor interface {
878879
// security for the table and false if it is no force. When forced is
879880
// set the table's RLS policies are enforced even on the table owner.
880881
IsRowLevelSecurityForced() bool
882+
// GetCanaryWindowSize returns the canary statistics rollout duration.
883+
// See TableDescriptor.CanaryWindowSize for details.
884+
GetCanaryWindowSize() time.Duration
881885
}
882886

883887
// MutableTableDescriptor is both a MutableDescriptor and a TableDescriptor.

pkg/sql/catalog/tabledesc/table_desc.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
package tabledesc
88

99
import (
10+
"time"
11+
1012
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
1113
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
1214
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb"
@@ -740,3 +742,8 @@ func (desc *wrapper) IsRowLevelSecurityEnabled() bool {
740742
func (desc *wrapper) IsRowLevelSecurityForced() bool {
741743
return desc.RowLevelSecurityForced
742744
}
745+
746+
// GetCanaryWindowSize implements the TableDescriptor interface; it returns the descriptor's CanaryWindowSize field.
747+
func (desc *wrapper) GetCanaryWindowSize() time.Duration {
748+
return desc.CanaryWindowSize
749+
}

pkg/sql/opt/cat/table.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@ type Table interface {
205205

206206
// Policies returns all the policies defined for this table.
207207
Policies() *Policies
208+
209+
// CanaryWindowSize returns the canary window size for the table.
210+
CanaryWindowSize() time.Duration
208211
}
209212

210213
// CheckConstraint represents a check constraint on a table. Check constraints

pkg/sql/opt/exec/explain/plan_gist_factory.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"context"
1111
b64 "encoding/base64"
1212
"encoding/binary"
13+
"time"
1314

1415
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
1516
"github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -682,6 +683,9 @@ func (u *unknownTable) IsRowLevelSecurityEnabled() bool { return false }
682683
// IsRowLevelSecurityForced is part of the cat.Table interface
683684
func (u *unknownTable) IsRowLevelSecurityForced() bool { return false }
684685

686+
// CanaryWindowSize is part of the cat.Table interface. It always returns 0 for an unknownTable.
687+
func (u *unknownTable) CanaryWindowSize() time.Duration { return 0 }
688+
685689
// Policies is part of the cat.Table interface.
686690
func (u *unknownTable) Policies() *cat.Policies { return nil }
687691

pkg/sql/opt/memo/statistics_builder.go

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"context"
1010
"math"
1111
"reflect"
12+
"time"
1213

1314
"github.com/cockroachdb/cockroach/pkg/geo/geoindex"
1415
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
@@ -23,6 +24,7 @@ import (
2324
"github.com/cockroachdb/cockroach/pkg/sql/stats"
2425
"github.com/cockroachdb/cockroach/pkg/sql/types"
2526
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
27+
"github.com/cockroachdb/cockroach/pkg/util/hlc"
2628
"github.com/cockroachdb/cockroach/pkg/util/json"
2729
"github.com/cockroachdb/cockroach/pkg/util/log"
2830
"github.com/cockroachdb/errors"
@@ -659,6 +661,7 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati
659661
}
660662

661663
tab := sb.md.Table(tabID)
664+
tabMeta := sb.md.TableMeta(tabID)
662665
// Create a mapping from table column ordinals to inverted index column
663666
// ordinals. This allows us to do a fast lookup while iterating over all
664667
// stats from a statistic's column to any associated inverted columns.
@@ -680,13 +683,41 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati
680683
// Make now and annotate the metadata table with it for next time.
681684
stats = &props.Statistics{}
682685

686+
useCanary := sb.mem.Metadata().UseCanaryStats()
687+
var skippedStatsCreationTimestamp time.Time
688+
canaryWindowSize := tabMeta.CanaryWindowSize
683689
// Find the most recent full statistic. (Stats are ordered with most recent first.)
684690
var first int
685-
for first < tab.StatisticCount() &&
686-
(tab.Statistic(first).IsPartial() ||
687-
tab.Statistic(first).IsMerged() && !sb.evalCtx.SessionData().OptimizerUseMergedPartialStatistics ||
688-
tab.Statistic(first).IsForecast() && !sb.evalCtx.SessionData().OptimizerUseForecasts) {
689-
first++
691+
sd := sb.evalCtx.SessionData()
692+
for first < tab.StatisticCount() {
693+
stat := tab.Statistic(first)
694+
if stat.IsPartial() ||
695+
stat.IsMerged() && !sd.OptimizerUseMergedPartialStatistics ||
696+
stat.IsForecast() && !sd.OptimizerUseForecasts {
697+
first++
698+
continue
699+
} else if canaryWindowSize > 0 && !useCanary && first < tab.StatisticCount()-1 {
700+
// At this point we are selecting the stable full statistics rather than
701+
// the canary statistics, so a canary statistic is skipped. If this is the
702+
// last statistic in the list, don't skip it.
703+
createdAtTS := hlc.Timestamp{WallTime: stat.CreatedAt().UnixNano()}
704+
if stat.CreatedAt() == skippedStatsCreationTimestamp && !skippedStatsCreationTimestamp.IsZero() {
705+
// We've already seen this canary stat, so skip it.
706+
first++
707+
continue
708+
}
709+
// Found a canary statistic (one whose creation time is within the canary window). Record its
710+
// creation timestamp and move on to the next older one.
711+
if createdAtTS.AddDuration(canaryWindowSize).After(hlc.Timestamp{WallTime: time.Now().UnixNano()}) {
712+
// If a canary statistic has already been skipped, don't skip another one.
713+
if skippedStatsCreationTimestamp.IsZero() {
714+
skippedStatsCreationTimestamp = stat.CreatedAt()
715+
first++
716+
continue
717+
}
718+
}
719+
}
720+
break
690721
}
691722

692723
if first >= tab.StatisticCount() {

pkg/sql/opt/metadata.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ var canaryFraction = settings.RegisterFloatSetting(
7373
func canaryRollDice(evalCtx *eval.Context) bool {
7474
threshold := canaryFraction.Get(&evalCtx.Settings.SV)
7575

76-
// If the fraction is 0, never use canary stats.
76+
// If the fraction is 0, never use canary stats. (TODO: decide whether a fraction of 0 should be allowed.)
7777
if threshold == 0 {
7878
return false
7979
}
@@ -824,7 +824,7 @@ func (md *Metadata) AddTable(tab cat.Table, alias *tree.TableName) TableID {
824824
if md.tables == nil {
825825
md.tables = make([]TableMeta, 0, 4)
826826
}
827-
md.tables = append(md.tables, TableMeta{MetaID: tabID, Table: tab, Alias: *alias})
827+
md.tables = append(md.tables, TableMeta{MetaID: tabID, Table: tab, Alias: *alias, CanaryWindowSize: tab.CanaryWindowSize()})
828828

829829
colCount := tab.ColumnCount()
830830
if md.cols == nil {
@@ -945,6 +945,7 @@ func (md *Metadata) DuplicateTable(
945945
partialIndexPredicates: partialIndexPredicates,
946946
indexPartitionLocalities: tabMeta.indexPartitionLocalities,
947947
checkConstraintsStats: checkConstraintsStats,
948+
CanaryWindowSize: tabMeta.CanaryWindowSize,
948949
}
949950
newTabMeta.indexVisibility.cached = tabMeta.indexVisibility.cached
950951
newTabMeta.indexVisibility.notVisible = tabMeta.indexVisibility.notVisible
@@ -1331,3 +1332,9 @@ func (md *Metadata) checkRLSDependencies(
13311332
// check already accounts for changes in the table descriptor version.
13321333
return true, nil
13331334
}
1335+
1336+
// UseCanaryStats reports whether the metadata is configured to use canary
1337+
// statistics for the current query planning; it returns md.useCanaryStats.
1338+
func (md *Metadata) UseCanaryStats() bool {
1339+
return md.useCanaryStats
1340+
}

pkg/sql/opt/table_meta.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package opt
88
import (
99
"context"
1010
"math/rand"
11+
"time"
1112

1213
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb"
1314
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
@@ -214,6 +215,11 @@ type TableMeta struct {
214215
cached *intsets.Fast
215216
notVisible *intsets.Fast
216217
}
218+
219+
// CanaryWindowSize determines the time duration for which newly collected
220+
// statistics are in a "canary" state before being promoted to a stable state.
221+
// See TableDescriptor.CanaryWindowSize for details.
222+
CanaryWindowSize time.Duration
217223
}
218224

219225
// IsIndexNotVisible returns true if the given index is not visible, and false
@@ -293,6 +299,7 @@ func (tm *TableMeta) copyFrom(from *TableMeta, copyScalarFn func(Expr) Expr) {
293299
Alias: from.Alias,
294300
IgnoreForeignKeys: from.IgnoreForeignKeys,
295301
IgnoreUniqueWithoutIndexKeys: from.IgnoreUniqueWithoutIndexKeys,
302+
CanaryWindowSize: from.CanaryWindowSize,
296303
// Annotations are not copied.
297304
}
298305

pkg/sql/opt/testutils/testcat/test_catalog.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -922,6 +922,8 @@ type Table struct {
922922
rlsForced bool
923923
policies cat.Policies
924924
nextPolicyID descpb.PolicyID
925+
926+
canaryWindowSize time.Duration
925927
}
926928

927929
var _ cat.Table = &Table{}
@@ -1232,6 +1234,9 @@ func (tt *Table) IsRowLevelSecurityEnabled() bool { return tt.rlsEnabled }
12321234
// IsRowLevelSecurityForced is part of the cat.Table interface.
12331235
func (tt *Table) IsRowLevelSecurityForced() bool { return tt.rlsForced }
12341236

1237+
// CanaryWindowSize is part of the cat.Table interface. It returns the test table's canaryWindowSize field.
1238+
func (tt *Table) CanaryWindowSize() time.Duration { return tt.canaryWindowSize }
1239+
12351240
// Policies is part of the cat.Table interface.
12361241
func (tt *Table) Policies() *cat.Policies {
12371242
return &tt.policies

pkg/sql/opt_catalog.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,9 @@ type optTable struct {
941941
rlsForced bool
942942
policies cat.Policies
943943

944+
// Canary statistics rollout duration.
945+
canaryWindow time.Duration
946+
944947
// colMap is a mapping from unique ColumnID to column ordinal within the
945948
// table. This is a common lookup that needs to be fast.
946949
colMap catalog.TableColMap
@@ -1224,6 +1227,9 @@ func newOptTable(
12241227
ot.rlsForced = desc.IsRowLevelSecurityForced()
12251228
ot.policies = getOptPolicies(desc.GetPolicies())
12261229

1230+
// Store canary stats rollout duration.
1231+
ot.canaryWindow = desc.GetCanaryWindowSize()
1232+
12271233
// Synthesize any check constraints for user defined types.
12281234
var synthesizedChecks []optCheckConstraint
12291235
if ot.rlsEnabled {
@@ -1630,6 +1636,11 @@ func (ot *optTable) IsRowLevelSecurityEnabled() bool { return ot.rlsEnabled }
16301636
// IsRowLevelSecurityForced is part of the cat.Table interface.
16311637
func (ot *optTable) IsRowLevelSecurityForced() bool { return ot.rlsForced }
16321638

1639+
// CanaryWindowSize is part of the cat.Table interface. It returns the table's canaryWindow field.
1640+
func (ot *optTable) CanaryWindowSize() time.Duration {
1641+
return ot.canaryWindow
1642+
}
1643+
16331644
// Policies is part of the cat.Table interface.
16341645
func (ot *optTable) Policies() *cat.Policies {
16351646
if !ot.rlsEnabled {
@@ -2777,6 +2788,12 @@ func (ot *optVirtualTable) IsRowLevelSecurityEnabled() bool { return false }
27772788
// IsRowLevelSecurityForced is part of the cat.Table interface.
27782789
func (ot *optVirtualTable) IsRowLevelSecurityForced() bool { return false }
27792790

2791+
// CanaryWindowSize is part of the cat.Table interface. It currently always returns 0 for virtual tables.
2792+
// TODO: think about the actual value to return here.
2793+
func (ot *optVirtualTable) CanaryWindowSize() time.Duration {
2794+
return 0
2795+
}
2796+
27802797
// Policies is part of the cat.Table interface.
27812798
func (ot *optVirtualTable) Policies() *cat.Policies { return nil }
27822799

0 commit comments

Comments
 (0)