Skip to content

Commit 9604f82

Browse files
committed
sql: implement stats_as_of session variable for time-based statistics selection
This commit adds a new session variable `stats_as_of` that allows controlling statistics selection based on a specific timestamp rather than the current time.

Previously, statistics selection was always relative to the current wall clock time, making it difficult to get consistent query plans for historical analysis or testing. This feature is only for debugging and troubleshooting, and should not be used in production.

The implementation is also integrated into the existing canary statistics logic to respect the as-of timestamp when determining canary window boundaries.

Release note (sql change): adds a new session variable `stats_as_of` that allows controlling statistics selection based on a specific timestamp rather than the current time.
1 parent 0d7061d commit 9604f82

File tree

6 files changed

+56
-2
lines changed

6 files changed

+56
-2
lines changed

pkg/sql/opt/memo/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ go_library(
5252
"//pkg/util/buildutil",
5353
"//pkg/util/duration",
5454
"//pkg/util/encoding",
55+
"//pkg/util/hlc",
5556
"//pkg/util/intsets",
5657
"//pkg/util/iterutil",
5758
"//pkg/util/json",

pkg/sql/opt/memo/statistics_builder.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -686,29 +686,40 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati
686686
useCanary := sb.mem.Metadata().UseCanaryStats()
687687
var skippedStatsCreationTimestamp time.Time
688688
canaryWindowSize := tabMeta.CanaryWindowSize
689+
// TODO(janexing): should we use clock.Now() or the StmtTimestamp as
690+
// the default? Or avoid this issue by only setting it if the session var
691+
// is set?
692+
asOfTs := hlc.Timestamp{WallTime: sb.evalCtx.StmtTimestamp.UnixNano()}
693+
if asOf := sb.evalCtx.SessionData().StatsAsOf; !asOf.IsEmpty() {
694+
asOfTs = asOf
695+
}
689696
// Find the most recent full statistic. (Stats are ordered with most recent first.)
690697
var first int
691698
sd := sb.evalCtx.SessionData()
692699
for first < tab.StatisticCount() {
693700
stat := tab.Statistic(first)
701+
createdAtTS := hlc.Timestamp{WallTime: stat.CreatedAt().UnixNano()}
694702
if stat.IsPartial() ||
695703
stat.IsMerged() && !sd.OptimizerUseMergedPartialStatistics ||
696704
stat.IsForecast() && !sd.OptimizerUseForecasts {
697705
first++
698706
continue
707+
} else if createdAtTS.After(asOfTs) {
708+
// This stat was created after the as-of timestamp, so skip it.
709+
first++
710+
continue
699711
} else if canaryWindowSize > 0 && !useCanary && first < tab.StatisticCount()-1 {
700712
// The following, we are getting full statistics for the stable stats,
701713
// in contrast to the canary stats. The canary stats is skipped.
702714
// If there remains only one full statistics, don't skip.
703-
createdAtTS := hlc.Timestamp{WallTime: stat.CreatedAt().UnixNano()}
704715
if stat.CreatedAt() == skippedStatsCreationTimestamp && !skippedStatsCreationTimestamp.IsZero() {
705716
// We've already seen this canary stat, so skip it.
706717
first++
707718
continue
708719
}
709720
// Found a canary stats (defined as creation time within the canary window size). Register the
710721
// creation timestamp and move on the next older one.
711-
if createdAtTS.AddDuration(canaryWindowSize).After(hlc.Timestamp{WallTime: time.Now().UnixNano()}) {
722+
if createdAtTS.AddDuration(canaryWindowSize).After(asOfTs) {
712723
// If there is already a canary stats skipped, we don't skip again.
713724
if skippedStatsCreationTimestamp.IsZero() {
714725
skippedStatsCreationTimestamp = stat.CreatedAt()

pkg/sql/sessiondatapb/local_only_session_data.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,10 @@ message LocalOnlySessionData {
751751
// clamp the selectivity of open-ended inequality filters (e.g. <, >, !=)
752752
// but not (=, BETWEEN etc.) to a minimum threshold.
753753
bool optimizer_clamp_inequality_selectivity = 190;
754+
// StatsAsOf is an optional timestamp. When it is set, stats selection
755+
// is based on the age gap between the stats creation timestamp and
756+
// the StatsAsOf timestamp, rather than on the current time.
757+
util.hlc.Timestamp stats_as_of = 191 [(gogoproto.nullable) = false];
754758

755759
///////////////////////////////////////////////////////////////////////////
756760
// WARNING: consider whether a session parameter you're adding needs to //

pkg/sql/sessionmutator/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ go_library(
1313
"//pkg/sql/sessiondata",
1414
"//pkg/sql/sessiondatapb",
1515
"//pkg/util/duration",
16+
"//pkg/util/hlc",
1617
"//pkg/util/timeutil",
1718
"//pkg/util/timeutil/pgdate",
1819
"@com_github_cockroachdb_redact//:redact",

pkg/sql/sessionmutator/mutator.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
1919
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
2020
"github.com/cockroachdb/cockroach/pkg/util/duration"
21+
"github.com/cockroachdb/cockroach/pkg/util/hlc"
2122
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
2223
"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
2324
"github.com/cockroachdb/redact"
@@ -1102,3 +1103,7 @@ func (m *SessionDataMutator) SetOptimizerClampLowHistogramSelectivity(val bool)
11021103
func (m *SessionDataMutator) SetOptimizerClampInequalitySelectivity(val bool) {
11031104
m.Data.OptimizerClampInequalitySelectivity = val
11041105
}
1106+
1107+
// SetStatsAsOf sets the StatsAsOf field of the session data to val.
func (m *SessionDataMutator) SetStatsAsOf(val hlc.Timestamp) {
1108+
m.Data.StatsAsOf = val
1109+
}

pkg/sql/vars.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4444,6 +4444,38 @@ var varGen = map[string]sessionVar{
44444444
},
44454445
GlobalDefault: globalFalse,
44464446
},
4447+
// stats_as_of holds the timestamp stored in the session data's StatsAsOf
// field. Setting it to the empty string stores the zero hlc.Timestamp.
`stats_as_of`: {
4448+
// NOTE(review): this helper is named for boolean variables, but the value
// here is a timestamp expression string — confirm it is the intended
// GetStringVal implementation.
GetStringVal: makePostgresBoolGetStringValFn(`stats_as_of`),
4449+
SetWithPlanner: func(ctx context.Context, p *planner, local bool, s string) error {
4450+
// An empty string leaves ts as the zero timestamp.
ts := hlc.Timestamp{}
4451+
if s != "" {
4452+
// Evaluate the string as the expression of an AS OF clause.
asOfTimestamp, err := p.EvalAsOfTimestamp(ctx, tree.AsOfClause{Expr: tree.NewStrVal(s)})
4453+
if err != nil {
4454+
return errors.Wrap(err, "could not parse stats_as_of")
4455+
}
4456+
ts = asOfTimestamp.Timestamp
4457+
}
4458+
4459+
return p.applyOnSessionDataMutators(
4460+
ctx,
4461+
local,
4462+
func(m sessionmutator.SessionDataMutator) error {
4463+
m.SetStatsAsOf(ts)
4464+
return nil
4465+
},
4466+
)
4467+
},
4468+
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
4469+
asOfTs := evalCtx.SessionData().StatsAsOf
4470+
// An empty timestamp is reported as the empty string.
if asOfTs.IsEmpty() {
4471+
return "", nil
4472+
}
4473+
return asOfTs.AsOfSystemTime(), nil
4474+
},
4475+
// The global default is the empty string.
GlobalDefault: func(sv *settings.Values) string {
4476+
return ""
4477+
},
4478+
},
44474479
}
44484480

44494481
func ReplicationModeFromString(s string) (sessiondatapb.ReplicationMode, error) {

0 commit comments

Comments
 (0)