Skip to content

Commit 6c87a57

Browse files
committed
sql/opt: add data-driven test framework for canary statistics
This commit introduces testing infrastructure for the canary statistics feature. Previously, there was no systematic way to test the canary statistics selection logic and the stats_as_of functionality. The implementation adds a new data-driven test framework in `pkg/sql/opt/memo/statistics_builder_test.go` that supports commands for setting canary fraction, stats-as-of timestamps, and making statistics. The motivation for adding this test infrastructure is to unit-test the canary full statistics rollout logic in `statisticsBuilder.makeTableStatistics()`. While end-to-end testing can be accomplished via `EXPLAIN ANALYZE` statements, such logictests would also be influenced by `Builder.maybeAnnotateWithEstimates()`, which performs its own filtering of full statistics. This dedicated test infrastructure isolates the testing of the `makeTableStatistics` logic to ensure the canary selection behavior can be verified independently. Release note: None
1 parent 0f0405d commit 6c87a57

File tree

6 files changed

+189
-3
lines changed

6 files changed

+189
-3
lines changed

pkg/sql/opt/memo/statistics_builder_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,20 @@ package memo
77

88
import (
99
"context"
10+
"strconv"
1011
"testing"
1112

1213
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
1314
"github.com/cockroachdb/cockroach/pkg/sql/opt"
1415
"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
1516
"github.com/cockroachdb/cockroach/pkg/sql/opt/props"
1617
"github.com/cockroachdb/cockroach/pkg/sql/opt/testutils/testcat"
18+
"github.com/cockroachdb/cockroach/pkg/sql/sem/asof"
1719
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
1820
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
21+
"github.com/cockroachdb/cockroach/pkg/util/hlc"
22+
"github.com/cockroachdb/datadriven"
23+
"github.com/stretchr/testify/require"
1924
)
2025

2126
// Most of the functionality in statistics.go is tested by data-driven
@@ -238,3 +243,54 @@ func testStats(t *testing.T, s *props.Statistics, expectedStats string) {
238243
t.Fatalf("\nexpected: %s\nactual : %s", expectedStats, actual)
239244
}
240245
}
246+
247+
func TestDataDriven(t *testing.T) {
248+
evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
249+
catalog := testcat.New()
250+
var mem Memo
251+
252+
statsFunc := func(tab *testcat.Table, tabID opt.TableID) string {
253+
t.Helper()
254+
sb := &statisticsBuilder{}
255+
sb.init(context.Background(), &evalCtx, &mem)
256+
257+
// TODO: the resStats should contain the createdAt timestamp.
258+
resStats := sb.makeTableStatistics(tabID)
259+
return resStats.String()
260+
}
261+
262+
datadriven.Walk(t, "testdata/stats_builder", func(t *testing.T, path string) {
263+
datadriven.RunTest(t, path, func(t *testing.T, td *datadriven.TestData) string {
264+
switch td.Cmd {
265+
case "exec-ddl":
266+
_, err := catalog.ExecuteDDL(td.Input)
267+
require.NoError(t, err)
268+
case "canary-fraction":
269+
require.NotEmpty(t, td.Input)
270+
fraction, err := strconv.ParseFloat(td.Input, 64)
271+
require.NoError(t, err)
272+
evalCtx.TestingKnobs.CanaryFraction = &fraction
273+
case "stats-as-of":
274+
require.NotEmpty(t, td.Input)
275+
if td.Input == "null" {
276+
evalCtx.SessionData().StatsAsOf = hlc.Timestamp{}
277+
} else {
278+
asOfTs, err := asof.Eval(context.Background(), tree.AsOfClause{Expr: tree.NewStrVal(td.Input)}, &tree.SemaContext{}, &evalCtx)
279+
require.NoError(t, err)
280+
evalCtx.SessionData().StatsAsOf = asOfTs.Timestamp
281+
}
282+
case "make-stats":
283+
mem.Init(context.Background(), &evalCtx)
284+
require.GreaterOrEqual(t, len(td.CmdArgs), 1)
285+
tblName := td.CmdArgs[0].String()
286+
tn := tree.NewUnqualifiedTableName(tree.Name(tblName))
287+
tab := catalog.Table(tn)
288+
tabID := mem.Metadata().AddTable(tab, tn)
289+
return statsFunc(tab, tabID)
290+
default:
291+
t.Fatalf("unknown command: %s", td.Cmd)
292+
}
293+
return ""
294+
})
295+
})
296+
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
exec-ddl
2+
CREATE TABLE sel (a INT, b INT, c INT, d STRING, e STRING) WITH (canary_window = '30s')
3+
----
4+
5+
stats-as-of
6+
2018-01-01 1:00:40.00000+00:00
7+
----
8+
9+
exec-ddl
10+
ALTER TABLE sel INJECT STATISTICS '[
11+
{
12+
"columns": ["a","b","c","d","e"],
13+
"created_at": "2018-01-01 1:00:20.00000+00:00",
14+
"row_count": 10,
15+
"distinct_count": 10
16+
},
17+
{
18+
"columns": ["a","b","c","d","e"],
19+
"created_at": "2018-01-01 1:00:00.00000+00:00",
20+
"row_count": 20,
21+
"distinct_count": 20
22+
}
23+
]'
24+
----
25+
26+
# We should pick the canary stats created at 1:00:20 since they are within the 30s canary window.
27+
canary-fraction
28+
1.0
29+
----
30+
31+
make-stats sel
32+
----
33+
[rows=10]
34+
35+
# We should pick the stable stats created at 1:00:00 which is the second most recent stats.
36+
canary-fraction
37+
0.0
38+
----
39+
40+
make-stats sel
41+
----
42+
[rows=20]
43+
44+
45+
exec-ddl
46+
ALTER TABLE sel SET (canary_window = '10s')
47+
----
48+
49+
# We should pick the stable stats created at 1:00:20 since there doesn't
50+
# exist any canary stats within the 10s canary window.
51+
canary-fraction
52+
0.0
53+
----
54+
55+
make-stats sel
56+
----
57+
[rows=10]
58+
59+
subtest only-one-full-stats-available
60+
61+
exec-ddl
62+
ALTER TABLE sel INJECT STATISTICS '[
63+
{
64+
"columns": ["a","b","c","d","e"],
65+
"created_at": "2018-01-01 1:00:20.00000+00:00",
66+
"row_count": 30,
67+
"distinct_count": 30
68+
}
69+
]'
70+
----
71+
72+
# For both canary and stable stats, we will pick the only one available,
73+
# regardless if it is within the canary window range.
74+
canary-fraction
75+
1.0
76+
----
77+
78+
make-stats sel
79+
----
80+
[rows=30]
81+
82+
# We should pick the stable stats created at 1:00:00 which is the second most recent stats.
83+
canary-fraction
84+
0.0
85+
----
86+
87+
make-stats sel
88+
----
89+
[rows=30]
90+
91+
subtest end

pkg/sql/opt/metadata.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,12 @@ var canaryFraction = settings.RegisterFloatSetting(
7171
// should use the "canary path" for statistics.
7272
// This selection is atomic per query.
7373
func canaryRollDice(evalCtx *eval.Context) bool {
74-
threshold := canaryFraction.Get(&evalCtx.Settings.SV)
75-
74+
var threshold float64
75+
if evalCtx.TestingKnobs.CanaryFraction != nil {
76+
threshold = *evalCtx.TestingKnobs.CanaryFraction
77+
} else {
78+
threshold = canaryFraction.Get(&evalCtx.Settings.SV)
79+
}
7680
// If the fraction is 0, never use canary stats. (should we even allow?)
7781
if threshold == 0 {
7882
return false

pkg/sql/opt/testutils/testcat/alter_table.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,12 @@ func (tc *Catalog) AlterTable(stmt *tree.AlterTable) {
4747
default:
4848
panic(errors.AssertionFailedf("unsupported constraint type %v", d))
4949
}
50-
50+
case *tree.AlterTableSetStorageParams:
51+
if canaryWindowExpr := t.StorageParams.GetVal(canaryWindowStorageParamKey); canaryWindowExpr != nil {
52+
tab.canaryWindowSize = getDurationFromStrExpr(canaryWindowExpr)
53+
} else {
54+
panic(errors.AssertionFailedf("unsupported AlterTableSetStorageParams stmt with storage params: %v", t.StorageParams))
55+
}
5156
default:
5257
panic(errors.AssertionFailedf("unsupported ALTER TABLE command %T", t))
5358
}

pkg/sql/opt/testutils/testcat/create_table.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"sort"
1414
"strconv"
1515
"strings"
16+
"time"
1617

1718
"github.com/cockroachdb/cockroach/pkg/config/zonepb"
1819
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
@@ -30,7 +31,9 @@ import (
3031
"github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb"
3132
"github.com/cockroachdb/cockroach/pkg/sql/types"
3233
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecpb"
34+
"github.com/cockroachdb/cockroach/pkg/util/duration"
3335
"github.com/cockroachdb/cockroach/pkg/util/intsets"
36+
"github.com/cockroachdb/errors"
3437
)
3538

3639
type indexType int
@@ -58,6 +61,8 @@ var uniqueRowIDString = "unique_rowid()"
5861
// referencing side of foreign keys (like it was required before 20.2).
5962
const createFKIndexes = false
6063

64+
const canaryWindowStorageParamKey = "canary_window"
65+
6166
// CreateTable creates a test table from a parsed DDL statement and adds it to
6267
// the catalog. This is intended for testing, and is not a complete (and
6368
// probably not fully correct) implementation. It just has to be "good enough".
@@ -441,6 +446,10 @@ OuterLoop:
441446
}
442447
}
443448

449+
if canaryWindowExpr := stmt.StorageParams.GetVal(canaryWindowStorageParamKey); canaryWindowExpr != nil {
450+
tab.canaryWindowSize = getDurationFromStrExpr(canaryWindowExpr)
451+
}
452+
444453
// Add the new table to the catalog.
445454
tc.AddTable(tab)
446455

@@ -1588,3 +1597,22 @@ func generateDefExprForSerialCol(
15881597
" %s", colName, tableName.String()))
15891598
}
15901599
}
1600+
1601+
func getDurationFromStrExpr(expr tree.Expr) time.Duration {
1602+
if expr == nil {
1603+
return 0
1604+
}
1605+
exprStr, ok := expr.(*tree.StrVal)
1606+
if !ok {
1607+
panic("expr must be of StrVal type")
1608+
}
1609+
d, err := tree.ParseDInterval(duration.IntervalStyle_POSTGRES, exprStr.RawString())
1610+
if err != nil {
1611+
panic(errors.Wrapf(err, "parsing %q", exprStr.RawString()))
1612+
}
1613+
durInSeconds, ok := d.Duration.AsInt64()
1614+
if !ok {
1615+
panic("unable to convert duration to seconds")
1616+
}
1617+
return time.Duration(durInSeconds) * time.Second
1618+
}

pkg/sql/sem/eval/testing_knobs.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ type TestingKnobs struct {
4141
// We use clusterversion.Key rather than a roachpb.Version because it will be used
4242
// to get initial values to use during bootstrap.
4343
TenantLogicalVersionKeyOverride clusterversion.Key
44+
45+
CanaryFraction *float64
4446
}
4547

4648
var _ base.ModuleTestingKnobs = &TestingKnobs{}

0 commit comments

Comments
 (0)