Skip to content

Commit 8aa3669

Browse files
craig[bot]spilchenkev-caopeachdawnleachrafiss
committed
155653: roachtest: run INSPECT as background job and poll for completion r=spilchen a=spilchen Long-running INSPECT statements (e.g., >3h) have previously failed due to connection resets, despite no server crash. This likely stems from a missed keep-alive or a client timeout, though the exact cause is unclear. This change updates the roachtest to: - Run the INSPECT statement as a background job using a short statement_timeout (5s). - Poll the job table for job completion instead of waiting synchronously. - Report progress at 10% intervals to improve visibility without overwhelming the logs. Fixes #155610 Release note: none Epic: none 155657: sql: admins now have full access to all external connections r=rafiss a=kev-cao Previously, if a non-admin user were to create an external connection, that connection would not show up in the output of `SHOW EXTERNAL CONNECTIONS` that was run by an admin. This patch fixes this bug so that admins can now see/use all external connections. Fixes: #146773 Release note: Admin users now have full access to all external connections. 155664: Update diagram for ALTER EXTERNAL CONNECTION r=kev-cao a=peachdawnleach docgen: update ALTER EXTERNAL CONNECTION diagram Epic: none Release note: none Release justification: non-production code change Part of [DOC-13254](https://cockroachlabs.atlassian.net/browse/DOC-13254) New diagram: <img width="1023" height="283" alt="Screenshot 2025-10-17 at 7 24 43 PM" src="https://github.com/user-attachments/assets/21975cb9-e9ef-4162-8782-7249436bad05" /> 155792: sql: increase timeout in TestIndexBackfillerResumePreservesProgress r=rafiss a=rafiss This increases all three timeouts in the test from 5 seconds to 30 seconds Resolves: #155646 Release note: None 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Matt Spilchen <[email protected]> Co-authored-by: Kevin Cao <[email protected]> Co-authored-by: Peach Leach <[email protected]> Co-authored-by: Rafi Shamim <[email protected]>
5 parents a7bdedf + f8b539b + ae3c866 + 31d2ba4 + db26171 commit 8aa3669

File tree

11 files changed

+166
-31
lines changed

11 files changed

+166
-31
lines changed

docs/generated/sql/bnf/BUILD.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ FILES = [
2525
"alter_database_to_schema_stmt",
2626
"alter_ddl_stmt",
2727
"alter_default_privileges_stmt",
28-
"alter_external_connection_stmt",
28+
"alter_external_connection",
2929
"alter_func_stmt",
3030
"alter_func_options_stmt",
3131
"alter_func_rename_stmt",
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
alter_external_connection_stmt ::=
2+
'ALTER' 'EXTERNAL' 'CONNECTION' connection_name 'AS' connection_uri
3+
| 'ALTER' 'EXTERNAL' 'CONNECTION' 'IF' 'EXISTS' connection_name 'AS' connection_uri

docs/generated/sql/bnf/alter_external_connection_stmt.bnf

Lines changed: 0 additions & 3 deletions
This file was deleted.

pkg/cmd/docgen/diagrams.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,12 @@ var specs = []stmtSpec{
415415
inline: []string{"opt_for_roles", "role_or_group_or_user", "name_list", "opt_in_schemas", "schema_name_list", "abbreviated_grant_stmt", "opt_with_grant_option", "target_object_type", "abbreviated_revoke_stmt", "opt_drop_behavior"},
416416
nosplit: true,
417417
},
418+
{
419+
name: "alter_external_connection",
420+
stmt: "alter_external_connection_stmt",
421+
replace: map[string]string{"label_spec": "connection_name", "string_or_placeholder": "connection_uri"},
422+
unlink: []string{"connection_name", "connection_uri"},
423+
},
418424
{
419425
name: "alter_index",
420426
stmt: "alter_index_stmt",

pkg/cmd/roachtest/tests/inspect_throughput.go

Lines changed: 82 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ import (
2121
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil"
2222
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
2323
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
24+
"github.com/cockroachdb/cockroach/pkg/jobs"
2425
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
2526
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
2627
"github.com/cockroachdb/cockroach/pkg/workload/histogram"
2728
"github.com/cockroachdb/cockroach/pkg/workload/histogram/exporter"
2829
"github.com/lib/pq"
30+
"github.com/stretchr/testify/require"
2931
)
3032

3133
func registerInspectThoughput(r registry.Registry) {
@@ -34,7 +36,7 @@ func registerInspectThoughput(r registry.Registry) {
3436

3537
// Long run: 12 nodes × 8 CPUs, 1B rows, 2 index checks (runs INSPECT twice: 1 index, then 2 indexes), ~5 hours (in v25.4)
3638
const indexesForLongRun = 2
37-
r.Add(makeInspectThroughputTest(r, 12, 8, 1_000_000_000, 8*time.Hour, indexesForLongRun))
39+
r.Add(makeInspectThroughputTest(r, 12, 8, 1_000_000_000, 11*time.Hour, indexesForLongRun))
3840
}
3941

4042
// initInspectHistograms creates a histogram registry with multiple named metrics.
@@ -226,9 +228,7 @@ func makeInspectThroughputTest(
226228
before := timeutil.Now()
227229

228230
inspectSQL := fmt.Sprintf("INSPECT TABLE bulkingest.bulkingest WITH OPTIONS INDEX (%s)", cfg.indexListSQL)
229-
if _, err := db.Exec(inspectSQL); err != nil {
230-
t.Fatal(err)
231-
}
231+
jobID := runInspectInBackground(ctx, t, db, inspectSQL)
232232

233233
// Tick after INSPECT completes to capture elapsed time for this specific metric.
234234
tickHistogram(cfg.metricName)
@@ -250,15 +250,9 @@ func makeInspectThroughputTest(
250250
0
251251
)
252252
FROM system.job_info
253-
WHERE job_id = (
254-
SELECT job_id
255-
FROM [SHOW JOBS]
256-
WHERE job_type = 'INSPECT'
257-
ORDER BY created DESC
258-
LIMIT 1
259-
)
253+
WHERE job_id = $1
260254
AND info_key = 'legacy_progress'`
261-
err := db.QueryRow(querySQL).Scan(&jobTotalCheckCount)
255+
err := db.QueryRow(querySQL, jobID).Scan(&jobTotalCheckCount)
262256
if err != nil {
263257
t.L().Printf("Warning: failed to query job total check count: %v", err)
264258
} else {
@@ -302,3 +296,79 @@ func disableRowCountValidation(t test.Test, db *gosql.DB) {
302296
}
303297
}
304298
}
299+
300+
// runInspectInBackground runs an INSPECT command with a short statement timeout,
301+
// forcing it to run as a background job. It then polls the job until completion,
302+
// reporting progress at 10% intervals. Returns the job ID.
303+
func runInspectInBackground(
304+
ctx context.Context, t test.Test, db *gosql.DB, inspectSQL string,
305+
) (jobID int64) {
306+
// Set a short statement timeout to force INSPECT to run as a background job.
307+
_, err := db.Exec("SET statement_timeout = '5s'")
308+
require.NoError(t, err)
309+
310+
// Reset statement timeout after we're done.
311+
defer func() {
312+
_, resetErr := db.Exec("RESET statement_timeout")
313+
require.NoError(t, resetErr)
314+
}()
315+
316+
_, err = db.Exec(inspectSQL)
317+
318+
// This may fail if the INSPECT took longer than the statement_timeout to run.
319+
// So, we tolerate only statement timeout errors here.
320+
if err != nil {
321+
require.ErrorContains(t, err, "statement timeout")
322+
}
323+
324+
// Get the INSPECT job ID.
325+
getJobIDSQL := `
326+
SELECT job_id
327+
FROM [SHOW JOBS]
328+
WHERE job_type = 'INSPECT'
329+
ORDER BY created DESC
330+
LIMIT 1`
331+
if err := db.QueryRow(getJobIDSQL).Scan(&jobID); err != nil {
332+
t.Fatalf("failed to get INSPECT job ID: %v", err)
333+
}
334+
t.L().Printf("INSPECT job ID: %d", jobID)
335+
336+
// Poll the job until it completes, reporting progress at 10% intervals.
337+
const pollInterval = 5 * time.Second
338+
lastReportedThreshold := -1
339+
ticker := time.NewTicker(pollInterval)
340+
defer ticker.Stop()
341+
342+
for {
343+
select {
344+
case <-ctx.Done():
345+
t.Fatalf("context canceled while waiting for INSPECT job %d", jobID)
346+
case <-ticker.C:
347+
var status jobs.State
348+
var fractionCompleted float64
349+
checkJobSQL := `
350+
SELECT status, fraction_completed
351+
FROM [SHOW JOBS]
352+
WHERE job_id = $1`
353+
if err := db.QueryRow(checkJobSQL, jobID).Scan(&status, &fractionCompleted); err != nil {
354+
t.Fatalf("failed to query job %d status: %v", jobID, err)
355+
}
356+
357+
// Report progress at 10% thresholds (0%, 10%, 20%, ..., 90%).
358+
currentThreshold := int(fractionCompleted * 10)
359+
if currentThreshold > lastReportedThreshold && currentThreshold < 10 {
360+
t.L().Printf("INSPECT job %d: %d%% complete", jobID, currentThreshold*10)
361+
lastReportedThreshold = currentThreshold
362+
}
363+
364+
// Check if job is complete.
365+
switch status {
366+
case jobs.StateSucceeded:
367+
t.L().Printf("INSPECT job %d: 100%% complete (succeeded)", jobID)
368+
return jobID
369+
case jobs.StateFailed, jobs.StateCanceled:
370+
t.Fatalf("INSPECT job %d finished with status: %s", jobID, status)
371+
}
372+
}
373+
}
374+
}

pkg/gen/bnf.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ BNF_SRCS = [
2525
"//docs/generated/sql/bnf:alter_database_to_schema_stmt.bnf",
2626
"//docs/generated/sql/bnf:alter_ddl_stmt.bnf",
2727
"//docs/generated/sql/bnf:alter_default_privileges_stmt.bnf",
28-
"//docs/generated/sql/bnf:alter_external_connection_stmt.bnf",
28+
"//docs/generated/sql/bnf:alter_external_connection.bnf",
2929
"//docs/generated/sql/bnf:alter_func_dep_extension_stmt.bnf",
3030
"//docs/generated/sql/bnf:alter_func_options_stmt.bnf",
3131
"//docs/generated/sql/bnf:alter_func_owner_stmt.bnf",

pkg/gen/docs.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ DOCS_SRCS = [
3535
"//docs/generated/sql/bnf:alter_database_to_schema_stmt.bnf",
3636
"//docs/generated/sql/bnf:alter_ddl_stmt.bnf",
3737
"//docs/generated/sql/bnf:alter_default_privileges_stmt.bnf",
38-
"//docs/generated/sql/bnf:alter_external_connection_stmt.bnf",
38+
"//docs/generated/sql/bnf:alter_external_connection.bnf",
3939
"//docs/generated/sql/bnf:alter_func_dep_extension_stmt.bnf",
4040
"//docs/generated/sql/bnf:alter_func_options_stmt.bnf",
4141
"//docs/generated/sql/bnf:alter_func_owner_stmt.bnf",

pkg/sql/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,7 @@ go_test(
743743
"show_cluster_setting_test.go",
744744
"show_create_all_tables_builtin_test.go",
745745
"show_create_table_test.go",
746+
"show_external_connection_test.go",
746747
"show_fingerprints_test.go",
747748
"show_ranges_test.go",
748749
"show_stats_test.go",

pkg/sql/indexbackfiller_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ func TestIndexBackfillerResumePreservesProgress(t *testing.T) {
668668
return err
669669
}
670670
return nil
671-
}, 5*time.Second)
671+
}, 30*time.Second)
672672

673673
ensureJobState := func(targetState string) {
674674
testutils.SucceedsWithin(t, func() error {
@@ -682,7 +682,7 @@ func TestIndexBackfillerResumePreservesProgress(t *testing.T) {
682682
targetState, jobState)
683683
}
684684
return nil
685-
}, 5*time.Second)
685+
}, 30*time.Second)
686686
}
687687

688688
var completedSpans roachpb.SpanGroup
@@ -738,7 +738,7 @@ func TestIndexBackfillerResumePreservesProgress(t *testing.T) {
738738
}
739739

740740
return nil
741-
}, 5*time.Second)
741+
}, 30*time.Second)
742742
}
743743

744744
for isBlockingBackfillProgress.Load() {
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package sql
7+
8+
import (
9+
"context"
10+
"testing"
11+
12+
"github.com/cockroachdb/cockroach/pkg/base"
13+
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
14+
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
15+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
16+
"github.com/cockroachdb/cockroach/pkg/util/log"
17+
"github.com/stretchr/testify/require"
18+
)
19+
20+
func TestAdminShowExternalConnection(t *testing.T) {
21+
defer leaktest.AfterTest(t)()
22+
defer log.Scope(t).Close(t)
23+
24+
ctx := context.Background()
25+
26+
s, db, _ := serverutils.StartServer(t, base.TestServerArgs{})
27+
defer s.Stopper().Stop(ctx)
28+
srv := s.ApplicationLayer()
29+
30+
adminDB := sqlutils.MakeSQLRunner(db)
31+
const password = "correcthorsebatterystaple"
32+
adminDB.Exec(t, "CREATE USER testuser1 WITH PASSWORD $1", password)
33+
adminDB.Exec(t, "GRANT SYSTEM EXTERNALCONNECTION TO testuser1")
34+
35+
nonAdminDB := sqlutils.MakeSQLRunner(srv.SQLConn(
36+
t, serverutils.UserPassword("testuser1", password), serverutils.ClientCerts(false),
37+
))
38+
nonAdminDB.Exec(t, "CREATE EXTERNAL CONNECTION foo AS 'userfile:///'")
39+
40+
t.Run("non-admin user cannot see other external connections", func(t *testing.T) {
41+
_, err := db.Exec("CREATE USER testuser2 WITH PASSWORD $1", password)
42+
require.NoError(t, err)
43+
44+
nonAdminDB2 := sqlutils.MakeSQLRunner(srv.SQLConn(
45+
t, serverutils.UserPassword("testuser2", password), serverutils.ClientCerts(false),
46+
))
47+
48+
rows := nonAdminDB2.QueryStr(t, "SHOW EXTERNAL CONNECTIONS")
49+
require.Empty(t, rows, "expected no rows for non-admin user without privileges")
50+
})
51+
52+
t.Run("admin user can see all external connections", func(t *testing.T) {
53+
rows := adminDB.QueryStr(t, "SHOW EXTERNAL CONNECTIONS")
54+
require.Len(t, rows, 1, "expected one row for admin user")
55+
})
56+
}

0 commit comments

Comments
 (0)