From 8a3094fb53e4d95374c8ebaf44e828b54d612257 Mon Sep 17 00:00:00 2001 From: ibrahimkettaneh Date: Sat, 30 May 2026 12:09:40 -0400 Subject: [PATCH] kvserver: defer snapshot scratch cleanup to Store.Start Move sstSnapshotStorage.Clear() till after LoadAndReconcileReplicas (the future WAG-replay site). With separated engines, WAG replay can reference files in the scratch directory, so the cleanup must run after replay can consume them. Clear() still runs before Transport.ListenIncomingRaftMessages, so no in-flight scratch directory is at risk. Release note: None Epic: None --- pkg/kv/kvserver/store.go | 21 ++++++--- pkg/kv/kvserver/store_test.go | 81 ++++++++++++++++++++++++++++++++ pkg/kv/kvserver/testing_knobs.go | 3 ++ 3 files changed, 98 insertions(+), 7 deletions(-) diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index 44ab408bd5a2..463d5e125e7b 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -1705,17 +1705,14 @@ func NewStore( }) s.eagerLeaseAcquisitionLimiter = cfg.EagerLeaseAcquisitionLimiter - // The snapshot storage is usually empty at this point since it is cleared - // after each snapshot application, except when the node crashed right before - // it can clean it up. If this fails it's not a correctness issue since the - // storage is also cleared before receiving a snapshot. + // The snapshot storage is constructed here but not yet cleared. With + // separated engines, leftover scratch files may be needed by WAG replay. + // The cleanup is deferred until after WAG replay is done and is flushed to + // guarantee that we don't need these files anymore. // // NB: we don't need the snapshot storage in the raft engine. With separated // storage, the log engine part of snapshot ingestion is written as a batch. s.sstSnapshotStorage = snaprecv.NewSSTSnapshotStorage(s.StateEngine(), s.limiters.BulkIOWriteRate) - if err := s.sstSnapshotStorage.Clear(); err != nil { - log.KvDistribution.Warningf(ctx, "failed to clear snapshot storage: %v", err) - } s.protectedtsReader = cfg.ProtectedTimestampReader s.limiters.ConcurrentAddSSTableRequests = limit.MakeConcurrentRequestLimiter( @@ -2397,6 +2394,16 @@ func (s *Store) Start(ctx context.Context, stopper *stop.Stopper) error { if err != nil { return err } + + // Clear any leftover snapshot scratch files. Leftovers occur when a node + // crashed mid-snapshot; on a clean restart the directory is already empty. + if fn := s.cfg.TestingKnobs.BeforeClearSnapshotScratchOnStart; fn != nil { + fn() + } + if err := s.sstSnapshotStorage.Clear(); err != nil { + log.KvDistribution.Warningf(ctx, "failed to clear snapshot storage: %v", err) + } + logEvery := log.Every(10 * time.Second) for i, repl := range repls { // Log progress regularly, but not for the first replica (we only want to diff --git a/pkg/kv/kvserver/store_test.go b/pkg/kv/kvserver/store_test.go index c3c9ff7ce96c..0ca9bb28852b 100644 --- a/pkg/kv/kvserver/store_test.go +++ b/pkg/kv/kvserver/store_test.go @@ -73,6 +73,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/tracing" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" + "github.com/cockroachdb/errors/oserror" "github.com/cockroachdb/redact" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -424,6 +425,86 @@ func TestStoreInitAndBootstrap(t *testing.T) { }) } +// TestStoreStartClearsSnapshotStorageScratch verifies the scratch directory's +// lifecycle across Store.Start, for both single and separated engines: +// +// 1. Leftover scratch files (simulating a crash mid-snapshot) survive past +// the point where WAG replay would consume them. This is checked via the +// BeforeClearSnapshotScratchOnStart knob. +// 2. The same files are then removed by Clear before Start returns. +// +// TODO(sep-raft-log): Ensure that the test still passes after introducing WAG +// replay. It is essential to have a flush after finishing WAG replay to avoid +// deleting files that would be needed in case of a crash. +func TestStoreStartClearsSnapshotStorageScratch(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + testutils.RunTrueAndFalse(t, "separated", func(t *testing.T, sepEng bool) { + ctx := context.Background() + stopper := stop.NewStopper() + defer stopper.Stop(ctx) + cfg := TestStoreConfig(nil) + + // The knob fires during Start, by which point env and ssts (assigned + // below after the store is built) are populated. The closure captures + // them by reference. + var env *fs.Env + var ssts []string + var preClearCalls int + var preClearErrs []error + cfg.TestingKnobs.BeforeClearSnapshotScratchOnStart = func() { + preClearCalls++ + for _, p := range ssts { + if _, statErr := env.Stat(p); statErr != nil { + preClearErrs = append(preClearErrs, + errors.Wrapf(statErr, "scratch file %s missing at pre-clear hook", p)) + } + } + } + + store := createTestStoreWithoutStart( + ctx, t, stopper, testStoreOpts{useSeparatedEngines: sepEng}, &cfg, + ) + + // Seed a leftover scratch file under the snapshot storage directory. + // We deliberately skip scratch.Close() to simulate a node that crashed + // mid-snapshot and never ran the per-snapshot cleanup. + scratch := store.sstSnapshotStorage.NewScratchSpace( + roachpb.RangeID(42), uuid.MakeV4(), cfg.Settings, + ) + f, err := scratch.NewFile(ctx, 0) + require.NoError(t, err) + require.NoError(t, f.Write([]byte("leftover sst"))) + require.NoError(t, f.Finish()) + + env = store.StateEngine().Env() + ssts = scratch.SSTs() + require.NotEmpty(t, ssts) + + // Sanity check: the leftover file exists before Start. + for _, p := range ssts { + _, statErr := env.Stat(p) + require.NoError(t, statErr, "scratch file %s should exist before Start", p) + } + + require.NoError(t, store.Start(ctx, stopper)) + store.WaitForInit() + + // (1) The pre-clear hook ran exactly once, with all leftover files + // still on disk. This is where WAG replay would consume them. + require.Equal(t, 1, preClearCalls, "pre-clear knob should fire exactly once") + require.Empty(t, preClearErrs, "leftover scratch files should survive past WAG replay") + + // (2) Scratch file should have been removed by Start. + for _, p := range ssts { + _, statErr := env.Stat(p) + require.True(t, oserror.IsNotExist(statErr), + "scratch file %s should be removed by Start, got err=%v", p, statErr) + } + }) +} + // TestInitializeEngineErrors verifies bootstrap failure if engine // is not empty. func TestInitializeEngineErrors(t *testing.T) { diff --git a/pkg/kv/kvserver/testing_knobs.go b/pkg/kv/kvserver/testing_knobs.go index 4b1749171b2c..f3ff95554e48 100644 --- a/pkg/kv/kvserver/testing_knobs.go +++ b/pkg/kv/kvserver/testing_knobs.go @@ -361,6 +361,9 @@ type StoreTestingKnobs struct { // HandleSnapshotDone is run after the entirety of receiving a snapshot, // regardless of whether it succeeds, gets cancelled, times out, or errors. HandleSnapshotDone func() + // BeforeClearSnapshotScratchOnStart is called just before cleaning scratch + // files on startup is executed. + BeforeClearSnapshotScratchOnStart func() // ReplicaAddSkipLearnerRollback causes replica addition to skip the learner // rollback that happens when either the initial snapshot or the promotion of // a learner to a voter fails.