cockroachdb
diff --git a/‎pkg/util/admission/BUILD.bazel‎
Lines changed: 2 additions & 0 deletions b/‎pkg/util/admission/BUILD.bazel‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pkg/util/admission/cpu_time_token_filler.go‎
Lines changed: 179 additions & 0 deletions b/‎pkg/util/admission/cpu_time_token_filler.go‎
Lines changed: 179 additions & 0 deletions
diff --git a/‎pkg/util/admission/cpu_time_token_filler_test.go‎
Lines changed: 143 additions & 0 deletions b/‎pkg/util/admission/cpu_time_token_filler_test.go‎
Lines changed: 143 additions & 0 deletions
diff --git a/‎pkg/util/admission/cpu_time_token_granter.go‎
Lines changed: 26 additions & 0 deletions b/‎pkg/util/admission/cpu_time_token_granter.go‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎pkg/util/admission/cpu_time_token_granter_test.go‎
Lines changed: 18 additions & 0 deletions b/‎pkg/util/admission/cpu_time_token_granter_test.go‎
Lines changed: 18 additions & 0 deletions
@@ -4,6 +4,7 @@ go_library(
     name = "admission",
     srcs = [
         "admission.go",
+        "cpu_time_token_filler.go",
         "cpu_time_token_granter.go",
         "disk_bandwidth.go",
         "elastic_cpu_grant_coordinator.go",
@@ -58,6 +59,7 @@ go_library(
 go_test(
     name = "admission_test",
     srcs = [
+        "cpu_time_token_filler_test.go",
         "cpu_time_token_granter_test.go",
         "disk_bandwidth_test.go",
         "elastic_cpu_granter_test.go",
 
@@ -0,0 +1,179 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the CockroachDB Software License
+// included in the /LICENSE file.
+
+package admission
+
+import (
+	"time"
+
+	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
+	"github.com/cockroachdb/errors"
+)
+
+// timePerTick is how frequently cpuTimeTokenFiller ticks its time.Ticker & adds
+// tokens to the buckets. Must be < 1s. Must divide 1s evenly, since
+// cpuTimeTokenFiller.start computes the number of ticks in a 1s interval as
+// time.Second / timePerTick using integer division.
+const timePerTick = 1 * time.Millisecond
+
+// cpuTimeTokenFiller starts a goroutine which periodically calls
+// cpuTimeTokenAllocator to add tokens to a cpuTimeTokenGranter. For example, on
+// an 8 vCPU machine, we may want to allow burstable tier-0 work to use 6 seconds
+// of CPU time per second. Then cpuTimeTokenAllocator.rates[tier0][canBurst] would
+// equal 6 seconds per second, and cpuTimeTokenFiller would add 6 seconds of tokens
+// every second, but smoothly -- 1ms at a time. See cpuTimeTokenGranter for details
+// on the multi-dimensional token buckets owned by cpuTimeTokenGranter; the TLDR is
+// there is one bucket per <resource tier, burst qualification> pair.
+//
+// cpuTimeTokenFiller owns the time.Ticker logic. The details of the token allocation
+// are left to the cpuTimeTokenAllocator, in order to improve clarity & testability.
+//
+// Note that the combination of cpuTimeTokenFiller & cpuTimeTokenAllocator is written
+// to be robust against delayed and dropped time.Ticker ticks. That
+// is, in the presence of delayed and dropped ticks, the correct number of tokens will
+// be added to the buckets; they just may be added in a less smooth fashion than
+// normal. If ticks are delayed more than roughly 1s, not enough tokens will be
+// added to the bucket, but we do not expect a delay this significant in practice
+// (admission control will be running).
+//
+// See ticker docs, where it is mentioned ticks can be dropped, if receivers are
+// slow: https://pkg.go.dev/time#NewTicker
+//
+// The mechanism by which the goroutine adds the correct number of tokens, in the
+// presence of delayed or dropped ticks, is:
+//   - time is split into intervals of 1s
+//   - intervals are split into 1s / timePerTick(=1ms) time.Ticker ticks
+//   - cpuTimeTokenAllocator attempts to allocate remaining tokens for interval evenly
+//     across remaining ticks in the interval
+//   - once interval is complete, all remaining tokens needed for that interval
+//     are added (e.g. see the f.allocator.allocateTokens(1) call in start), then
+//     a new interval starts
+type cpuTimeTokenFiller struct {
+	// allocator is called on every tick to hand out tokens and is told when a
+	// new interval begins. timeSource supplies the ticker and the current time.
+	// The goroutine launched by start returns once a receive on closeCh
+	// succeeds (e.g. when closeCh is closed).
+	allocator  tokenAllocator
+	timeSource timeutil.TimeSource
+	closeCh    chan struct{}
+	// Used only in unit tests. When non-nil, the goroutine launched by start
+	// sends on *tickCh after it has finished processing each tick.
+	tickCh *chan struct{}
+}
+
+// tokenAllocator abstracts cpuTimeTokenAllocator for testing.
+type tokenAllocator interface {
+	// allocateTokens is called by cpuTimeTokenFiller on every tick, with the
+	// number of ticks expected to remain in the current interval. The filler
+	// never passes a value below 1.
+	allocateTokens(remainingTicksInInterval int64)
+	// resetInterval is called by cpuTimeTokenFiller to signal that a new
+	// interval has begun.
+	resetInterval()
+}
+
+// start launches the goroutine that drives token allocation and returns
+// immediately. On every tick of a timePerTick ticker the goroutine asks the
+// allocator to allocate tokens; once at least 1s has elapsed since the start
+// of the current interval it also starts a new interval. The goroutine
+// returns, and stops its ticker, once a receive on closeCh succeeds (e.g.
+// when closeCh is closed).
+func (f *cpuTimeTokenFiller) start() {
+	ticker := f.timeSource.NewTicker(timePerTick)
+	intervalStart := f.timeSource.Now()
+	// Every 1s a new interval starts. Every timePerTick, token allocation
+	// is done. The expected number of ticks left in the interval is passed to
+	// the allocator. The expected number of ticks left can jump around, if
+	// time.Ticker ticks are delayed or dropped.
+	go func() {
+		// Stop the ticker when the goroutine exits, so that it does not keep
+		// firing after the filler has been closed.
+		defer ticker.Stop()
+		lastRemainingTicks := int64(0)
+		for {
+			select {
+			case t := <-ticker.Ch():
+				var remainingTicks int64
+				elapsedSinceIntervalStart := t.Sub(intervalStart)
+				if elapsedSinceIntervalStart >= time.Second {
+					// INVARIANT: During each interval, allocateTokens(1) must be called, before
+					// resetInterval() can be called.
+					//
+					// Without this invariant, cpuTimeTokenAllocator.rates tokens would not be
+					// allocated every 1s.
+					if lastRemainingTicks > 1 {
+						f.allocator.allocateTokens(1)
+					}
+					intervalStart = t
+					f.allocator.resetInterval()
+					remainingTicks = int64(time.Second / timePerTick)
+				} else {
+					remainingSinceIntervalStart := time.Second - elapsedSinceIntervalStart
+					if remainingSinceIntervalStart < 0 {
+						panic(errors.AssertionFailedf("remainingSinceIntervalStart is negative %d", remainingSinceIntervalStart))
+					}
+					// ceil(a / b) == (a + b - 1) / b, when using integer division.
+					// Round up so that we don't accumulate tokens to give in a burst on the
+					// last tick.
+					remainingTicks =
+						int64((remainingSinceIntervalStart + timePerTick - 1) / timePerTick)
+				}
+				// The allocator requires at least one remaining tick.
+				f.allocator.allocateTokens(max(1, remainingTicks))
+				lastRemainingTicks = remainingTicks
+				// Only non-nil in unit tests.
+				if f.tickCh != nil {
+					*f.tickCh <- struct{}{}
+				}
+			case <-f.closeCh:
+				return
+			}
+		}
+	}()
+}
+
+// cpuTimeTokenAllocator allocates tokens to a cpuTimeTokenGranter. See the comment
+// above cpuTimeTokenFiller for a high level picture. The responsibility of
+// cpuTimeTokenAllocator is to gradually allocate the number of tokens given by
+// rates every interval, while respecting bucketCapacity. We have split up the
+// ticking & token allocation logic, in order to improve clarity & testability.
+type cpuTimeTokenAllocator struct {
+	// granter owns the token buckets; allocateTokens adds tokens to them by
+	// calling granter.refill.
+	granter *cpuTimeTokenGranter
+
+	// Mutable fields. No mutex, since only a single goroutine will call the
+	// cpuTimeTokenAllocator.
+
+	// rates stores the number of tokens added to each bucket every interval.
+	rates [numResourceTiers][numBurstQualifications]int64
+	// bucketCapacity stores the maximum number of tokens that can be in each bucket.
+	// That is, if a bucket is already at capacity, no more tokens will be added.
+	bucketCapacity [numResourceTiers][numBurstQualifications]int64
+	// allocated stores the number of tokens added to each bucket in the current
+	// interval. It is zeroed by resetInterval.
+	allocated [numResourceTiers][numBurstQualifications]int64
+}
+
+// Compile-time check that *cpuTimeTokenAllocator implements tokenAllocator.
+var _ tokenAllocator = (*cpuTimeTokenAllocator)(nil)
+
+// allocateTokens hands this tick's share of tokens to the cpuTimeTokenGranter.
+// For every <resource tier, burst qualification> bucket, the tokens still owed
+// for the current interval (rates minus allocated) are spread across the
+// expected remaining ticks, rounding up, and never exceeding what is still
+// owed. The per-bucket amounts are added to allocated and then passed to
+// granter.refill together with bucketCapacity.
+// INVARIANT: expectedRemainingTicksInInterval >= 1.
+// TODO(josh): Expand to cover tenant-specific token buckets too.
+func (a *cpuTimeTokenAllocator) allocateTokens(expectedRemainingTicksInInterval int64) {
+	// shareForTick returns how many of a bucket's rate tokens to hand out on
+	// this tick, given that alreadyGiven were handed out earlier in the
+	// interval and ticksLeft ticks are expected to remain.
+	shareForTick := func(rate int64, alreadyGiven int64, ticksLeft int64) int64 {
+		outstanding := rate - alreadyGiven
+		// Integer ceiling division: ceil(x / y) == (x + y - 1) / y. Rounding up
+		// avoids saving up tokens that would be given in a burst on the last
+		// tick.
+		share := (outstanding + ticksLeft - 1) / ticksLeft
+		if share < 0 {
+			panic(errors.AssertionFailedf("toAllocate is negative %d", share))
+		}
+		// Never hand out more than is still owed for this interval.
+		return min(share, outstanding)
+	}
+
+	var delta [numResourceTiers][numBurstQualifications]int64
+	for tier := range a.rates {
+		for qual := range a.rates[tier] {
+			grant := shareForTick(
+				a.rates[tier][qual], a.allocated[tier][qual], expectedRemainingTicksInInterval)
+			delta[tier][qual] = grant
+			a.allocated[tier][qual] += grant
+		}
+	}
+	a.granter.refill(delta, a.bucketCapacity)
+}
+
+// resetInterval marks the beginning of a new interval by zeroing the
+// per-bucket count of tokens allocated so far, so that allocateTokens hands
+// out a fresh rates worth of tokens over the new interval.
+func (a *cpuTimeTokenAllocator) resetInterval() {
+	a.allocated = [numResourceTiers][numBurstQualifications]int64{}
+}
@@ -0,0 +1,143 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the CockroachDB Software License
+// included in the /LICENSE file.
+
+package admission
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/cockroachdb/cockroach/pkg/testutils/datapathutils"
+	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
+	"github.com/cockroachdb/cockroach/pkg/util/log"
+	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
+	"github.com/cockroachdb/datadriven"
+)
+
+// TestCPUTimeTokenFiller is a datadriven test of cpuTimeTokenFiller. It runs
+// the filler against a manual time source and a recording tokenAllocator, and
+// prints the allocator calls made in response to each command, followed by
+// the time elapsed since the start of the test.
+//
+// Commands:
+//   - init: constructs the filler and starts its goroutine.
+//   - advance dur=<duration>: advances the manual time and waits for the
+//     filler to finish processing one tick.
+//   - stop: closes the filler's closeCh.
+func TestCPUTimeTokenFiller(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+
+	// Fixed time for reproducibility.
+	unixNanos := int64(1758938600000000000) // 2025-09-27T02:03:20Z
+	startTime := time.Unix(0, unixNanos).UTC()
+	testTime := timeutil.NewManualTime(startTime)
+
+	var buf strings.Builder
+	allocator := testTokenAllocator{buf: &buf}
+	var filler cpuTimeTokenFiller
+	// flushAndReset appends the elapsed time to buf, then returns and clears
+	// the accumulated output.
+	flushAndReset := func() string {
+		fmt.Fprintf(&buf, "elapsed: %s\n", testTime.Since(startTime))
+		str := buf.String()
+		buf.Reset()
+		return str
+	}
+
+	// Unbuffered: the filler goroutine blocks on its send until the test
+	// receives, which happens in the "advance" command.
+	tickCh := make(chan struct{})
+	datadriven.RunTest(t, datapathutils.TestDataPath(t, "cpu_time_token_filler"), func(t *testing.T, d *datadriven.TestData) string {
+		switch d.Cmd {
+		case "init":
+			filler = cpuTimeTokenFiller{
+				allocator:  &allocator,
+				closeCh:    make(chan struct{}),
+				timeSource: testTime,
+				tickCh:     &tickCh,
+			}
+			filler.start()
+			return flushAndReset()
+		case "advance":
+			var dur time.Duration
+			d.ScanArgs(t, "dur", &dur)
+			testTime.AdvanceInOneTick(dur)
+			// The filler sends on tickCh only after it has made its allocator
+			// calls for the tick, so once this receive completes those calls
+			// have already been written to buf.
+			<-tickCh
+			return flushAndReset()
+		case "stop":
+			close(filler.closeCh)
+			return flushAndReset()
+		default:
+			return fmt.Sprintf("unknown command: %s", d.Cmd)
+		}
+	})
+}
+
+// testTokenAllocator is a tokenAllocator used by TestCPUTimeTokenFiller. It
+// performs no allocation; each call is recorded as a line of text in buf.
+type testTokenAllocator struct {
+	buf *strings.Builder
+}
+
+// resetInterval records the call in buf.
+func (a *testTokenAllocator) resetInterval() {
+	a.buf.WriteString("resetInterval()\n")
+}
+
+// allocateTokens records the call, including its argument, in buf.
+func (a *testTokenAllocator) allocateTokens(remainingTicks int64) {
+	a.buf.WriteString("allocateTokens(" + fmt.Sprint(remainingTicks) + ")\n")
+}
+
+// TestCPUTimeTokenAllocator is a datadriven test of cpuTimeTokenAllocator. It
+// wires an allocator with fixed rates to a cpuTimeTokenGranter that has one
+// test requester per resource tier, and prints the granter's state after
+// token allocation.
+//
+// Commands:
+//   - resetInterval: calls allocator.resetInterval.
+//   - allocate remaining=<int>: calls allocator.allocateTokens with the given
+//     number of remaining ticks, then prints the granter.
+//   - clear: sets the token count of every bucket to zero, then prints the
+//     granter.
+func TestCPUTimeTokenAllocator(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+
+	granter := &cpuTimeTokenGranter{}
+	tier0Granter := &cpuTimeTokenChildGranter{
+		tier:   testTier0,
+		parent: granter,
+	}
+	tier1Granter := &cpuTimeTokenChildGranter{
+		tier:   testTier1,
+		parent: granter,
+	}
+	var requesters [numResourceTiers]*testRequester
+	requesters[testTier0] = &testRequester{
+		additionalID: "tier0",
+		granter:      tier0Granter,
+	}
+	requesters[testTier1] = &testRequester{
+		additionalID: "tier1",
+		granter:      tier1Granter,
+	}
+	granter.requester[testTier0] = requesters[testTier0]
+	granter.requester[testTier1] = requesters[testTier1]
+
+	allocator := cpuTimeTokenAllocator{
+		granter: granter,
+	}
+	allocator.rates[testTier0][canBurst] = 5
+	allocator.rates[testTier0][noBurst] = 4
+	allocator.rates[testTier1][canBurst] = 3
+	allocator.rates[testTier1][noBurst] = 2
+	// Each bucket's capacity equals its per-interval rate.
+	allocator.bucketCapacity = allocator.rates
+
+	var buf strings.Builder
+	// flushAndReset optionally appends the granter's state to buf, then
+	// returns and clears the accumulated output.
+	flushAndReset := func(printGranter bool) string {
+		if printGranter {
+			fmt.Fprint(&buf, granter.String())
+		}
+		str := buf.String()
+		buf.Reset()
+		return str
+	}
+
+	datadriven.RunTest(t, datapathutils.TestDataPath(t, "cpu_time_token_allocator"), func(t *testing.T, d *datadriven.TestData) string {
+		switch d.Cmd {
+		case "resetInterval":
+			allocator.resetInterval()
+			return flushAndReset(false /* printGranter */)
+		case "allocate":
+			var remainingTicks int64
+			d.ScanArgs(t, "remaining", &remainingTicks)
+			allocator.allocateTokens(remainingTicks)
+			return flushAndReset(true /* printGranter */)
+		case "clear":
+			// Empty every bucket directly, without going through the allocator.
+			granter.mu.buckets[testTier0][canBurst].tokens = 0
+			granter.mu.buckets[testTier0][noBurst].tokens = 0
+			granter.mu.buckets[testTier1][canBurst].tokens = 0
+			granter.mu.buckets[testTier1][noBurst].tokens = 0
+			return flushAndReset(true /* printGranter */)
+		default:
+			return fmt.Sprintf("unknown command: %s", d.Cmd)
+		}
+	})
+}
@@ -244,3 +244,29 @@ func (stg *cpuTimeTokenGranter) tryGrantLocked() bool {
 	}
 	return false
 }
+
+// refill adds delta tokens to the corresponding buckets, capping each bucket
+// at the matching entry of bucketCapacity. A bucket that is already at
+// capacity therefore gains nothing, and a bucket above capacity is lowered
+// to it. refill then always attempts to grant admission to waiting requests.
+// Callers are expected to pass non-negative deltas.
+// NOTE(review): cpuTimeTokenAllocator passes a zero delta for a bucket once
+// that bucket's tokens for the interval are fully allocated, so "delta is
+// always positive" does not hold strictly.
+func (stg *cpuTimeTokenGranter) refill(
+	delta [numResourceTiers][numBurstQualifications]int64,
+	bucketCapacity [numResourceTiers][numBurstQualifications]int64,
+) {
+	stg.mu.Lock()
+	defer stg.mu.Unlock()
+
+	for tier := range stg.mu.buckets {
+		for qual := range stg.mu.buckets[tier] {
+			bucket := &stg.mu.buckets[tier][qual]
+			bucket.tokens = min(bucket.tokens+delta[tier][qual], bucketCapacity[tier][qual])
+		}
+	}
+
+	// Tokens may have been added, so waiting requests may now be admissible.
+	stg.grantUntilNoWaitingRequestsLocked()
+}
@@ -149,6 +149,24 @@ func TestCPUTimeTokenGranter(t *testing.T) {
 			requesters[scanResourceTier(t, d)].tookWithoutPermission(int64(v))
 			return flushAndReset(false /* init */)
 
+		case "refill":
+			// The delta & the bucket capacity are hard-coded. It is unwieldy
+			// to make them data-driven arguments, and the payoff would be
+			// low anyway.
+			var delta [numResourceTiers][numBurstQualifications]int64
+			delta[testTier0][canBurst] = 5
+			delta[testTier0][noBurst] = 4
+			delta[testTier1][canBurst] = 3
+			delta[testTier1][noBurst] = 1
+			var bucketCapacity [numResourceTiers][numBurstQualifications]int64
+			bucketCapacity[testTier0][canBurst] = 4
+			bucketCapacity[testTier0][noBurst] = 3
+			bucketCapacity[testTier1][canBurst] = 10
+			bucketCapacity[testTier1][noBurst] = 1
+			granter.refill(delta, bucketCapacity)
+			fmt.Fprintf(&buf, "refill(%v %v)\n", delta, bucketCapacity)
+			return flushAndReset(false /* init */)
+
 		// For cpuTimeTokenChildGranter, this is a NOP. Still, it will be
 		// called in production. So best to test it doesn't panic, or similar.
 		case "continue-grant-chain":