// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package admission

import (
	"time"

	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
)

// timePerTick is the period of cpuTimeTokenFiller's ticker, i.e. how often
// tokens are added to the buckets. Must be < 1s. Must divide 1s evenly.
const timePerTick = 1 * time.Millisecond

// cpuTimeTokenFiller starts a goroutine which periodically calls
// cpuTimeTokenAllocator to add tokens to a cpuTimeTokenGranter. For example, on
// an 8 vCPU machine, we may want to allow burstable tier-0 work to use 6 seconds
// of CPU time per second. Then cpuTimeTokenAllocator.rates[tier0][canBurst] would
// equal 6 seconds per second, and cpuTimeTokenFiller would add 6 seconds of tokens
// every second, but smoothly -- 1ms at a time. See cpuTimeTokenGranter for details
// on the multi-dimensional token buckets owned by cpuTimeTokenGranter; the TLDR is
// there is one bucket per <resource tier, burst qualification> pair.
//
// cpuTimeTokenFiller owns the time.Ticker logic. The details of the token
// allocation are left to the cpuTimeTokenAllocator, in order to improve clarity
// & testability.
//
// Note that the combination of cpuTimeTokenFiller & cpuTimeTokenAllocator is
// written to be robust against delayed and dropped time.Ticker ticks. That is,
// in the presence of delayed and dropped ticks, the correct number of tokens
// will be added to the buckets; they just may be added in a less smooth fashion
// than normal. If ticks are delayed by more than roughly 1s, not enough tokens
// will be added to the buckets, but we do not expect a delay that large in
// practice (admission control will be running).
//
// See the time.Ticker docs, which note that ticks can be dropped if the
// receiver is slow: https://pkg.go.dev/time#NewTicker
//
// The mechanism by which the goroutine adds the correct number of tokens, in
// the presence of delayed or dropped ticks, is:
// - time is split into intervals of 1s
// - intervals are split into 1s / timePerTick(=1ms) time.Ticker ticks
// - cpuTimeTokenAllocator attempts to allocate the remaining tokens for the
//   interval evenly across the remaining ticks in the interval
// - once the interval is complete, all remaining tokens needed for that
//   interval are added (e.g. see f.allocator.allocateTokens(1) below), then a
//   new interval starts
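//
// For example, with a rate of 6s of tokens per interval: at the start of an
// interval there are 1000 expected ticks, so the first tick adds ~6ms of
// tokens. If the goroutine then stalls for ~100ms, the next tick sees ~900
// remaining ticks and spreads the not-yet-allocated ~5.994s of tokens across
// them (~6.7ms per tick), catching up smoothly rather than adding the missed
// ticks' worth of tokens in a single burst at the end of the interval.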
type cpuTimeTokenFiller struct {
	allocator  tokenAllocator
	timeSource timeutil.TimeSource
	closeCh    chan struct{}
	// tickCh is used only in unit tests.
	tickCh *chan struct{}
}

// tokenAllocator abstracts cpuTimeTokenAllocator for testing.
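// A unit test can substitute a trivial fake to observe the filler's calls. A
// hypothetical sketch (not part of this package):
//
//	type countingAllocator struct{ allocateCalls, resetCalls int }
//
//	func (a *countingAllocator) allocateTokens(remainingTicksInInterval int64) { a.allocateCalls++ }
//	func (a *countingAllocator) resetInterval()                                { a.resetCalls++ }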
type tokenAllocator interface {
	allocateTokens(remainingTicksInInterval int64)
	resetInterval()
}

func (f *cpuTimeTokenFiller) start() {
	ticker := f.timeSource.NewTicker(timePerTick)
	intervalStart := f.timeSource.Now()
	// Every 1s a new interval starts. Every timePerTick, token allocation is
	// done. The expected number of ticks left in the interval is passed to
	// the allocator. The expected number of ticks left can jump around if
	// time.Ticker ticks are delayed or dropped.
	go func() {
		defer ticker.Stop()
		lastRemainingTicks := int64(0)
		for {
			select {
			case t := <-ticker.Ch():
				var remainingTicks int64
				elapsedSinceIntervalStart := t.Sub(intervalStart)
				if elapsedSinceIntervalStart >= time.Second {
					// INVARIANT: During each interval, allocateTokens(1) must be called
					// before resetInterval() can be called.
					//
					// Without this invariant, cpuTimeTokenAllocator.rates tokens would not
					// be allocated every 1s.
					if lastRemainingTicks > 1 {
						f.allocator.allocateTokens(1)
					}
					intervalStart = t
					f.allocator.resetInterval()
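					// A new interval is starting, so the full interval's worth of
					// ticks remains. With timePerTick = 1ms this is 1000.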
					remainingTicks = int64(time.Second / timePerTick)
				} else {
					remainingSinceIntervalStart := time.Second - elapsedSinceIntervalStart
					if remainingSinceIntervalStart < 0 {
						panic(errors.AssertionFailedf("remainingSinceIntervalStart is negative %d", remainingSinceIntervalStart))
					}
					// ceil(a / b) == (a + b - 1) / b, when using integer division.
					// Round up so that we don't accumulate tokens to give in a burst on the
					// last tick.
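					// E.g. if ~399.5ms remain in the interval, remainingTicks works out
					// to 400 rather than 399.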
					remainingTicks =
						int64((remainingSinceIntervalStart + timePerTick - 1) / timePerTick)
				}
				f.allocator.allocateTokens(max(1, remainingTicks))
				lastRemainingTicks = remainingTicks
				// Only non-nil in unit tests.
				if f.tickCh != nil {
					*f.tickCh <- struct{}{}
				}
			case <-f.closeCh:
				return
			}
		}
	}()
}

// cpuTimeTokenAllocator allocates tokens to a cpuTimeTokenGranter. See the
// comment above cpuTimeTokenFiller for a high-level picture. The responsibility
// of cpuTimeTokenAllocator is to gradually allocate rates tokens every interval,
// while respecting bucketCapacity. We have split up the ticking & token
// allocation logic in order to improve clarity & testability.
type cpuTimeTokenAllocator struct {
	granter *cpuTimeTokenGranter

	// Mutable fields. No mutex, since only a single goroutine will call the
	// cpuTimeTokenAllocator.

	// rates stores the number of tokens added to each bucket every interval.
	rates [numResourceTiers][numBurstQualifications]int64
	// bucketCapacity stores the maximum number of tokens that can be in each
	// bucket. That is, if a bucket is already at capacity, no more tokens will
	// be added.
	bucketCapacity [numResourceTiers][numBurstQualifications]int64
	// allocated stores the number of tokens added to each bucket in the current
	// interval.
	allocated [numResourceTiers][numBurstQualifications]int64
}

var _ tokenAllocator = &cpuTimeTokenAllocator{}

// allocateTokens allocates tokens to a cpuTimeTokenGranter. It adds rates
// tokens every interval, while respecting bucketCapacity, spreading the tokens
// evenly across the expected remaining ticks in the interval.
// INVARIANT: expectedRemainingTicksInInterval >= 1.
// TODO(josh): Expand to cover tenant-specific token buckets too.
func (a *cpuTimeTokenAllocator) allocateTokens(expectedRemainingTicksInInterval int64) {
	allocateFunc := func(total int64, allocated int64, remainingTicks int64) (toAllocate int64) {
		remainingTokens := total - allocated
		// ceil(a / b) == (a + b - 1) / b, when using integer division.
		// Round up so that we don't accumulate tokens to give in a burst on the
		// last tick.
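		// For example (illustrative numbers, assuming tokens are nanoseconds of
		// CPU time): with total = 6e9, allocated = 0, and remainingTicks = 1000,
		// toAllocate is 6e6; on the next tick, with allocated = 6e6 and
		// remainingTicks = 999, toAllocate is again 6e6.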
		toAllocate = (remainingTokens + remainingTicks - 1) / remainingTicks
		if toAllocate < 0 {
			panic(errors.AssertionFailedf("toAllocate is negative %d", toAllocate))
		}
		if toAllocate+allocated > total {
			toAllocate = total - allocated
		}
		return toAllocate
	}

	var delta [numResourceTiers][numBurstQualifications]int64
	for wc := range a.rates {
		for kind := range a.rates[wc] {
			toAllocateTokens := allocateFunc(
				a.rates[wc][kind], a.allocated[wc][kind], expectedRemainingTicksInInterval)
			a.allocated[wc][kind] += toAllocateTokens
			delta[wc][kind] = toAllocateTokens
		}
	}
	a.granter.refill(delta, a.bucketCapacity)
}

// resetInterval signals the beginning of a new interval; allocateTokens then
// starts over, adding up to rates tokens over the course of the new interval.
func (a *cpuTimeTokenAllocator) resetInterval() {
	for wc := range a.allocated {
		for kind := range a.allocated[wc] {
			a.allocated[wc][kind] = 0
		}
	}
}
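
// newExampleCPUTimeTokenFiller is a minimal wiring sketch (not used by
// production code) showing how a cpuTimeTokenAllocator and cpuTimeTokenFiller
// might be assembled around an existing cpuTimeTokenGranter. The rate and
// capacity values are illustrative assumptions only (tokens are assumed to be
// denominated in nanoseconds of CPU time), not tuned defaults. The caller stops
// the filler's goroutine by closing closeCh.
func newExampleCPUTimeTokenFiller(granter *cpuTimeTokenGranter) *cpuTimeTokenFiller {
	alloc := &cpuTimeTokenAllocator{granter: granter}
	for tier := range alloc.rates {
		for kind := range alloc.rates[tier] {
			// Illustrative: 6s of CPU time tokens per 1s interval, with at most
			// 1s of tokens accumulated in any bucket.
			alloc.rates[tier][kind] = int64(6 * time.Second)
			alloc.bucketCapacity[tier][kind] = int64(time.Second)
		}
	}
	filler := &cpuTimeTokenFiller{
		allocator:  alloc,
		timeSource: timeutil.DefaultTimeSource{},
		closeCh:    make(chan struct{}),
	}
	filler.start()
	return filler
}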