
Commit e3e228f

metrics: redesign disk usage metrics
The way we maintain the metrics used for disk usage calculations is very messy. They are updated incrementally through special code paths, which can be fragile, and they are not consistent with sibling metrics (for example, `Tables.Live.All` uses virtual table sizes, whereas `Tables.Live.Local` uses backing sizes). This change redesigns these metrics: we specialize them to focus on disk usage, and thus include only physical tables and physical backings. We now use the (improved) disk space aggregator and the new blob size aggregator on demand, only when metrics are needed.
1 parent 15d065b commit e3e228f

28 files changed: +1360 −924 lines
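
The redesigned metrics are computed on demand and broken down by placement (local, shared, external). As a rough orientation, the following is a minimal, self-contained Go sketch of that placement-keyed aggregation pattern; the `Placement`, `CountAndSize`, and `ByPlacement` names mirror helpers referenced in this commit, but the fields and methods below are simplified assumptions rather than the actual `metrics` package API.

package main

import "fmt"

// Placement says where a file lives (loosely mirroring internal/base.Placement;
// the zero value is deliberately left invalid).
type Placement uint8

const (
    Local Placement = 1 + iota
    Shared
    External
)

// CountAndSize is a hypothetical file-count / byte-total pair.
type CountAndSize struct {
    Count uint64
    Bytes uint64
}

// Inc records one file of the given size.
func (c *CountAndSize) Inc(size uint64) {
    c.Count++
    c.Bytes += size
}

// ByPlacement holds one T per placement, so totals can be broken down by
// where files are stored.
type ByPlacement[T any] struct {
    Local, Shared, External T
}

// Ptr returns the entry for the given placement.
func (b *ByPlacement[T]) Ptr(p Placement) *T {
    switch p {
    case Local:
        return &b.Local
    case Shared:
        return &b.Shared
    default:
        return &b.External
    }
}

func main() {
    // Aggregate a few files on demand rather than keeping running counters.
    var usage ByPlacement[CountAndSize]
    files := []struct {
        placement Placement
        size      uint64
    }{
        {Local, 4 << 20},
        {Shared, 32 << 20},
        {Local, 8 << 20},
    }
    for _, f := range files {
        usage.Ptr(f.placement).Inc(f.size)
    }
    fmt.Printf("local=%+v shared=%+v external=%+v\n", usage.Local, usage.Shared, usage.External)
}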

blob_rewrite.go

Lines changed: 2 additions & 2 deletions
@@ -259,8 +259,8 @@ func (c *blobFileRewriteCompaction) Execute(jobID JobID, d *DB) error {
             // We don't know the size of the output blob file--it may have
             // been half-written. We use the input blob file size as an
             // approximation for deletion pacing.
-            FileSize: c.input.Physical.Size,
-            IsLocal:  true,
+            FileSize:  c.input.Physical.Size,
+            Placement: base.Local,
         },
     })
 }

compaction.go

Lines changed: 2 additions & 2 deletions
@@ -2677,7 +2677,7 @@ func (d *DB) cleanupVersionEdit(ve *manifest.VersionEdit) {
                 FileNum:  ve.NewBlobFiles[i].Physical.FileNum,
                 FileSize: ve.NewBlobFiles[i].Physical.Size,
             },
-            isLocal: objstorage.IsLocalBlobFile(d.objProvider, ve.NewBlobFiles[i].Physical.FileNum),
+            placement: objstorage.Placement(d.objProvider, base.FileTypeBlob, ve.NewBlobFiles[i].Physical.FileNum),
         })
     }
     for i := range ve.NewTables {
@@ -2706,7 +2706,7 @@ func (d *DB) cleanupVersionEdit(ve *manifest.VersionEdit) {
                 FileNum:  of.DiskFileNum,
                 FileSize: of.Size,
             },
-            isLocal: objstorage.IsLocalTable(d.objProvider, of.DiskFileNum),
+            placement: objstorage.Placement(d.objProvider, base.FileTypeTable, of.DiskFileNum),
         })
     }
     d.mu.versions.addObsoleteLocked(obsoleteFiles)
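
The change above swaps the boolean `isLocal` fields for a `placement` obtained from `objstorage.Placement`. That helper's implementation is not shown in this commit; the sketch below is a hypothetical approximation pieced together from the provider calls visible in the old disk_usage.go code further down (`Lookup`, `IsRemote`, `IsExternal`). The `placementOf` name and the error handling are assumptions.

package example

import (
    "github.com/cockroachdb/pebble/internal/base"
    "github.com/cockroachdb/pebble/objstorage"
)

// placementOf is a hypothetical stand-in for objstorage.Placement: it looks up
// the object's metadata and classifies it as local, shared, or external.
func placementOf(
    provider objstorage.Provider, fileType base.FileType, fileNum base.DiskFileNum,
) base.Placement {
    meta, err := provider.Lookup(fileType, fileNum)
    if err != nil {
        // How unknown objects are treated is a guess; the real helper may differ.
        return base.Local
    }
    switch {
    case meta.IsExternal():
        return base.External
    case meta.IsRemote():
        return base.Shared
    default:
        return base.Local
    }
}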

db.go

Lines changed: 23 additions & 16 deletions
@@ -30,6 +30,7 @@ import (
     "github.com/cockroachdb/pebble/internal/manifest"
     "github.com/cockroachdb/pebble/internal/manual"
     "github.com/cockroachdb/pebble/internal/problemspans"
+    "github.com/cockroachdb/pebble/metrics"
     "github.com/cockroachdb/pebble/objstorage"
     "github.com/cockroachdb/pebble/objstorage/remote"
     "github.com/cockroachdb/pebble/rangekey"
@@ -539,7 +540,8 @@ type DB struct {
         }
     }

-    fileSizeAnnotator manifest.TableAnnotator[fileSizeByBacking]
+    tableDiskUsageAnnotator    manifest.TableAnnotator[TableUsageByPlacement]
+    blobFileDiskUsageAnnotator manifest.BlobFileAnnotator[metrics.CountAndSizeByPlacement]

     // problemSpans keeps track of spans of keys within LSM levels where
     // compactions have failed; used to avoid retrying these compactions too
@@ -1880,7 +1882,6 @@ func (d *DB) AsyncFlush() (<-chan struct{}, error) {
 func (d *DB) Metrics() *Metrics {
     metrics := &Metrics{}
     walStats := d.mu.log.manager.Stats()
-    deletePacerMetrics := d.deletePacer.Metrics()

     d.mu.Lock()
     vers := d.mu.versions.currentVersion()
@@ -1940,35 +1941,28 @@ func (d *DB) Metrics() *Metrics {
             metrics.Levels[level].CompensatedFillFactor = lm.compensatedFillFactor
         }
     }
-    metrics.Table.Zombie.All.Count = uint64(d.mu.versions.zombieTables.Count())
-    metrics.Table.Zombie.All.Bytes = d.mu.versions.zombieTables.TotalSize()
-    metrics.Table.Zombie.Local.Count, metrics.Table.Zombie.Local.Bytes = d.mu.versions.zombieTables.LocalStats()
+    metrics.Table.Physical.Zombie = d.mu.versions.zombieTables.Metrics()
+    metrics.BlobFiles.Zombie = d.mu.versions.zombieBlobs.Metrics()

     // Populate obsolete blob/table metrics from both the not-yet-enqueued lists
     // in the versionSet, and what is already in the delete pacer queue.
-    metrics.Table.Obsolete = deletePacerMetrics.InQueue.Tables
+    deletePacerMetrics := d.deletePacer.Metrics()
+    metrics.Table.Physical.Obsolete = deletePacerMetrics.InQueue.Tables
     for _, fi := range d.mu.versions.obsoleteTables {
-        metrics.Table.Obsolete.Inc(fi.FileSize, fi.IsLocal)
+        metrics.Table.Physical.Obsolete.Inc(fi.FileSize, fi.Placement)
     }
     metrics.BlobFiles.Obsolete = deletePacerMetrics.InQueue.BlobFiles
     for _, fi := range d.mu.versions.obsoleteBlobs {
-        metrics.BlobFiles.Obsolete.Inc(fi.FileSize, fi.IsLocal)
+        metrics.BlobFiles.Obsolete.Inc(fi.FileSize, fi.Placement)
     }

     metrics.private.optionsFileSize = d.optionsFileSize

     d.mu.versions.logLock()
     metrics.private.manifestFileSize = uint64(d.mu.versions.manifest.Size())
-    backingCount, backingTotalSize := d.mu.versions.latest.virtualBackings.Stats()
-    metrics.Table.BackingTable.Count = uint64(backingCount)
-    metrics.Table.BackingTable.Bytes = backingTotalSize
+    backingStats := d.mu.versions.latest.virtualBackings.Stats()
     blobStats, _ := d.mu.versions.latest.blobFiles.Stats()
     d.mu.versions.logUnlock()
-    metrics.BlobFiles.Live.All.Count = blobStats.Count
-    metrics.BlobFiles.Live.All.Bytes = blobStats.PhysicalSize
-    metrics.BlobFiles.ValueSize = blobStats.ValueSize
-    metrics.BlobFiles.ReferencedValueSize = blobStats.ReferencedValueSize
-    metrics.BlobFiles.ReferencedBackingValueSize = blobStats.ReferencedBackingValueSize

     metrics.LogWriter.FsyncLatency = d.mu.log.metrics.fsyncLatency
     if err := metrics.LogWriter.Merge(&d.mu.log.metrics.LogWriterMetrics); err != nil {
@@ -1984,6 +1978,13 @@ func (d *DB) Metrics() *Metrics {

     d.mu.Unlock()

+    // The table disk usage is due to physical tables plus backings for virtual tables.
+    tableDiskUsage := d.tableDiskUsageAnnotator.MultiLevelAnnotation(vers.Levels[:])
+    metrics.Table.Physical.Live.Local = tableDiskUsage.Local.Physical
+    metrics.Table.Physical.Live.Shared = tableDiskUsage.Shared.Physical
+    metrics.Table.Physical.Live.External = tableDiskUsage.External.Physical
+    metrics.Table.Physical.Live.Accumulate(backingStats)
+
     // TODO(jackson): Consider making these metrics optional.
     aggProps := tablePropsAnnotator.MultiLevelAnnotation(vers.Levels[:])
     metrics.Keys.RangeKeySetsCount = aggProps.NumRangeKeySets
@@ -1999,6 +2000,12 @@ func (d *DB) Metrics() *Metrics {
         metrics.Table.Compression.MergeWith(&aggProps.CompressionMetrics)
     }

+    metrics.BlobFiles.Live = d.blobFileDiskUsageAnnotator.Annotation(&vers.BlobFiles)
+
+    metrics.BlobFiles.ValueSize = blobStats.ValueSize
+    metrics.BlobFiles.ReferencedValueSize = blobStats.ReferencedValueSize
+    metrics.BlobFiles.ReferencedBackingValueSize = blobStats.ReferencedBackingValueSize
+
     blobCompressionMetrics := blobCompressionStatsAnnotator.Annotation(&vers.BlobFiles)
     metrics.BlobFiles.Compression.MergeWith(&blobCompressionMetrics)

disk_usage.go

Lines changed: 93 additions & 47 deletions
@@ -8,6 +8,8 @@ import (
     "github.com/cockroachdb/errors"
     "github.com/cockroachdb/pebble/internal/base"
     "github.com/cockroachdb/pebble/internal/manifest"
+    "github.com/cockroachdb/pebble/metrics"
+    "github.com/cockroachdb/pebble/objstorage"
 )

 // EstimateDiskUsage returns the estimated filesystem space used in bytes for
@@ -48,71 +50,115 @@ func (d *DB) EstimateDiskUsageByBackingType(
     readState := d.loadReadState()
     defer readState.unref()

-    sizes := d.fileSizeAnnotator.VersionRangeAnnotation(readState.current, bounds)
-    return sizes.totalSize, sizes.remoteSize, sizes.externalSize, nil
+    sizes := d.tableDiskUsageAnnotator.VersionRangeAnnotation(readState.current, bounds)
+    externalSize = sizes.External.TotalBytes()
+    remoteSize = externalSize + sizes.Shared.TotalBytes()
+    totalSize = remoteSize + sizes.Local.TotalBytes()
+    return totalSize, remoteSize, externalSize, nil
 }

-// fileSizeByBacking contains the estimated file size for LSM data within some
-// bounds. It is broken down by backing type. The file size refers to both the
-// sstable size and an estimate of the referenced blob sizes.
-type fileSizeByBacking struct {
-    // totalSize is the estimated size of all files for the given bounds.
-    totalSize uint64
-    // remoteSize is the estimated size of remote files for the given bounds.
-    remoteSize uint64
-    // externalSize is the estimated size of external files for the given bounds.
-    externalSize uint64
+// TableUsageByPlacement contains space usage information for tables, broken
+// down by where they are stored.
+//
+// Depending on context, this can refer to all tables in the LSM, all tables on
+// a level, or tables within some specified bounds (in the latter case, for
+// tables overlapping the bounds, the usage is a best-effort estimation).
+type TableUsageByPlacement struct {
+    metrics.ByPlacement[TableDiskUsage]
 }

-func (d *DB) singleFileSizeByBacking(
-    fileSize uint64, t *manifest.TableMetadata,
-) (_ fileSizeByBacking, ok bool) {
-    res := fileSizeByBacking{
-        totalSize: fileSize,
-    }
+// Accumulate adds the rhs counts and sizes to the receiver.
+func (u *TableUsageByPlacement) Accumulate(rhs TableUsageByPlacement) {
+    u.Local.Accumulate(rhs.Local)
+    u.Shared.Accumulate(rhs.Shared)
+    u.External.Accumulate(rhs.External)
+}
+
+// TableDiskUsage contains space usage information for a set of sstables.
+type TableDiskUsage struct {
+    // Physical contains the count and total size of physical tables in the set.
+    Physical metrics.CountAndSize
+
+    // Virtual contains the count and total estimated referenced bytes of virtual
+    // tables in the set.
+    Virtual metrics.CountAndSize
+
+    // ReferencedBytes contains the total estimated size of values stored in blob
+    // files referenced by tables in this set (either physical or virtual).
+    ReferencedBytes uint64
+}
+
+// TotalBytes returns the sum of all the byte fields.
+func (u TableDiskUsage) TotalBytes() uint64 {
+    return u.Physical.Bytes + u.Virtual.Bytes + u.ReferencedBytes
+}
+
+// Accumulate adds the rhs counts and sizes to the receiver.
+func (u *TableDiskUsage) Accumulate(rhs TableDiskUsage) {
+    u.Physical.Accumulate(rhs.Physical)
+    u.Virtual.Accumulate(rhs.Virtual)
+    u.ReferencedBytes += rhs.ReferencedBytes
+}

-    objMeta, err := d.objProvider.Lookup(base.FileTypeTable, t.TableBacking.DiskFileNum)
-    if err != nil {
-        return res, false
+func (d *DB) singleTableDiskUsage(
+    fileSize uint64, referencedSize uint64, fileNum base.DiskFileNum, isVirtual bool,
+) TableUsageByPlacement {
+    u := TableDiskUsage{
+        ReferencedBytes: referencedSize,
     }
-    if objMeta.IsRemote() {
-        res.remoteSize += fileSize
-        if objMeta.IsExternal() {
-            res.externalSize += fileSize
-        }
+    if isVirtual {
+        u.Virtual.Inc(fileSize)
+    } else {
+        u.Physical.Inc(fileSize)
     }
-    return res, true
+    placement := objstorage.Placement(d.objProvider, base.FileTypeTable, fileNum)
+    var res TableUsageByPlacement
+    res.Set(placement, u)
+    return res
 }

-var fileSizeAnnotatorIdx = manifest.NewTableAnnotationIdx()
+var tableDiskUsageAnnotatorIdx = manifest.NewTableAnnotationIdx()

-// makeFileSizeAnnotator returns an annotator that computes the storage size of
-// files. When applicable, this includes both the sstable size and the size of
-// any referenced blob files.
-func (d *DB) makeFileSizeAnnotator() manifest.TableAnnotator[fileSizeByBacking] {
-    return manifest.MakeTableAnnotator[fileSizeByBacking](
-        fileSizeAnnotatorIdx,
-        manifest.TableAnnotatorFuncs[fileSizeByBacking]{
-            Merge: func(dst *fileSizeByBacking, src fileSizeByBacking) {
-                dst.totalSize += src.totalSize
-                dst.remoteSize += src.remoteSize
-                dst.externalSize += src.externalSize
-            },
-            Table: func(f *manifest.TableMetadata) (v fileSizeByBacking, cacheOK bool) {
-                return d.singleFileSizeByBacking(f.Size+f.EstimatedReferenceSize(), f)
+// makeTableDiskSpaceUsageAnnotator returns an annotator that computes the
+// storage size of files. When applicable, this includes both the sstable size
+// and the size of any referenced blob files.
+func (d *DB) makeTableDiskSpaceUsageAnnotator() manifest.TableAnnotator[TableUsageByPlacement] {
+    return manifest.MakeTableAnnotator[TableUsageByPlacement](
+        tableDiskUsageAnnotatorIdx,
+        manifest.TableAnnotatorFuncs[TableUsageByPlacement]{
+            Merge: (*TableUsageByPlacement).Accumulate,
+            Table: func(f *manifest.TableMetadata) (v TableUsageByPlacement, cacheOK bool) {
+                return d.singleTableDiskUsage(f.Size, f.EstimatedReferenceSize(), f.TableBacking.DiskFileNum, f.Virtual), true
             },
-            PartialOverlap: func(f *manifest.TableMetadata, bounds base.UserKeyBounds) fileSizeByBacking {
+            PartialOverlap: func(f *manifest.TableMetadata, bounds base.UserKeyBounds) TableUsageByPlacement {
                 overlappingFileSize, err := d.fileCache.estimateSize(f, bounds.Start, bounds.End.Key)
                 if err != nil {
-                    return fileSizeByBacking{}
+                    return TableUsageByPlacement{}
                 }
                 overlapFraction := float64(overlappingFileSize) / float64(f.Size)
                 // Scale the blob reference size proportionally to the file
                 // overlap from the bounds to approximate only the blob
                 // references that overlap with the requested bounds.
-                size := overlappingFileSize + uint64(float64(f.EstimatedReferenceSize())*overlapFraction)
-                res, _ := d.singleFileSizeByBacking(size, f)
-                return res
+                referencedSize := uint64(float64(f.EstimatedReferenceSize()) * overlapFraction)
+                return d.singleTableDiskUsage(overlappingFileSize, referencedSize, f.TableBacking.DiskFileNum, f.Virtual)
+            },
+        })
+}
+
+var blobFileDiskUsageAnnotatorIdx = manifest.NewBlobAnnotationIdx()
+
+// makeBlobFileDiskSpaceUsageAnnotator returns an annotator that computes the
+// storage size of blob files, broken down by placement.
+func (d *DB) makeBlobFileDiskSpaceUsageAnnotator() manifest.BlobFileAnnotator[metrics.CountAndSizeByPlacement] {
+    return manifest.MakeBlobFileAnnotator[metrics.CountAndSizeByPlacement](
+        blobFileDiskUsageAnnotatorIdx,
+        manifest.BlobFileAnnotatorFuncs[metrics.CountAndSizeByPlacement]{
+            Merge: (*metrics.CountAndSizeByPlacement).Accumulate,
+            BlobFile: func(m manifest.BlobFileMetadata) (res metrics.CountAndSizeByPlacement, cacheOK bool) {
+                placement := objstorage.Placement(d.objProvider, base.FileTypeBlob, m.Physical.FileNum)
+                res.Ptr(placement).Inc(m.Physical.Size)
+                return res, true
             },
         })
 }
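
In the new `EstimateDiskUsageByBackingType` body above, external bytes are included in the remote total and remote bytes in the overall total, so the three returned sizes nest (external ≤ remote ≤ total). A tiny self-contained illustration of that arithmetic, with made-up per-placement byte counts:

package main

import "fmt"

func main() {
    // Hypothetical per-placement table usage, the way a TableUsageByPlacement
    // breakdown would report it (the numbers are made up).
    localBytes := uint64(100 << 20)   // bytes stored on local disk
    sharedBytes := uint64(40 << 20)   // bytes in shared remote storage
    externalBytes := uint64(10 << 20) // bytes in external objects

    externalSize := externalBytes            // external only
    remoteSize := externalSize + sharedBytes // remote = shared + external
    totalSize := remoteSize + localBytes     // total = local + shared + external

    // Invariant mirrored by the new code: externalSize <= remoteSize <= totalSize.
    fmt.Println(totalSize, remoteSize, externalSize) // 157286400 52428800 10485760
}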

internal/base/placement.go

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+// Copyright 2025 The LevelDB-Go and Pebble Authors. All rights reserved. Use
+// of this source code is governed by a BSD-style license that can be found in
+// the LICENSE file.
+
+package base
+
+import (
+    "github.com/cockroachdb/errors"
+    "github.com/cockroachdb/pebble/internal/invariants"
+    "github.com/cockroachdb/redact"
+)
+
+// Placement identifies where a file/object is stored.
+//
+// The zero value is invalid (this is intentional, to detect accidentally
+// uninitialized fields).
+type Placement uint8
+
+const (
+    Local Placement = 1 + iota
+    Shared
+    External
+)
+
+func (p Placement) String() string {
+    switch p {
+    case Local:
+        return "local"
+    case Shared:
+        return "shared"
+    case External:
+        return "external"
+    default:
+        if invariants.Enabled {
+            panic(errors.AssertionFailedf("invalid placement type %d", p))
+        }
+        return "invalid"
+    }
+}
+
+// SafeFormat implements redact.SafeFormatter.
+func (p Placement) SafeFormat(w redact.SafePrinter, _ rune) {
+    w.Print(redact.SafeString(p.String()))
+}

internal/deletepacer/delete_pacer.go

Lines changed: 5 additions & 5 deletions
@@ -214,8 +214,8 @@ func (dp *DeletePacer) mainLoop() {
             defer dp.mu.Lock()
             dp.deleteFn(file.ObsoleteFile, file.JobID)
         }()
-        dp.mu.metrics.InQueue.Dec(file.FileType, file.FileSize, file.IsLocal)
-        dp.mu.metrics.Deleted.Inc(file.FileType, file.FileSize, file.IsLocal)
+        dp.mu.metrics.InQueue.Dec(file.FileType, file.FileSize, file.Placement)
+        dp.mu.metrics.Deleted.Inc(file.FileType, file.FileSize, file.Placement)
         dp.mu.deletedCond.Broadcast()
     }
 }
@@ -237,7 +237,7 @@ func (dp *DeletePacer) Enqueue(jobID int, files ...ObsoleteFile) {
             dp.mu.queuedPacingBytes += b
             dp.mu.queuedHistory.Add(now, b)
         }
-        dp.mu.metrics.InQueue.Inc(file.FileType, file.FileSize, file.IsLocal)
+        dp.mu.metrics.InQueue.Inc(file.FileType, file.FileSize, file.Placement)
         dp.mu.queue.PushBack(queueEntry{
             ObsoleteFile: file,
             JobID:        jobID,
@@ -276,8 +276,8 @@ func (dp *DeletePacer) WaitForTesting() {
     dp.mu.Lock()
     defer dp.mu.Unlock()

-    n := dp.mu.metrics.Deleted.Totals().Count + dp.mu.metrics.InQueue.Totals().Count
-    for dp.mu.metrics.Deleted.Totals().Count < n {
+    n := dp.mu.metrics.Deleted.Total().Count + dp.mu.metrics.InQueue.Total().Count
+    for dp.mu.metrics.Deleted.Total().Count < n {
         dp.mu.deletedCond.Wait()
     }
 }
