Skip to content

Commit bf55e27

Browse files
authored Feb 7, 2025
BucketListDB Random Eviction Cache (#4632)
# Description

Resolves #3696

Adds a random eviction cache to `LiveBucket` using `DiskIndex`. Currently, I've set the default to cache up to 25% of Bucket entries, which puts total stellar-core memory usage at about 5-6 GB by default.

# Checklist

- [x] Reviewed the [contributing](https://github.com/stellar/stellar-core/blob/master/CONTRIBUTING.md#submitting-changes) document
- [x] Rebased on top of master (no merge commits)
- [x] Ran `clang-format` v8.0.0 (via `make format` or the Visual Studio extension)
- [x] Compiles
- [x] Ran all tests
- [ ] If change impacts performance, include supporting evidence per the [performance document](https://github.com/stellar/stellar-core/blob/master/performance-eval/performance-eval.md)
2 parents c99641d + 8af38d8 commit bf55e27

17 files changed

+231
-10
lines changed
 

‎docs/metrics.md

+2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ bucketlistDB-live.bulk.inflationWinners | timer | time to load inflation
4545
bucketlistDB-live.bulk.poolshareTrustlines | timer | time to load poolshare trustlines by accountID and assetID
4646
bucketlistDB-live.bulk.prefetch | timer | time to prefetch
4747
bucketlistDB-<X>.point.<Y> | timer | time to load single entry of type <Y> on BucketList <X> (live/hotArchive)
48+
bucketlistDB-cache.hit | meter | number of cache hits on Live BucketList Disk random eviction cache
49+
bucketlistDB-cache.miss | meter | number of cache misses on Live BucketList Disk random eviction cache
4850
crypto.verify.hit | meter | number of signature cache hits
4951
crypto.verify.miss | meter | number of signature cache misses
5052
crypto.verify.total | meter | sum of both hits and misses

‎docs/stellar-core_example.cfg

+8
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,14 @@ MAX_DEX_TX_OPERATIONS_IN_TX_SET = 0
235235
# 0, individual index is always used. Default page size 16 kb.
236236
BUCKETLIST_DB_INDEX_PAGE_SIZE_EXPONENT = 14
237237

238+
# BUCKETLIST_DB_CACHED_PERCENT (Integer) default 25
239+
# Percentage of entries cached by BucketListDB when Bucket size is larger
240+
# than BUCKETLIST_DB_INDEX_CUTOFF. Note that this value does not impact
241+
# Buckets smaller than BUCKETLIST_DB_INDEX_CUTOFF, as they are always
242+
# completely held in memory. Roughly speaking, RAM usage for BucketList
243+
# cache == BucketListSize * (BUCKETLIST_DB_CACHED_PERCENT / 100).
244+
BUCKETLIST_DB_CACHED_PERCENT = 25
245+
238246
# BUCKETLIST_DB_INDEX_CUTOFF (Integer) default 250
239247
# Size, in MB, determining whether a bucket should have an individual
240248
# key index or a key range index. If bucket size is below this value, range

‎src/bucket/BucketIndexUtils.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ namespace stellar
1919
std::streamoff
2020
getPageSizeFromConfig(Config const& cfg)
2121
{
22-
if (cfg.BUCKETLIST_DB_INDEX_PAGE_SIZE_EXPONENT == 0)
22+
if (cfg.BUCKETLIST_DB_INDEX_PAGE_SIZE_EXPONENT == 0 ||
23+
cfg.BUCKETLIST_DB_CACHED_PERCENT == 100)
2324
{
2425
return 0;
2526
}

‎src/bucket/BucketManager.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,10 @@ BucketManager::BucketManager(Application& app)
157157
app.getMetrics().NewCounter({"bucketlist", "size", "bytes"}))
158158
, mArchiveBucketListSizeCounter(
159159
app.getMetrics().NewCounter({"bucketlist-archive", "size", "bytes"}))
160+
, mCacheHitMeter(app.getMetrics().NewMeter({"bucketlistDB", "cache", "hit"},
161+
"bucketlistDB"))
162+
, mCacheMissMeter(app.getMetrics().NewMeter(
163+
{"bucketlistDB", "cache", "miss"}, "bucketlistDB"))
160164
, mBucketListEvictionCounters(app)
161165
, mEvictionStatistics(std::make_shared<EvictionStatistics>())
162166
, mConfig(app.getConfig())
@@ -351,6 +355,18 @@ BucketManager::readMergeCounters()
351355
return mMergeCounters;
352356
}
353357

358+
medida::Meter&
359+
BucketManager::getCacheHitMeter() const
360+
{
361+
return mCacheHitMeter;
362+
}
363+
364+
medida::Meter&
365+
BucketManager::getCacheMissMeter() const
366+
{
367+
return mCacheMissMeter;
368+
}
369+
354370
void
355371
BucketManager::incrMergeCounters(MergeCounters const& delta)
356372
{

‎src/bucket/BucketManager.h

+4
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ class BucketManager : NonMovableOrCopyable
100100
medida::Counter& mSharedBucketsSize;
101101
medida::Counter& mLiveBucketListSizeCounter;
102102
medida::Counter& mArchiveBucketListSizeCounter;
103+
medida::Meter& mCacheHitMeter;
104+
medida::Meter& mCacheMissMeter;
103105
EvictionCounters mBucketListEvictionCounters;
104106
MergeCounters mMergeCounters;
105107
std::shared_ptr<EvictionStatistics> mEvictionStatistics{};
@@ -197,6 +199,8 @@ class BucketManager : NonMovableOrCopyable
197199

198200
template <class BucketT> medida::Meter& getBloomMissMeter() const;
199201
template <class BucketT> medida::Meter& getBloomLookupMeter() const;
202+
medida::Meter& getCacheHitMeter() const;
203+
medida::Meter& getCacheMissMeter() const;
200204

201205
// Reading and writing the merge counters is done in bulk, and takes a lock
202206
// briefly; this can be done from any thread.

‎src/bucket/BucketSnapshot.cpp

+7-3
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ BucketSnapshotBase<BucketT>::isEmpty() const
3939
}
4040

4141
template <class BucketT>
42-
std::pair<std::shared_ptr<typename BucketT::EntryT>, bool>
42+
std::pair<std::shared_ptr<typename BucketT::EntryT const>, bool>
4343
BucketSnapshotBase<BucketT>::getEntryAtOffset(LedgerKey const& k,
4444
std::streamoff pos,
4545
size_t pageSize) const
@@ -58,12 +58,16 @@ BucketSnapshotBase<BucketT>::getEntryAtOffset(LedgerKey const& k,
5858
{
5959
if (stream.readOne(be))
6060
{
61-
return {std::make_shared<typename BucketT::EntryT>(be), false};
61+
auto entry = std::make_shared<typename BucketT::EntryT const>(be);
62+
mBucket->getIndex().maybeAddToCache(entry);
63+
return {entry, false};
6264
}
6365
}
6466
else if (stream.readPage(be, k, pageSize))
6567
{
66-
return {std::make_shared<typename BucketT::EntryT>(be), false};
68+
auto entry = std::make_shared<typename BucketT::EntryT const>(be);
69+
mBucket->getIndex().maybeAddToCache(entry);
70+
return {entry, false};
6771
}
6872

6973
mBucket->getIndex().markBloomMiss();

‎src/bucket/BucketSnapshot.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ template <class BucketT> class BucketSnapshotBase : public NonMovable
4141
// reads until key is found or the end of the page. Returns <BucketEntry,
4242
// bloomMiss>, where bloomMiss is true if a bloomMiss occurred during the
4343
// load.
44-
std::pair<std::shared_ptr<typename BucketT::EntryT>, bool>
44+
std::pair<std::shared_ptr<typename BucketT::EntryT const>, bool>
4545
getEntryAtOffset(LedgerKey const& k, std::streamoff pos,
4646
size_t pageSize) const;
4747

‎src/bucket/BucketUtils.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,17 @@ BucketEntryCounters::operator!=(BucketEntryCounters const& other) const
337337
return !(*this == other);
338338
}
339339

340+
size_t
341+
BucketEntryCounters::numEntries() const
342+
{
343+
size_t num = 0;
344+
for (auto const& [_, count] : entryTypeCounts)
345+
{
346+
num += count;
347+
}
348+
return num;
349+
}
350+
340351
template void
341352
BucketEntryCounters::count<LiveBucket>(LiveBucket::EntryT const& be);
342353
template void BucketEntryCounters::count<HotArchiveBucket>(

‎src/bucket/BucketUtils.h

+1
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ struct BucketEntryCounters
196196
BucketEntryCounters& operator+=(BucketEntryCounters const& other);
197197
bool operator==(BucketEntryCounters const& other) const;
198198
bool operator!=(BucketEntryCounters const& other) const;
199+
size_t numEntries() const;
199200

200201
template <class Archive>
201202
void

‎src/bucket/HotArchiveBucketIndex.h

+8
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ class HotArchiveBucketIndex : public NonMovableOrCopyable
7474
return mDiskIndex.scan(mDiskIndex.begin(), k).first;
7575
}
7676

77+
// Hot Archive does not support the cache, so define an empty function for
78+
// consistency with LiveBucketIndex
79+
void
80+
maybeAddToCache(
81+
std::shared_ptr<HotArchiveBucketEntry const> const& entry) const
82+
{
83+
}
84+
7785
std::pair<IndexReturnT, IterT> scan(IterT start, LedgerKey const& k) const;
7886

7987
BucketEntryCounters const&

‎src/bucket/LiveBucketIndex.cpp

+72
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include "util/Logging.h"
1212
#include "xdr/Stellar-ledger-entries.h"
1313
#include <ios>
14+
#include <medida/meter.h>
15+
#include <shared_mutex>
1416
#include <vector>
1517

1618
namespace stellar
@@ -38,6 +40,8 @@ LiveBucketIndex::getPageSize(Config const& cfg, size_t bucketSize)
3840
LiveBucketIndex::LiveBucketIndex(BucketManager& bm,
3941
std::filesystem::path const& filename,
4042
Hash const& hash, asio::io_context& ctx)
43+
: mCacheHitMeter(bm.getCacheHitMeter())
44+
, mCacheMissMeter(bm.getCacheMissMeter())
4145
{
4246
ZoneScoped;
4347
releaseAssert(!filename.empty());
@@ -60,6 +64,21 @@ LiveBucketIndex::LiveBucketIndex(BucketManager& bm,
6064
pageSize, filename);
6165
mDiskIndex = std::make_unique<DiskIndex<LiveBucket>>(
6266
bm, filename, pageSize, hash, ctx);
67+
68+
auto percentCached = bm.getConfig().BUCKETLIST_DB_CACHED_PERCENT;
69+
if (percentCached > 0)
70+
{
71+
auto const& counters = mDiskIndex->getBucketEntryCounters();
72+
auto cacheSize = (counters.numEntries() * percentCached) / 100;
73+
74+
// Minimum cache size of 100 if we are going to cache a non-zero
75+
// number of entries.
76+
// We don't want to reserve here, since caches only live as long as
77+
// the lifetime of the Bucket and fill relatively slowly
78+
mCache = std::make_unique<CacheT>(std::max<size_t>(cacheSize, 100),
79+
/*separatePRNG=*/false,
80+
/*reserve=*/false);
81+
}
6382
}
6483
}
6584

@@ -68,6 +87,8 @@ LiveBucketIndex::LiveBucketIndex(BucketManager const& bm, Archive& ar,
6887
std::streamoff pageSize)
6988

7089
: mDiskIndex(std::make_unique<DiskIndex<LiveBucket>>(ar, bm, pageSize))
90+
, mCacheHitMeter(bm.getCacheHitMeter())
91+
, mCacheMissMeter(bm.getCacheMissMeter())
7192
{
7293
// Only disk indexes are serialized
7394
releaseAssertOrThrow(pageSize != 0);
@@ -108,11 +129,39 @@ LiveBucketIndex::markBloomMiss() const
108129
mDiskIndex->markBloomMiss();
109130
}
110131

132+
std::shared_ptr<BucketEntry const>
133+
LiveBucketIndex::getCachedEntry(LedgerKey const& k) const
134+
{
135+
if (shouldUseCache())
136+
{
137+
std::shared_lock<std::shared_mutex> lock(mCacheMutex);
138+
auto cachePtr = mCache->maybeGet(k);
139+
if (cachePtr)
140+
{
141+
mCacheHitMeter.Mark();
142+
return *cachePtr;
143+
}
144+
145+
// In the case of a bloom filter false positive, we might have a cache
146+
// "miss" because we're searching for something that doesn't exist. We
147+
// don't cache non-existent entries, so we don't meter misses here.
148+
// Instead, we track misses when we insert a new entry, since we always
149+
// insert a new entry into the cache after a miss.
150+
}
151+
152+
return nullptr;
153+
}
154+
111155
IndexReturnT
112156
LiveBucketIndex::lookup(LedgerKey const& k) const
113157
{
114158
if (mDiskIndex)
115159
{
160+
if (auto cached = getCachedEntry(k); cached)
161+
{
162+
return IndexReturnT(cached);
163+
}
164+
116165
return mDiskIndex->scan(mDiskIndex->begin(), k).first;
117166
}
118167
else
@@ -127,6 +176,11 @@ LiveBucketIndex::scan(IterT start, LedgerKey const& k) const
127176
{
128177
if (mDiskIndex)
129178
{
179+
if (auto cached = getCachedEntry(k); cached)
180+
{
181+
return {IndexReturnT(cached), start};
182+
}
183+
130184
return mDiskIndex->scan(getDiskIter(start), k);
131185
}
132186

@@ -207,6 +261,24 @@ LiveBucketIndex::getBucketEntryCounters() const
207261
return mInMemoryIndex->getBucketEntryCounters();
208262
}
209263

264+
void
265+
LiveBucketIndex::maybeAddToCache(
266+
std::shared_ptr<BucketEntry const> const& entry) const
267+
{
268+
if (shouldUseCache())
269+
{
270+
releaseAssertOrThrow(entry);
271+
auto k = getBucketLedgerKey(*entry);
272+
273+
// If we are adding an entry to the cache, we must have missed it
274+
// earlier.
275+
mCacheMissMeter.Mark();
276+
277+
std::unique_lock<std::shared_mutex> lock(mCacheMutex);
278+
mCache->put(k, entry);
279+
}
280+
}
281+
210282
#ifdef BUILD_TESTS
211283
bool
212284
LiveBucketIndex::operator==(LiveBucketIndex const& in) const

‎src/bucket/LiveBucketIndex.h

+28
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,16 @@
99
#include "bucket/InMemoryIndex.h"
1010
#include "bucket/LedgerCmp.h"
1111
#include "bucket/LiveBucket.h"
12+
#include "ledger/LedgerHashUtils.h" // IWYU pragma: keep
1213
#include "util/NonCopyable.h"
14+
#include "util/RandomEvictionCache.h"
1315
#include "util/XDROperators.h" // IWYU pragma: keep
1416
#include "xdr/Stellar-ledger-entries.h"
1517
#include <filesystem>
1618
#include <optional>
1719

1820
#include <cereal/archives/binary.hpp>
21+
#include <shared_mutex>
1922

2023
namespace asio
2124
{
@@ -52,9 +55,21 @@ class LiveBucketIndex : public NonMovableOrCopyable
5255
using IterT =
5356
std::variant<InMemoryIndex::IterT, DiskIndex<LiveBucket>::IterT>;
5457

58+
using CacheT =
59+
RandomEvictionCache<LedgerKey, std::shared_ptr<BucketEntry const>>;
60+
5561
private:
5662
std::unique_ptr<DiskIndex<LiveBucket> const> mDiskIndex;
5763
std::unique_ptr<InMemoryIndex const> mInMemoryIndex;
64+
std::unique_ptr<CacheT> mCache;
65+
66+
// The indexes themselves are thread safe, as they are immutable after
67+
// construction. The cache is not, so all accesses must first acquire this
68+
// mutex.
69+
mutable std::shared_mutex mCacheMutex;
70+
71+
medida::Meter& mCacheHitMeter;
72+
medida::Meter& mCacheMissMeter;
5873

5974
static inline DiskIndex<LiveBucket>::IterT
6075
getDiskIter(IterT const& iter)
@@ -68,6 +83,15 @@ class LiveBucketIndex : public NonMovableOrCopyable
6883
return std::get<InMemoryIndex::IterT>(iter);
6984
}
7085

86+
bool
87+
shouldUseCache() const
88+
{
89+
return mDiskIndex && mCache;
90+
}
91+
92+
// Returns nullptr if cache is not enabled or entry not found
93+
std::shared_ptr<BucketEntry const> getCachedEntry(LedgerKey const& k) const;
94+
7195
public:
7296
inline static const std::string DB_BACKEND_STATE = "bl";
7397
inline static const uint32_t BUCKET_INDEX_VERSION = 5;
@@ -97,6 +121,8 @@ class LiveBucketIndex : public NonMovableOrCopyable
97121
std::optional<std::pair<std::streamoff, std::streamoff>>
98122
getOfferRange() const;
99123

124+
void maybeAddToCache(std::shared_ptr<BucketEntry const> const& entry) const;
125+
100126
BucketEntryCounters const& getBucketEntryCounters() const;
101127
uint32_t getPageSize() const;
102128

@@ -105,6 +131,8 @@ class LiveBucketIndex : public NonMovableOrCopyable
105131
void markBloomMiss() const;
106132
#ifdef BUILD_TESTS
107133
bool operator==(LiveBucketIndex const& in) const;
134+
135+
void clearCache() const;
108136
#endif
109137
};
110138
}

‎src/bucket/readme.md

+5
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,8 @@ lookup speed and memory overhead. The following configuration flags control thes
9797
on startup. Defaults to true, should only be set to false for testing purposes.
9898
Validators do not currently support persisted indexes. If NODE_IS_VALIDATOR=true,
9999
this value is ignored and indexes are never persisted.
100+
- `BUCKETLIST_DB_CACHED_PERCENT`
101+
- Percentage of entries cached by BucketListDB when Bucket size is larger
102+
than `BUCKETLIST_DB_INDEX_CUTOFF`. Note that this value does not impact
103+
Buckets smaller than `BUCKETLIST_DB_INDEX_CUTOFF`, as they are always
104+
completely held in memory.

0 commit comments

Comments
 (0)
Please sign in to comment.