Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BucketListDB Random Eviction Cache #4632

Merged
merged 2 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ bucketlistDB-live.bulk.inflationWinners | timer | time to load inflation
bucketlistDB-live.bulk.poolshareTrustlines | timer | time to load poolshare trustlines by accountID and assetID
bucketlistDB-live.bulk.prefetch | timer | time to prefetch
bucketlistDB-<X>.point.<y> | timer | time to load single entry of type <y> on BucketList <X> (live/hotArchive)
bucketlistDB.cache.hit | meter | number of cache hits on Live BucketList Disk random eviction cache
bucketlistDB.cache.miss | meter | number of cache misses on Live BucketList Disk random eviction cache
crypto.verify.hit | meter | number of signature cache hits
crypto.verify.miss | meter | number of signature cache misses
crypto.verify.total | meter | sum of both hits and misses
Expand Down
8 changes: 8 additions & 0 deletions docs/stellar-core_example.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,14 @@ MAX_DEX_TX_OPERATIONS_IN_TX_SET = 0
# 0, individual index is always used. Default page size 16 kb.
BUCKETLIST_DB_INDEX_PAGE_SIZE_EXPONENT = 14

# BUCKETLIST_DB_CACHED_PERCENT (Integer) default 25
# Percentage of entries cached by BucketListDB when Bucket size is larger
# than BUCKETLIST_DB_INDEX_CUTOFF. Note that this value does not impact
# Buckets smaller than BUCKETLIST_DB_INDEX_CUTOFF, as they are always
# completely held in memory. Roughly speaking, RAM usage for BucketList
# cache == BucketListSize * (BUCKETLIST_DB_CACHED_PERCENT / 100).
BUCKETLIST_DB_CACHED_PERCENT = 25

# BUCKETLIST_DB_INDEX_CUTOFF (Integer) default 250
# Size, in MB, determining whether a bucket should have an individual
# key index or a key range index. If bucket size is below this value, range
Expand Down
3 changes: 2 additions & 1 deletion src/bucket/BucketIndexUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ namespace stellar
std::streamoff
getPageSizeFromConfig(Config const& cfg)
{
if (cfg.BUCKETLIST_DB_INDEX_PAGE_SIZE_EXPONENT == 0)
if (cfg.BUCKETLIST_DB_INDEX_PAGE_SIZE_EXPONENT == 0 ||
cfg.BUCKETLIST_DB_CACHED_PERCENT == 100)
{
return 0;
}
Expand Down
16 changes: 16 additions & 0 deletions src/bucket/BucketManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,10 @@ BucketManager::BucketManager(Application& app)
app.getMetrics().NewCounter({"bucketlist", "size", "bytes"}))
, mArchiveBucketListSizeCounter(
app.getMetrics().NewCounter({"bucketlist-archive", "size", "bytes"}))
, mCacheHitMeter(app.getMetrics().NewMeter({"bucketlistDB", "cache", "hit"},
"bucketlistDB"))
, mCacheMissMeter(app.getMetrics().NewMeter(
{"bucketlistDB", "cache", "miss"}, "bucketlistDB"))
, mBucketListEvictionCounters(app)
, mEvictionStatistics(std::make_shared<EvictionStatistics>())
, mConfig(app.getConfig())
Expand Down Expand Up @@ -351,6 +355,18 @@ BucketManager::readMergeCounters()
return mMergeCounters;
}

// Accessor for the meter that records BucketListDB cache hits. Hands back
// the reference stored in mCacheHitMeter.
medida::Meter&
BucketManager::getCacheHitMeter() const
{
    return mCacheHitMeter;
}

// Accessor for the meter that records BucketListDB cache misses. Hands back
// the reference stored in mCacheMissMeter.
medida::Meter&
BucketManager::getCacheMissMeter() const
{
    return mCacheMissMeter;
}

void
BucketManager::incrMergeCounters(MergeCounters const& delta)
{
Expand Down
4 changes: 4 additions & 0 deletions src/bucket/BucketManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ class BucketManager : NonMovableOrCopyable
medida::Counter& mSharedBucketsSize;
medida::Counter& mLiveBucketListSizeCounter;
medida::Counter& mArchiveBucketListSizeCounter;
medida::Meter& mCacheHitMeter;
medida::Meter& mCacheMissMeter;
EvictionCounters mBucketListEvictionCounters;
MergeCounters mMergeCounters;
std::shared_ptr<EvictionStatistics> mEvictionStatistics{};
Expand Down Expand Up @@ -197,6 +199,8 @@ class BucketManager : NonMovableOrCopyable

template <class BucketT> medida::Meter& getBloomMissMeter() const;
template <class BucketT> medida::Meter& getBloomLookupMeter() const;
medida::Meter& getCacheHitMeter() const;
medida::Meter& getCacheMissMeter() const;

// Reading and writing the merge counters is done in bulk, and takes a lock
// briefly; this can be done from any thread.
Expand Down
10 changes: 7 additions & 3 deletions src/bucket/BucketSnapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ BucketSnapshotBase<BucketT>::isEmpty() const
}

template <class BucketT>
std::pair<std::shared_ptr<typename BucketT::EntryT>, bool>
std::pair<std::shared_ptr<typename BucketT::EntryT const>, bool>
BucketSnapshotBase<BucketT>::getEntryAtOffset(LedgerKey const& k,
std::streamoff pos,
size_t pageSize) const
Expand All @@ -58,12 +58,16 @@ BucketSnapshotBase<BucketT>::getEntryAtOffset(LedgerKey const& k,
{
if (stream.readOne(be))
{
return {std::make_shared<typename BucketT::EntryT>(be), false};
auto entry = std::make_shared<typename BucketT::EntryT const>(be);
mBucket->getIndex().maybeAddToCache(entry);
return {entry, false};
}
}
else if (stream.readPage(be, k, pageSize))
{
return {std::make_shared<typename BucketT::EntryT>(be), false};
auto entry = std::make_shared<typename BucketT::EntryT const>(be);
mBucket->getIndex().maybeAddToCache(entry);
return {entry, false};
}

mBucket->getIndex().markBloomMiss();
Expand Down
2 changes: 1 addition & 1 deletion src/bucket/BucketSnapshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ template <class BucketT> class BucketSnapshotBase : public NonMovable
// reads until key is found or the end of the page. Returns <BucketEntry,
// bloomMiss>, where bloomMiss is true if a bloomMiss occurred during the
// load.
std::pair<std::shared_ptr<typename BucketT::EntryT>, bool>
std::pair<std::shared_ptr<typename BucketT::EntryT const>, bool>
getEntryAtOffset(LedgerKey const& k, std::streamoff pos,
size_t pageSize) const;

Expand Down
11 changes: 11 additions & 0 deletions src/bucket/BucketUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,17 @@ BucketEntryCounters::operator!=(BucketEntryCounters const& other) const
return !(*this == other);
}

size_t
BucketEntryCounters::numEntries() const
{
size_t num = 0;
for (auto const& [_, count] : entryTypeCounts)
{
num += count;
}
return num;
}

template void
BucketEntryCounters::count<LiveBucket>(LiveBucket::EntryT const& be);
template void BucketEntryCounters::count<HotArchiveBucket>(
Expand Down
1 change: 1 addition & 0 deletions src/bucket/BucketUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ struct BucketEntryCounters
BucketEntryCounters& operator+=(BucketEntryCounters const& other);
bool operator==(BucketEntryCounters const& other) const;
bool operator!=(BucketEntryCounters const& other) const;
size_t numEntries() const;

template <class Archive>
void
Expand Down
8 changes: 8 additions & 0 deletions src/bucket/HotArchiveBucketIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ class HotArchiveBucketIndex : public NonMovableOrCopyable
return mDiskIndex.scan(mDiskIndex.begin(), k).first;
}

// Intentional no-op: the Hot Archive does not support the cache. This
// function exists only so that this class offers the same interface as
// LiveBucketIndex.
void
maybeAddToCache(
    std::shared_ptr<HotArchiveBucketEntry const> const& entry) const
{
}

std::pair<IndexReturnT, IterT> scan(IterT start, LedgerKey const& k) const;

BucketEntryCounters const&
Expand Down
72 changes: 72 additions & 0 deletions src/bucket/LiveBucketIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "util/Logging.h"
#include "xdr/Stellar-ledger-entries.h"
#include <ios>
#include <medida/meter.h>
#include <shared_mutex>
#include <vector>

namespace stellar
Expand Down Expand Up @@ -38,6 +40,8 @@ LiveBucketIndex::getPageSize(Config const& cfg, size_t bucketSize)
LiveBucketIndex::LiveBucketIndex(BucketManager& bm,
std::filesystem::path const& filename,
Hash const& hash, asio::io_context& ctx)
: mCacheHitMeter(bm.getCacheHitMeter())
, mCacheMissMeter(bm.getCacheMissMeter())
{
ZoneScoped;
releaseAssert(!filename.empty());
Expand All @@ -60,6 +64,21 @@ LiveBucketIndex::LiveBucketIndex(BucketManager& bm,
pageSize, filename);
mDiskIndex = std::make_unique<DiskIndex<LiveBucket>>(
bm, filename, pageSize, hash, ctx);

auto percentCached = bm.getConfig().BUCKETLIST_DB_CACHED_PERCENT;
if (percentCached > 0)
{
auto const& counters = mDiskIndex->getBucketEntryCounters();
auto cacheSize = (counters.numEntries() * percentCached) / 100;

// Minimum cache size of 100 if we are going to cache a non-zero
// number of entries.
// We don't want to reserve here, since caches only live as long as
// the lifetime of the Bucket and fill relatively slowly.
mCache = std::make_unique<CacheT>(std::max<size_t>(cacheSize, 100),
/*separatePRNG=*/false,
/*reserve=*/false);
}
}
}

Expand All @@ -68,6 +87,8 @@ LiveBucketIndex::LiveBucketIndex(BucketManager const& bm, Archive& ar,
std::streamoff pageSize)

: mDiskIndex(std::make_unique<DiskIndex<LiveBucket>>(ar, bm, pageSize))
, mCacheHitMeter(bm.getCacheHitMeter())
, mCacheMissMeter(bm.getCacheMissMeter())
{
// Only disk indexes are serialized
releaseAssertOrThrow(pageSize != 0);
Expand Down Expand Up @@ -108,11 +129,39 @@ LiveBucketIndex::markBloomMiss() const
mDiskIndex->markBloomMiss();
}

// Looks up `k` in the cache. On a hit, marks the cache-hit meter and returns
// the cached entry. Returns nullptr when the cache is not in use for this
// index or when `k` is not cached.
std::shared_ptr<BucketEntry const>
LiveBucketIndex::getCachedEntry(LedgerKey const& k) const
{
    if (!shouldUseCache())
    {
        return nullptr;
    }

    // Lookups take the cache mutex in shared mode.
    // NOTE(review): this presumes maybeGet() is safe to call concurrently
    // under a shared lock -- confirm against RandomEvictionCache.
    std::shared_lock<std::shared_mutex> guard(mCacheMutex);
    if (auto hit = mCache->maybeGet(k); hit)
    {
        mCacheHitMeter.Mark();
        return *hit;
    }

    // In the case of a bloom filter false positive, we might have a cache
    // "miss" because we're searching for something that doesn't exist. We
    // don't cache non-existent entries, so we don't meter misses here.
    // Instead, we track misses when we insert a new entry, since we always
    // insert a new entry into the cache after a miss.
    return nullptr;
}

IndexReturnT
LiveBucketIndex::lookup(LedgerKey const& k) const
{
if (mDiskIndex)
{
if (auto cached = getCachedEntry(k); cached)
{
return IndexReturnT(cached);
}

return mDiskIndex->scan(mDiskIndex->begin(), k).first;
}
else
Expand All @@ -127,6 +176,11 @@ LiveBucketIndex::scan(IterT start, LedgerKey const& k) const
{
if (mDiskIndex)
{
if (auto cached = getCachedEntry(k); cached)
{
return {IndexReturnT(cached), start};
}

return mDiskIndex->scan(getDiskIter(start), k);
}

Expand Down Expand Up @@ -207,6 +261,24 @@ LiveBucketIndex::getBucketEntryCounters() const
return mInMemoryIndex->getBucketEntryCounters();
}

// Inserts `entry` into the cache, keyed by its ledger key, when the cache is
// in use for this index; otherwise does nothing. `entry` must be non-null
// whenever the cache is in use (enforced by releaseAssertOrThrow).
void
LiveBucketIndex::maybeAddToCache(
    std::shared_ptr<BucketEntry const> const& entry) const
{
    if (!shouldUseCache())
    {
        return;
    }

    releaseAssertOrThrow(entry);
    auto key = getBucketLedgerKey(*entry);

    // An entry is only handed to us after a lookup failed to find it in the
    // cache, so this is where cache misses are metered.
    mCacheMissMeter.Mark();

    // Writers take the cache mutex exclusively.
    std::unique_lock<std::shared_mutex> guard(mCacheMutex);
    mCache->put(key, entry);
}

#ifdef BUILD_TESTS
bool
LiveBucketIndex::operator==(LiveBucketIndex const& in) const
Expand Down
28 changes: 28 additions & 0 deletions src/bucket/LiveBucketIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
#include "bucket/InMemoryIndex.h"
#include "bucket/LedgerCmp.h"
#include "bucket/LiveBucket.h"
#include "ledger/LedgerHashUtils.h" // IWYU pragma: keep
#include "util/NonCopyable.h"
#include "util/RandomEvictionCache.h"
#include "util/XDROperators.h" // IWYU pragma: keep
#include "xdr/Stellar-ledger-entries.h"
#include <filesystem>
#include <optional>

#include <cereal/archives/binary.hpp>
#include <shared_mutex>

namespace asio
{
Expand Down Expand Up @@ -52,9 +55,21 @@ class LiveBucketIndex : public NonMovableOrCopyable
using IterT =
std::variant<InMemoryIndex::IterT, DiskIndex<LiveBucket>::IterT>;

using CacheT =
RandomEvictionCache<LedgerKey, std::shared_ptr<BucketEntry const>>;

private:
std::unique_ptr<DiskIndex<LiveBucket> const> mDiskIndex;
std::unique_ptr<InMemoryIndex const> mInMemoryIndex;
std::unique_ptr<CacheT> mCache;

// The indexes themselves are thread safe, as they are immutable after
// construction. The cache is not; all accesses must first acquire this
// mutex.
mutable std::shared_mutex mCacheMutex;

medida::Meter& mCacheHitMeter;
medida::Meter& mCacheMissMeter;

static inline DiskIndex<LiveBucket>::IterT
getDiskIter(IterT const& iter)
Expand All @@ -68,6 +83,15 @@ class LiveBucketIndex : public NonMovableOrCopyable
return std::get<InMemoryIndex::IterT>(iter);
}

// True only when this index has both a disk index and a cache object; the
// cache is never consulted otherwise.
bool
shouldUseCache() const
{
    return mDiskIndex != nullptr && mCache != nullptr;
}

// Returns nullptr if cache is not enabled or entry not found
std::shared_ptr<BucketEntry const> getCachedEntry(LedgerKey const& k) const;

public:
inline static const std::string DB_BACKEND_STATE = "bl";
inline static const uint32_t BUCKET_INDEX_VERSION = 5;
Expand Down Expand Up @@ -97,6 +121,8 @@ class LiveBucketIndex : public NonMovableOrCopyable
std::optional<std::pair<std::streamoff, std::streamoff>>
getOfferRange() const;

void maybeAddToCache(std::shared_ptr<BucketEntry const> const& entry) const;

BucketEntryCounters const& getBucketEntryCounters() const;
uint32_t getPageSize() const;

Expand All @@ -105,6 +131,8 @@ class LiveBucketIndex : public NonMovableOrCopyable
void markBloomMiss() const;
#ifdef BUILD_TESTS
bool operator==(LiveBucketIndex const& in) const;

void clearCache() const;
#endif
};
}
5 changes: 5 additions & 0 deletions src/bucket/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,8 @@ lookup speed and memory overhead. The following configuration flags control thes
on startup. Defaults to true, should only be set to false for testing purposes.
Validators do not currently support persisted indexes. If NODE_IS_VALIDATOR=true,
this value is ignored and indexes are never persisted.
- `BUCKETLIST_DB_CACHED_PERCENT`
- Percentage of entries cached by BucketListDB when Bucket size is larger
than `BUCKETLIST_DB_INDEX_CUTOFF`. Note that this value does not impact
Buckets smaller than `BUCKETLIST_DB_INDEX_CUTOFF`, as they are always
completely held in memory.
Loading
Loading