Skip to content

Commit 9dce230

Browse files
committed
Implement parallel ledger close, off by default
1 parent a757b61 commit 9dce230

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

68 files changed

+1133
-621
lines changed

src/bucket/BucketListBase.cpp

-2
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,6 @@ template <typename BucketT>
5757
void
5858
BucketLevel<BucketT>::setNext(FutureBucket<BucketT> const& fb)
5959
{
60-
releaseAssert(threadIsMain());
6160
mNextCurr = fb;
6261
}
6362

@@ -79,7 +78,6 @@ template <typename BucketT>
7978
void
8079
BucketLevel<BucketT>::setCurr(std::shared_ptr<BucketT> b)
8180
{
82-
releaseAssert(threadIsMain());
8381
mNextCurr.clear();
8482
mCurr = b;
8583
}

src/bucket/BucketListSnapshotBase.cpp

-2
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,6 @@ BucketListSnapshot<BucketT>::BucketListSnapshot(
1919
BucketListBase<BucketT> const& bl, LedgerHeader header)
2020
: mHeader(std::move(header))
2121
{
22-
releaseAssert(threadIsMain());
23-
2422
for (uint32_t i = 0; i < BucketListBase<BucketT>::kNumLevels; ++i)
2523
{
2624
auto const& level = bl.getLevel(i);

src/bucket/BucketManager.cpp

+36-38
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@ void
6262
BucketManager::initialize()
6363
{
6464
ZoneScoped;
65+
releaseAssert(threadIsMain());
6566
std::string d = mConfig.BUCKET_DIR_PATH;
6667

6768
if (!fs::exists(d))
@@ -729,7 +730,7 @@ BucketManager::getBucketListReferencedBuckets() const
729730
}
730731

731732
std::set<Hash>
732-
BucketManager::getAllReferencedBuckets() const
733+
BucketManager::getAllReferencedBuckets(HistoryArchiveState const& has) const
733734
{
734735
ZoneScoped;
735736
auto referenced = getBucketListReferencedBuckets();
@@ -740,8 +741,7 @@ BucketManager::getAllReferencedBuckets() const
740741

741742
// retain any bucket referenced by the last closed ledger as recorded in the
742743
// database (as merges complete, the bucket list drifts from that state)
743-
auto lclHas = mApp.getLedgerManager().getLastClosedLedgerHAS();
744-
auto lclBuckets = lclHas.allBuckets();
744+
auto lclBuckets = has.allBuckets();
745745
for (auto const& h : lclBuckets)
746746
{
747747
auto rit = referenced.emplace(hexToBin256(h));
@@ -752,39 +752,38 @@ BucketManager::getAllReferencedBuckets() const
752752
}
753753

754754
// retain buckets that are referenced by a state in the publish queue.
755-
auto pub = mApp.getHistoryManager().getBucketsReferencedByPublishQueue();
755+
for (auto const& h :
756+
HistoryManager::getBucketsReferencedByPublishQueue(mApp.getConfig()))
756757
{
757-
for (auto const& h : pub)
758+
auto rhash = hexToBin256(h);
759+
auto rit = referenced.emplace(rhash);
760+
if (rit.second)
758761
{
759-
auto rhash = hexToBin256(h);
760-
auto rit = referenced.emplace(rhash);
761-
if (rit.second)
762-
{
763-
CLOG_TRACE(Bucket, "{} referenced by publish queue", h);
764-
765-
// Project referenced bucket `rhash` -- which might be a merge
766-
// input captured before a merge finished -- through our weak
767-
// map of merge input/output relationships, to find any outputs
768-
// we'll want to retain in order to resynthesize the merge in
769-
// the future, rather than re-run it.
770-
mFinishedMerges.getOutputsUsingInput(rhash, referenced);
771-
}
762+
CLOG_TRACE(Bucket, "{} referenced by publish queue", h);
763+
764+
// Project referenced bucket `rhash` -- which might be a merge
765+
// input captured before a merge finished -- through our weak
766+
// map of merge input/output relationships, to find any outputs
767+
// we'll want to retain in order to resynthesize the merge in
768+
// the future, rather than re-run it.
769+
mFinishedMerges.getOutputsUsingInput(rhash, referenced);
772770
}
773771
}
774772
return referenced;
775773
}
776774

777775
void
778-
BucketManager::cleanupStaleFiles()
776+
BucketManager::cleanupStaleFiles(HistoryArchiveState const& has)
779777
{
780778
ZoneScoped;
779+
releaseAssert(threadIsMain());
781780
if (mConfig.DISABLE_BUCKET_GC)
782781
{
783782
return;
784783
}
785784

786785
std::lock_guard<std::recursive_mutex> lock(mBucketMutex);
787-
auto referenced = getAllReferencedBuckets();
786+
auto referenced = getAllReferencedBuckets(has);
788787
std::transform(std::begin(mSharedLiveBuckets), std::end(mSharedLiveBuckets),
789788
std::inserter(referenced, std::end(referenced)),
790789
[](std::pair<Hash, std::shared_ptr<LiveBucket>> const& p) {
@@ -818,11 +817,11 @@ BucketManager::cleanupStaleFiles()
818817
}
819818

820819
void
821-
BucketManager::forgetUnreferencedBuckets()
820+
BucketManager::forgetUnreferencedBuckets(HistoryArchiveState const& has)
822821
{
823822
ZoneScoped;
824823
std::lock_guard<std::recursive_mutex> lock(mBucketMutex);
825-
auto referenced = getAllReferencedBuckets();
824+
auto referenced = getAllReferencedBuckets(has);
826825
auto blReferenced = getBucketListReferencedBuckets();
827826

828827
auto bucketMapLoop = [&](auto& bucketMap, auto& futureMap) {
@@ -867,7 +866,7 @@ BucketManager::forgetUnreferencedBuckets()
867866
Bucket,
868867
"BucketManager::forgetUnreferencedBuckets dropping {}",
869868
filename);
870-
if (!filename.empty() && !mApp.getConfig().DISABLE_BUCKET_GC)
869+
if (!filename.empty() && !mConfig.DISABLE_BUCKET_GC)
871870
{
872871
CLOG_TRACE(Bucket, "removing bucket file: {}", filename);
873872
std::filesystem::remove(filename);
@@ -1048,15 +1047,15 @@ BucketManager::maybeSetIndex(std::shared_ptr<BucketBase> b,
10481047
}
10491048

10501049
void
1051-
BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq)
1050+
BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq,
1051+
SorobanNetworkConfig const& cfg)
10521052
{
10531053
releaseAssert(mSnapshotManager);
10541054
releaseAssert(!mEvictionFuture.valid());
10551055
releaseAssert(mEvictionStatistics);
10561056

10571057
auto searchableBL =
10581058
mSnapshotManager->copySearchableLiveBucketListSnapshot();
1059-
auto const& cfg = mApp.getLedgerManager().getSorobanNetworkConfigForApply();
10601059
auto const& sas = cfg.stateArchivalSettings();
10611060

10621061
using task_t = std::packaged_task<EvictionResult()>;
@@ -1076,30 +1075,26 @@ BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq)
10761075
}
10771076

10781077
void
1079-
BucketManager::resolveBackgroundEvictionScan(AbstractLedgerTxn& ltx,
1080-
uint32_t ledgerSeq,
1081-
LedgerKeySet const& modifiedKeys)
1078+
BucketManager::resolveBackgroundEvictionScan(
1079+
AbstractLedgerTxn& ltx, uint32_t ledgerSeq,
1080+
LedgerKeySet const& modifiedKeys, SorobanNetworkConfig& networkConfig)
10821081
{
10831082
ZoneScoped;
1084-
releaseAssert(threadIsMain());
10851083
releaseAssert(mEvictionStatistics);
10861084

10871085
if (!mEvictionFuture.valid())
10881086
{
1089-
startBackgroundEvictionScan(ledgerSeq);
1087+
startBackgroundEvictionScan(ledgerSeq, networkConfig);
10901088
}
10911089

10921090
auto evictionCandidates = mEvictionFuture.get();
10931091

1094-
auto const& networkConfig =
1095-
mApp.getLedgerManager().getSorobanNetworkConfigForApply();
1096-
10971092
// If eviction related settings changed during the ledger, we have to
10981093
// restart the scan
10991094
if (!evictionCandidates.isValid(ledgerSeq,
11001095
networkConfig.stateArchivalSettings()))
11011096
{
1102-
startBackgroundEvictionScan(ledgerSeq);
1097+
startBackgroundEvictionScan(ledgerSeq, networkConfig);
11031098
evictionCandidates = mEvictionFuture.get();
11041099
}
11051100

@@ -1209,6 +1204,7 @@ BucketManager::assumeState(HistoryArchiveState const& has,
12091204
uint32_t maxProtocolVersion, bool restartMerges)
12101205
{
12111206
ZoneScoped;
1207+
releaseAssert(threadIsMain());
12121208
releaseAssertOrThrow(mConfig.MODE_ENABLES_BUCKETLIST);
12131209

12141210
// TODO: Assume archival bucket state
@@ -1257,7 +1253,7 @@ BucketManager::assumeState(HistoryArchiveState const& has,
12571253
mLiveBucketList->restartMerges(mApp, maxProtocolVersion,
12581254
has.currentLedger);
12591255
}
1260-
cleanupStaleFiles();
1256+
cleanupStaleFiles(has);
12611257
}
12621258

12631259
void
@@ -1358,7 +1354,7 @@ std::shared_ptr<LiveBucket>
13581354
BucketManager::mergeBuckets(HistoryArchiveState const& has)
13591355
{
13601356
ZoneScoped;
1361-
1357+
releaseAssert(threadIsMain());
13621358
std::map<LedgerKey, LedgerEntry> ledgerMap = loadCompleteLedgerState(has);
13631359
BucketMetadata meta;
13641360
MergeCounters mc;
@@ -1548,9 +1544,11 @@ BucketManager::visitLedgerEntries(
15481544
}
15491545

15501546
std::shared_ptr<BasicWork>
1551-
BucketManager::scheduleVerifyReferencedBucketsWork()
1547+
BucketManager::scheduleVerifyReferencedBucketsWork(
1548+
HistoryArchiveState const& has)
15521549
{
1553-
std::set<Hash> hashes = getAllReferencedBuckets();
1550+
releaseAssert(threadIsMain());
1551+
std::set<Hash> hashes = getAllReferencedBuckets(has);
15541552
std::vector<std::shared_ptr<BasicWork>> seq;
15551553
for (auto const& h : hashes)
15561554
{

src/bucket/BucketManager.h

+15-6
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,11 @@ class BucketManager : NonMovableOrCopyable
7070

7171
static std::string const kLockFilename;
7272

73+
// NB: ideally, BucketManager should have no access to mApp, as it's too
74+
// dangerous in the context of parallel application. BucketManager is quite
75+
// bloated, with lots of legacy code, so to ensure safety, annotate all
76+
// functions using mApp with `releaseAssert(threadIsMain())` and avoid
77+
// accessing mApp in the background.
7378
Application& mApp;
7479
std::unique_ptr<LiveBucketList> mLiveBucketList;
7580
std::unique_ptr<HotArchiveBucketList> mHotArchiveBucketList;
@@ -124,7 +129,7 @@ class BucketManager : NonMovableOrCopyable
124129

125130
std::atomic<bool> mIsShutdown{false};
126131

127-
void cleanupStaleFiles();
132+
void cleanupStaleFiles(HistoryArchiveState const& has);
128133
void deleteTmpDirAndUnlockBucketDir();
129134
void deleteEntireBucketDir();
130135

@@ -260,7 +265,7 @@ class BucketManager : NonMovableOrCopyable
260265
// not immediately cause the buckets to delete themselves, if someone else
261266
// is using them via a shared_ptr<>, but the BucketManager will no longer
262267
// independently keep them alive.
263-
void forgetUnreferencedBuckets();
268+
void forgetUnreferencedBuckets(HistoryArchiveState const& has);
264269

265270
// Feed a new batch of entries to the bucket list. This interface expects to
266271
// be given separate init (created) and live (updated) entry vectors. The
@@ -290,10 +295,12 @@ class BucketManager : NonMovableOrCopyable
290295
// Scans BucketList for non-live entries to evict starting at the entry
291296
// pointed to by EvictionIterator. Evicts until `maxEntriesToEvict` entries
292297
// have been evicted or maxEvictionScanSize bytes have been scanned.
293-
void startBackgroundEvictionScan(uint32_t ledgerSeq);
298+
void startBackgroundEvictionScan(uint32_t ledgerSeq,
299+
SorobanNetworkConfig const& cfg);
294300
void resolveBackgroundEvictionScan(AbstractLedgerTxn& ltx,
295301
uint32_t ledgerSeq,
296-
LedgerKeySet const& modifiedKeys);
302+
LedgerKeySet const& modifiedKeys,
303+
SorobanNetworkConfig& networkConfig);
297304

298305
medida::Meter& getBloomMissMeter() const;
299306
medida::Meter& getBloomLookupMeter() const;
@@ -318,7 +325,8 @@ class BucketManager : NonMovableOrCopyable
318325

319326
// Return the set of buckets referenced by the BucketList, LCL HAS,
320327
// and publish queue.
321-
std::set<Hash> getAllReferencedBuckets() const;
328+
std::set<Hash>
329+
getAllReferencedBuckets(HistoryArchiveState const& has) const;
322330

323331
// Check for missing bucket files that would prevent `assumeState` from
324332
// succeeding
@@ -375,7 +383,8 @@ class BucketManager : NonMovableOrCopyable
375383

376384
// Schedule a Work class that verifies the hashes of all referenced buckets
377385
// on background threads.
378-
std::shared_ptr<BasicWork> scheduleVerifyReferencedBucketsWork();
386+
std::shared_ptr<BasicWork>
387+
scheduleVerifyReferencedBucketsWork(HistoryArchiveState const& has);
379388

380389
Config const& getConfig() const;
381390

src/bucket/BucketSnapshotManager.cpp

-3
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,6 @@ BucketSnapshotManager::recordBulkLoadMetrics(std::string const& label,
9898
{
9999
// For now, only keep metrics for the main thread. We can decide on what
100100
// metrics make sense when more background services are added later.
101-
releaseAssert(threadIsMain());
102101

103102
if (numEntries != 0)
104103
{
@@ -153,8 +152,6 @@ BucketSnapshotManager::updateCurrentSnapshot(
153152
SnapshotPtrT<LiveBucket>&& liveSnapshot,
154153
SnapshotPtrT<HotArchiveBucket>&& hotArchiveSnapshot)
155154
{
156-
releaseAssert(threadIsMain());
157-
158155
auto updateSnapshot = [numHistoricalSnapshots = mNumHistoricalSnapshots](
159156
auto& currentSnapshot, auto& historicalSnapshots,
160157
auto&& newSnapshot) {

src/bucket/LiveBucketList.h

+3
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,9 @@
99

1010
namespace stellar
1111
{
12+
13+
class SorobanNetworkConfig;
14+
1215
// The LiveBucketList stores the current canonical state of the ledger. It is
1316
// made up of LiveBucket buckets, which in turn store individual entries of type
1417
// BucketEntry. When an entry is "evicted" from the ledger, it is removed from

src/bucket/SearchableBucketList.cpp

-2
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,6 @@ SearchableLiveBucketListSnapshot::loadPoolShareTrustLinesByAccountAndAsset(
108108
ZoneScoped;
109109

110110
// This query should only be called during TX apply
111-
releaseAssert(threadIsMain());
112111
releaseAssert(mSnapshot);
113112

114113
LedgerKeySet trustlinesToLoad;
@@ -153,7 +152,6 @@ SearchableLiveBucketListSnapshot::loadInflationWinners(size_t maxWinners,
153152

154153
// This is a legacy query, should only be called by main thread during
155154
// catchup
156-
releaseAssert(threadIsMain());
157155
auto timer = mSnapshotManager.recordBulkLoadMetrics("inflationWinners", 0)
158156
.TimeScope();
159157

src/bucket/test/BucketListTests.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -869,7 +869,7 @@ TEST_CASE_VERSIONS("network config snapshots BucketList size", "[bucketlist]")
869869
LedgerManagerForBucketTests& lm = app->getLedgerManager();
870870

871871
auto& networkConfig =
872-
app->getLedgerManager().getSorobanNetworkConfigReadOnly();
872+
app->getLedgerManager().getMutableSorobanNetworkConfig();
873873

874874
uint32_t windowSize = networkConfig.stateArchivalSettings()
875875
.bucketListSizeWindowSampleSize;

0 commit comments

Comments
 (0)