From 74972a0788514d5178f44001ee794ea9f47d2dc7 Mon Sep 17 00:00:00 2001 From: Hanqing Wu Date: Tue, 31 Oct 2023 19:27:20 +0800 Subject: [PATCH] Fix metaserver deadlock caused by bthread coroutine switching Signed-off-by: Hanqing Wu --- curvefs/src/metaserver/partition.cpp | 14 ++++++- curvefs/src/metaserver/partition.h | 2 +- src/common/concurrent/rw_lock.h | 58 +++++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/curvefs/src/metaserver/partition.cpp b/curvefs/src/metaserver/partition.cpp index 3c3e7d7b99..08c739e876 100644 --- a/curvefs/src/metaserver/partition.cpp +++ b/curvefs/src/metaserver/partition.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "curvefs/proto/metaserver.pb.h" #include "curvefs/src/metaserver/copyset/copyset_node_manager.h" @@ -537,8 +538,17 @@ MetaStatusCode Partition::GetAllBlockGroup( } void Partition::StartS3Compact() { - S3CompactManager::GetInstance().Register( - S3Compact{inodeManager_, partitionInfo_}); + // register s3 compaction task in a separate thread, since the caller may + // holds a pthread wrlock when calling this function, and create `S3Compact` + // will acquire a bthread rwlock, may cause thread switching, thus causing a + // deadlock. + // FIXME(wuhanqing): handle it in a more elegant way + auto handle = std::async(std::launch::async, [this]() { + S3CompactManager::GetInstance().Register( + S3Compact{inodeManager_, partitionInfo_}); + }); + + handle.wait(); } void Partition::CancelS3Compact() { diff --git a/curvefs/src/metaserver/partition.h b/curvefs/src/metaserver/partition.h index c78738cb85..f11ccb184b 100644 --- a/curvefs/src/metaserver/partition.h +++ b/curvefs/src/metaserver/partition.h @@ -54,7 +54,7 @@ constexpr uint64_t kMinPartitionStartId = ROOTINODEID + 2; class Partition { public: Partition(PartitionInfo partition, std::shared_ptr kvStorage, - bool startCompact = true, bool startVolumeDeallocate = true); + bool startCompact = true, bool startVolumeDeallocate = false); Partition() = default; // dentry diff --git a/src/common/concurrent/rw_lock.h b/src/common/concurrent/rw_lock.h index d7c47c7d3c..9eef924223 100644 --- a/src/common/concurrent/rw_lock.h +++ b/src/common/concurrent/rw_lock.h @@ -23,13 +23,31 @@ #ifndef SRC_COMMON_CONCURRENT_RW_LOCK_H_ #define SRC_COMMON_CONCURRENT_RW_LOCK_H_ -#include #include -#include #include +#include +#include +#include // gettid +#include "include/curve_compiler_specific.h" #include "src/common/uncopyable.h" +// Due to the mixed use of bthread and pthread in some cases, acquiring another +// bthread lock(mutex/rwlock) after acquiring a write lock on a pthread rwlock +// may result in switching the bthread coroutine, and then the operation of +// releasing the previous write lock in the other pthread will not take effect +// (implying that the write lock is still held), thus causing a deadlock. + +// Check pthread rwlock tid between wrlock and unlock +#if defined(ENABLE_CHECK_PTHREAD_WRLOCK_TID) && \ + (ENABLE_CHECK_PTHREAD_WRLOCK_TID == 1) +#define CURVE_CHECK_PTHREAD_WRLOCK_TID 1 +#elif !defined(ENABLE_CHECK_PTHREAD_WRLOCK_TID) +#define CURVE_CHECK_PTHREAD_WRLOCK_TID 1 +#else +#define CURVE_CHECK_PTHREAD_WRLOCK_TID 0 +#endif + namespace curve { namespace common { @@ -51,10 +69,21 @@ class PthreadRWLockBase : public RWLockBase { void WRLock() override { int ret = pthread_rwlock_wrlock(&rwlock_); CHECK(0 == ret) << "wlock failed: " << ret << ", " << strerror(ret); +#if CURVE_CHECK_PTHREAD_WRLOCK_TID + tid_ = gettid(); +#endif } int TryWRLock() override { - return pthread_rwlock_trywrlock(&rwlock_); + int ret = pthread_rwlock_trywrlock(&rwlock_); + if (CURVE_UNLIKELY(ret != 0)) { + return ret; + } + +#ifdef CURVE_CHECK_PTHREAD_WRLOCK_TID + tid_ = gettid(); +#endif + return 0; } void RDLock() override { @@ -67,6 +96,19 @@ class PthreadRWLockBase : public RWLockBase { } void Unlock() override { +#ifdef CURVE_CHECK_PTHREAD_WRLOCK_TID + if (tid_ != 0) { + const pid_t current = gettid(); + // If CHECK here is triggered, please look at the comments at the + // beginning of the file. + // In the meantime, the simplest solution might be to use + // `BthreadRWLock` locks everywhere. + CHECK(tid_ == current) + << ", tid has changed, previous tid: " << tid_ + << ", current tid: " << current; + tid_ = 0; + } +#endif pthread_rwlock_unlock(&rwlock_); } @@ -76,8 +118,14 @@ class PthreadRWLockBase : public RWLockBase { pthread_rwlock_t rwlock_; pthread_rwlockattr_t rwlockAttr_; + +#ifdef CURVE_CHECK_PTHREAD_WRLOCK_TID + pid_t tid_ = 0; +#endif }; +#undef CURVE_CHECK_PTHREAD_WRLOCK_TID + class RWLock : public PthreadRWLockBase { public: RWLock() { @@ -122,7 +170,7 @@ class BthreadRWLock : public RWLockBase { } int TryWRLock() override { - // not support yet + LOG(WARNING) << "TryWRLock not support yet"; return EINVAL; } @@ -132,7 +180,7 @@ class BthreadRWLock : public RWLockBase { } int TryRDLock() override { - // not support yet + LOG(WARNING) << "TryRDLock not support yet"; return EINVAL; }