Skip to content

Commit

Permalink
Fix metaserver deadlock caused by bthread coroutine switching
Browse files Browse the repository at this point in the history
Signed-off-by: Hanqing Wu <[email protected]>
  • Loading branch information
wu-hanqing committed Oct 31, 2023
1 parent e1aa430 commit 4bd0b6a
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 4 deletions.
2 changes: 1 addition & 1 deletion curvefs/src/metaserver/metastore.h
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ class MetaStoreImpl : public MetaStore {
bool ClearInternal();

private:
RWLock rwLock_; // protect partitionMap_
curve::common::BthreadRWLock rwLock_; // protect partitionMap_
std::shared_ptr<KVStorage> kvStorage_;
std::map<uint32_t, std::shared_ptr<Partition>> partitionMap_;
std::list<uint32_t> partitionIds_;
Expand Down
52 changes: 49 additions & 3 deletions src/common/concurrent/rw_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,30 @@
#ifndef SRC_COMMON_CONCURRENT_RW_LOCK_H_
#define SRC_COMMON_CONCURRENT_RW_LOCK_H_

#include <pthread.h>
#include <assert.h>
#include <glog/logging.h>
#include <bthread/bthread.h>
#include <glog/logging.h>
#include <pthread.h>
#include <sys/types.h> // gettid

#include "include/curve_compiler_specific.h"
#include "src/common/uncopyable.h"

// Due to the mixed use of bthread and pthread in some cases, after acquiring a
// write lock on pthread rwlock, when the bthread coroutine is switched, the
// operation of releasing the write lock in the other pthread will not take
// effect (meaning that the write lock is still held), resulting in a deadlock.

// Check pthread rwlock tid between wrlock and unlock
#if defined(ENABLE_CHECK_PTHREAD_WRLOCK_TID) && \
(ENABLE_CHECK_PTHREAD_WRLOCK_TID == 1)
#define CURVE_CHECK_PTHREAD_WRLOCK_TID 1
#elif !defined(ENABLE_CHECK_PTHREAD_WRLOCK_TID)
#define CURVE_CHECK_PTHREAD_WRLOCK_TID 1
#else
#define CURVE_CHECK_PTHREAD_WRLOCK_TID 0
#endif

namespace curve {
namespace common {

Expand All @@ -51,10 +68,21 @@ class PthreadRWLockBase : public RWLockBase {
void WRLock() override {
int ret = pthread_rwlock_wrlock(&rwlock_);
CHECK(0 == ret) << "wlock failed: " << ret << ", " << strerror(ret);
#if CURVE_CHECK_PTHREAD_WRLOCK_TID
tid_ = gettid();
#endif
}

int TryWRLock() override {
return pthread_rwlock_trywrlock(&rwlock_);
int ret = pthread_rwlock_trywrlock(&rwlock_);
if (CURVE_UNLIKELY(ret != 0)) {
return ret;
}

#ifdef CURVE_CHECK_PTHREAD_WRLOCK_TID
tid_ = gettid();
#endif
return 0;
}

void RDLock() override {
Expand All @@ -67,6 +95,18 @@ class PthreadRWLockBase : public RWLockBase {
}

void Unlock() override {
#ifdef CURVE_CHECK_PTHREAD_WRLOCK_TID
if (tid_ != 0) {
const pid_t current = gettid();
// If CHECK here is triggered, please look at the comments at the
// beginning of the file.
// In the meantime, the simplest solution might be to use
// `BthreadRWLock` locks everywhere.
CHECK(tid_ == current) << "tid has changed, previous tid: " << tid_
<< ", current tid: " << current;
tid_ = 0;
}
#endif
pthread_rwlock_unlock(&rwlock_);
}

Expand All @@ -76,8 +116,14 @@ class PthreadRWLockBase : public RWLockBase {

pthread_rwlock_t rwlock_;
pthread_rwlockattr_t rwlockAttr_;

#ifdef CURVE_CHECK_PTHREAD_WRLOCK_TID
pid_t tid_ = 0;
#endif
};

#undef CURVE_CHECK_PTHREAD_WRLOCK_TID

class RWLock : public PthreadRWLockBase {
public:
RWLock() {
Expand Down

0 comments on commit 4bd0b6a

Please sign in to comment.