Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

delete partition after fail #2788

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions curvefs/src/mds/heartbeat/heartbeat_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,15 @@ namespace curvefs {
namespace mds {
namespace heartbeat {
HeartbeatManager::HeartbeatManager(
const HeartbeatOption &option, const std::shared_ptr<Topology> &topology,
const std::shared_ptr<Coordinator> &coordinator,
const std::shared_ptr<SpaceManager> &spaceManager)
const HeartbeatOption& option, const std::shared_ptr<Topology>& topology,
const std::shared_ptr<Coordinator>& coordinator,
const std::shared_ptr<TopologyManager>& topologyManager,
const std::shared_ptr<SpaceManager>& spaceManager)
: topology_(topology), spaceManager_(spaceManager) {
healthyChecker_ =
std::make_shared<MetaserverHealthyChecker>(option, topology);

topoUpdater_ = std::make_shared<TopoUpdater>(topology);
topoUpdater_ = std::make_shared<TopoUpdater>(topology, topologyManager);

copysetConfGenerator_ = std::make_shared<CopysetConfGenerator>(
topology, coordinator, option.mdsStartTime,
Expand Down
9 changes: 5 additions & 4 deletions curvefs/src/mds/heartbeat/heartbeat_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,11 @@ namespace heartbeat {

class HeartbeatManager {
public:
HeartbeatManager(const HeartbeatOption &option,
const std::shared_ptr<Topology> &topology,
const std::shared_ptr<Coordinator> &coordinator,
const std::shared_ptr<SpaceManager> &spaceManager);
HeartbeatManager(const HeartbeatOption& option,
const std::shared_ptr<Topology>& topology,
const std::shared_ptr<Coordinator>& coordinator,
const std::shared_ptr<TopologyManager>& topologyManager,
const std::shared_ptr<SpaceManager>& spaceManager);

~HeartbeatManager() { Stop(); }

Expand Down
24 changes: 24 additions & 0 deletions curvefs/src/mds/heartbeat/topo_updater.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,30 @@ void TopoUpdater::UpdatePartitionTopo(
LOG(WARNING) << "hearbeat report partition which is not in topo"
<< ", copysetId = " << copySetId
<< ", partitionId = " << it.GetPartitionId();

const int maxRetries = 3;
int retries = 0;

// get copyset members
std::set<std::string> copysetMemberAddr;
TopoStatusCode ret;
do {
ret = topologyManager_->GetCopysetMembers(
it.GetPoolId(), copySetId, &copysetMemberAddr);
if (ret == TopoStatusCode::TOPO_OK) {
break;
}
++retries;
} while (retries < maxRetries);

if (ret != TopoStatusCode::TOPO_OK) {
LOG(ERROR) << "GetCopysetMembers failed, poolId = "
<< it.GetPoolId() << ", copysetId = " << copySetId;
} else {
topologyManager_->DeleteAbnormalPartition(
it.GetPoolId(), copySetId, it.GetPartitionId(),
copysetMemberAddr);
}
continue;
}

Expand Down
6 changes: 5 additions & 1 deletion curvefs/src/mds/heartbeat/topo_updater.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ namespace heartbeat {
using curvefs::mds::topology::CopySetIdType;
class TopoUpdater {
public:
explicit TopoUpdater(const std::shared_ptr<Topology> &topo) : topo_(topo) {}
explicit TopoUpdater(
const std::shared_ptr<Topology>& topo,
const std::shared_ptr<TopologyManager>& topologyManager)
: topo_(topo), topologyManager_(topologyManager) {}
~TopoUpdater() {}

/*
Expand Down Expand Up @@ -65,6 +68,7 @@ class TopoUpdater {

private:
std::shared_ptr<Topology> topo_;
std::shared_ptr<TopologyManager> topologyManager_;
};
} // namespace heartbeat
} // namespace mds
Expand Down
3 changes: 2 additions & 1 deletion curvefs/src/mds/mds.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,8 @@ void MDS::InitHeartbeatManager() {

heartbeatOption.mdsStartTime = steady_clock::now();
heartbeatManager_ = std::make_shared<HeartbeatManager>(
heartbeatOption, topology_, coordinator_, spaceManager_);
heartbeatOption, topology_, coordinator_, topologyManager_,
spaceManager_);
heartbeatManager_->Init();
}

Expand Down
16 changes: 16 additions & 0 deletions curvefs/src/mds/topology/topology_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,18 @@ TopoStatusCode TopologyManager::DeletePartition(uint32_t partitionId) {
return TopoStatusCode::TOPO_OK;
}

// Forwards a delete request for partition `partitionId` of copyset
// (`poolId`, `copysetId`) to metaserverClient_, handing it `addrs` unchanged.
// Nothing is returned: when the client does not report FSStatusCode::OK the
// failure is only logged.
void TopologyManager::DeleteAbnormalPartition(
    uint32_t poolId, uint32_t copysetId, uint32_t partitionId,
    const std::set<std::string>& addrs) {
    const auto status = metaserverClient_->DeletePartition(
        poolId, copysetId, partitionId, addrs);
    if (status == FSStatusCode::OK) {
        return;
    }

    LOG(ERROR) << "Failed to delete partition. PoolId: " << poolId
               << ", CopysetId: " << copysetId
               << ", PartitionId: " << partitionId;
}

void TopologyManager::DeletePartition(const DeletePartitionRequest *request,
DeletePartitionResponse *response) {
uint32_t partitionId = request->partitionid();
Expand Down Expand Up @@ -1258,6 +1270,10 @@ void TopologyManager::GetTopology(ListTopologyResponse *response) {
ListMetaserverOfCluster(response->mutable_metaservers());
}

std::shared_ptr<MetaserverClient> TopologyManager::GetMetaserverClient() {
return metaserverclient_;
}

void TopologyManager::ListZone(ListZoneResponse *response) {
response->set_statuscode(TopoStatusCode::TOPO_OK);
auto zoneIdVec = topology_->GetZoneInCluster();
Expand Down
6 changes: 6 additions & 0 deletions curvefs/src/mds/topology/topology_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ class TopologyManager {
virtual void CreatePartitions(const CreatePartitionRequest *request,
CreatePartitionResponse *response);

// Requests deletion of partition `partitionId` belonging to copyset
// (`poolId`, `copysetId`) through the metaserver client.
// `addrs` is passed to the client as-is; the heartbeat TopoUpdater fills it
// from GetCopysetMembers before calling this.
// Returns nothing: the implementation only logs an error when the client
// does not report FSStatusCode::OK.
virtual void DeleteAbnormalPartition(uint32_t poolId, uint32_t copysetId,
uint32_t partitionId,
const std::set<std::string>& addrs);

virtual void DeletePartition(const DeletePartitionRequest *request,
DeletePartitionResponse *response);

Expand Down Expand Up @@ -162,6 +166,8 @@ class TopologyManager {

virtual void GetTopology(ListTopologyResponse* response);

// Accessor for the MetaserverClient used by this manager; returns it as a
// shared_ptr.
virtual std::shared_ptr<MetaserverClient> GetMetaserverClient();

virtual void ListZone(ListZoneResponse* response);

virtual void ListServer(ListServerResponse* response);
Expand Down
9 changes: 9 additions & 0 deletions curvefs/test/mds/heartbeat/topo_update_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,15 @@ TEST_F(TestTopoUpdater, test_UpdatePartitionTopo_case4) {

EXPECT_CALL(*topology_, GetPartition(_, _)).WillOnce(Return(false));

std::set<std::string> copysetMemberAddr;
EXPECT_CALL(*topologyManager_, GetCopysetMembers(_, _, _))
.Times(AtMost(3))
.WillRepeatedly(DoAll(SetArgPointee<2>(copysetMemberAddr),
Return(TopoStatusCode::TOPO_OK)));

EXPECT_CALL(*topologyManager_, DeleteAbnormalPartition(_, _, _, _))
.Times(1);

std::list<::curvefs::mds::topology::Partition> partitionList;
partitionList.push_back(partition);

Expand Down