Skip to content

Commit

Permalink
delete partition after fail
Browse files Browse the repository at this point in the history
  • Loading branch information
yanchaomei committed Oct 9, 2023
1 parent 3aeaeb4 commit 7165c43
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 3 deletions.
3 changes: 2 additions & 1 deletion curvefs/src/mds/heartbeat/heartbeat_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,13 @@ namespace heartbeat {
HeartbeatManager::HeartbeatManager(
const HeartbeatOption &option, const std::shared_ptr<Topology> &topology,
const std::shared_ptr<Coordinator> &coordinator,
const std::shared_ptr<TopologyManager> &topologyManager,
const std::shared_ptr<SpaceManager> &spaceManager)
: topology_(topology), spaceManager_(spaceManager) {
healthyChecker_ =
std::make_shared<MetaserverHealthyChecker>(option, topology);

topoUpdater_ = std::make_shared<TopoUpdater>(topology);
topoUpdater_ = std::make_shared<TopoUpdater>(topology, topologyManager);

copysetConfGenerator_ = std::make_shared<CopysetConfGenerator>(
topology, coordinator, option.mdsStartTime,
Expand Down
1 change: 1 addition & 0 deletions curvefs/src/mds/heartbeat/heartbeat_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class HeartbeatManager {
HeartbeatManager(const HeartbeatOption &option,
const std::shared_ptr<Topology> &topology,
const std::shared_ptr<Coordinator> &coordinator,
const std::shared_ptr<TopologyManager> &topologyManager,
const std::shared_ptr<SpaceManager> &spaceManager);

~HeartbeatManager() { Stop(); }
Expand Down
22 changes: 22 additions & 0 deletions curvefs/src/mds/heartbeat/topo_updater.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,28 @@ void TopoUpdater::UpdatePartitionTopo(
LOG(WARNING) << "hearbeat report partition which is not in topo"
<< ", copysetId = " << copySetId
<< ", partitionId = " << it.GetPartitionId();

const int maxRetries = 3;
int retries = 0;

// get copyset members
std::set<std::string> copysetMemberAddr;
TopoStatusCode ret;
do {
ret = topologyManager_->GetCopysetMembers(it.GetPoolId(), copySetId, &copysetMemberAddr);
if (ret == TopoStatusCode::TOPO_OK) {
break;
}
++retries;
} while (retries < maxRetries);

if (ret != TopoStatusCode::TOPO_OK) {
LOG(ERROR) << "GetCopysetMembers failed, poolId = " << it.GetPoolId()
<< ", copysetId = " << copySetId;
}
else {
topologyManager_->DeleteAbnormalPartition(it.GetPoolId(), copySetId, it.GetPartitionId(), copysetMemberAddr);
}
continue;
}

Expand Down
4 changes: 3 additions & 1 deletion curvefs/src/mds/heartbeat/topo_updater.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ namespace heartbeat {
using curvefs::mds::topology::CopySetIdType;
class TopoUpdater {
public:
explicit TopoUpdater(const std::shared_ptr<Topology> &topo) : topo_(topo) {}
explicit TopoUpdater(const std::shared_ptr<Topology> &topo,
const std::shared_ptr<TopologyManager> &topologyManager) : topo_(topo), topologyManager_(topologyManager){}
~TopoUpdater() {}

/*
Expand Down Expand Up @@ -65,6 +66,7 @@ class TopoUpdater {

private:
std::shared_ptr<Topology> topo_;
std::shared_ptr<TopologyManager> topologyManager_;
};
} // namespace heartbeat
} // namespace mds
Expand Down
2 changes: 1 addition & 1 deletion curvefs/src/mds/mds.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ void MDS::InitHeartbeatManager() {

heartbeatOption.mdsStartTime = steady_clock::now();
heartbeatManager_ = std::make_shared<HeartbeatManager>(
heartbeatOption, topology_, coordinator_, spaceManager_);
heartbeatOption, topology_, coordinator_, topologyManager_, spaceManager_);
heartbeatManager_->Init();
}

Expand Down
14 changes: 14 additions & 0 deletions curvefs/src/mds/topology/topology_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,16 @@ TopoStatusCode TopologyManager::DeletePartition(uint32_t partitionId) {
return TopoStatusCode::TOPO_OK;
}

/**
 * @brief Request deletion of a partition through the metaserver client.
 *
 * Forwards the identifiers and the address set unchanged to
 * metaserverClient_->DeletePartition(). The outcome is not reported to the
 * caller: any status other than FSStatusCode::OK is only written to the
 * error log.
 *
 * @param poolId       pool id forwarded to the metaserver client
 * @param copysetId    copyset id forwarded to the metaserver client
 * @param partitionId  id of the partition to delete
 * @param addrs        address set forwarded to the metaserver client
 *                     (presumably the copyset member addresses -- confirm
 *                     against the caller)
 */
void TopologyManager::DeleteAbnormalPartition(
    uint32_t poolId, uint32_t copysetId, uint32_t partitionId,
    const std::set<std::string> &addrs) {
    const auto status = metaserverClient_->DeletePartition(
        poolId, copysetId, partitionId, addrs);
    if (status == FSStatusCode::OK) {
        return;
    }

    // Best effort: the failure is logged, not propagated.
    LOG(ERROR) << "Failed to delete partition. PoolId: " << poolId
               << ", CopysetId: " << copysetId
               << ", PartitionId: " << partitionId;
}

void TopologyManager::DeletePartition(const DeletePartitionRequest *request,
DeletePartitionResponse *response) {
uint32_t partitionId = request->partitionid();
Expand Down Expand Up @@ -1258,6 +1268,10 @@ void TopologyManager::GetTopology(ListTopologyResponse *response) {
ListMetaserverOfCluster(response->mutable_metaservers());
}

std::shared_ptr<MetaserverClient> TopologyManager::GetMetaserverClient(){
return metaserverclient_;
}

void TopologyManager::ListZone(ListZoneResponse *response) {
response->set_statuscode(TopoStatusCode::TOPO_OK);
auto zoneIdVec = topology_->GetZoneInCluster();
Expand Down
5 changes: 5 additions & 0 deletions curvefs/src/mds/topology/topology_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ class TopologyManager {
virtual void CreatePartitions(const CreatePartitionRequest *request,
CreatePartitionResponse *response);

virtual void DeleteAbnormalPartition(uint32_t poolId, uint32_t copysetId, uint32_t partitionId,
const std::set<std::string> &addrs);

virtual void DeletePartition(const DeletePartitionRequest *request,
DeletePartitionResponse *response);

Expand Down Expand Up @@ -162,6 +165,8 @@ class TopologyManager {

virtual void GetTopology(ListTopologyResponse* response);

virtual std::shared_ptr<MetaserverClient> GetMetaserverClient();

virtual void ListZone(ListZoneResponse* response);

virtual void ListServer(ListServerResponse* response);
Expand Down
7 changes: 7 additions & 0 deletions curvefs/test/mds/heartbeat/topo_update_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,13 @@ TEST_F(TestTopoUpdater, test_UpdatePartitionTopo_case4) {

EXPECT_CALL(*topology_, GetPartition(_, _)).WillOnce(Return(false));

std::set<std::string> copysetMemberAddr;
EXPECT_CALL(*topologyManager_, GetCopysetMembers(_, _, _))
.Times(AtMost(3))
.WillRepeatedly(DoAll(SetArgPointee<2>(copysetMemberAddr), Return(TopoStatusCode::TOPO_OK)));

EXPECT_CALL(*topologyManager_, DeleteAbnormalPartition(_, _, _, _)).Times(1);

std::list<::curvefs::mds::topology::Partition> partitionList;
partitionList.push_back(partition);

Expand Down

0 comments on commit 7165c43

Please sign in to comment.