Skip to content

Commit

Permalink
mds/snapshotcloneserver : fix snapshot delete retry
Browse files Browse the repository at this point in the history
  • Loading branch information
xu-chaojie committed Feb 2, 2021
1 parent d863271 commit 12c23c0
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 27 deletions.
24 changes: 17 additions & 7 deletions src/mds/nameserver2/curvefs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1212,25 +1212,35 @@ StatusCode CurveFS::CheckSnapShotFileStatus(const std::string &fileName,
TaskIDType taskID = static_cast<TaskIDType>(snapShotFileInfo.id());
auto task = cleanManager_->GetTask(taskID);
if (task == nullptr) {
*progress = 100;
return StatusCode::kOK;
// GetSnapShotFileInfo again
StatusCode ret2 =
GetSnapShotFileInfo(fileName, seq, &snapShotFileInfo);
// if it no longer exists, the delete succeeded.
if (StatusCode::kSnapshotFileNotExists == ret2) {
*progress = 100;
return StatusCode::kSnapshotFileNotExists;
// otherwise the snapshot file still exists,
// which means the delete failed and the retry limit was exceeded.
} else {
*progress = 0;
LOG(ERROR) << "snapshot file delete fail, fileName = "
<< fileName << ", seq = " << seq;
return StatusCode::kSnapshotFileDeleteError;
}
}

TaskStatus taskStatus = task->GetTaskProgress().GetStatus();
switch (taskStatus) {
case TaskStatus::PROGRESSING:
case TaskStatus::FAILED: // FAILED task will retry
*progress = task->GetTaskProgress().GetProgress();
break;
case TaskStatus::FAILED:
*progress = 0;
LOG(ERROR) << "snapshot file delete fail, fileName = "
<< fileName << ", seq = " << seq;
return StatusCode::kSnapshotFileDeleteError;
case TaskStatus::SUCCESS:
*progress = 100;
break;
}
} else {
// the delete has not begun yet.
*progress = 0;
}

Expand Down
2 changes: 1 addition & 1 deletion src/mds/schedule/operatorController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ bool OperatorController::ApplyOperator(const CopySetInfo &originInfo,

// operator timeout or finish
if (operators_[originInfo.id].IsTimeout()) {
LOG(ERROR) << "apply operator: "
LOG(WARNING) << "apply operator: "
<< operators_[originInfo.id].OpToString()
<< " on " << originInfo.CopySetInfoStr()
<< " fail, operator is timeout";
Expand Down
10 changes: 3 additions & 7 deletions src/snapshotcloneserver/snapshot/snapshot_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -616,15 +616,12 @@ int SnapshotCoreImpl::DeleteSnapshotOnCurvefs(const SnapshotInfo &info) {
<< ", seqNum = " << seqNum
<< ", status = " << static_cast<int>(status)
<< ", uuid = " << info.GetUuid();
// NOTEXIST means the delete succeeded.
if (-LIBCURVE_ERROR::NOTEXIST == ret) {
LOG(INFO) << "Check snapShot delete success"
<< ", uuid = " << info.GetUuid();
break;
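// Currently MDS retries when deleting a snapshot fails; on failure it
// returns the error code DELETE_ERROR, and any other error code is
// treated as InternalError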
} else if (LIBCURVE_ERROR::OK == ret ||
-LIBCURVE_ERROR::DELETE_ERROR == ret) {
// nothing
} else if (LIBCURVE_ERROR::OK == ret) {
if (status != FileStatus::Deleting) {
LOG(ERROR) << "CheckSnapShotStatus fail"
<< ", ret = " << ret
Expand All @@ -640,8 +637,7 @@ int SnapshotCoreImpl::DeleteSnapshotOnCurvefs(const SnapshotInfo &info) {
}
std::this_thread::sleep_for(
std::chrono::milliseconds(checkSnapshotStatusIntervalMs_));
} while (LIBCURVE_ERROR::OK == ret ||
-LIBCURVE_ERROR::DELETE_ERROR == ret);
} while (LIBCURVE_ERROR::OK == ret);
return kErrCodeSuccess;
}

Expand Down
55 changes: 48 additions & 7 deletions test/mds/nameserver2/curvefs_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2362,7 +2362,7 @@ TEST_F(CurveFSTest, CheckSnapShotFileStatus) {
ASSERT_EQ(progress, 0);
}

// snapshot file is deleting, task is not exist
// snapshot file is deleting, task is not exist, delete success
{
FileInfo originalFile;
originalFile.set_id(1);
Expand All @@ -2371,18 +2371,22 @@ TEST_F(CurveFSTest, CheckSnapShotFileStatus) {
originalFile.set_filetype(FileType::INODE_PAGEFILE);

EXPECT_CALL(*storage_, GetFile(_, _, _))
.Times(1)
.WillOnce(DoAll(SetArgPointee<2>(originalFile),
.Times(2)
.WillRepeatedly(DoAll(SetArgPointee<2>(originalFile),
Return(StoreStatus::OK)));

std::vector<FileInfo> snapShotFiles;
FileInfo snapInfo;
snapInfo.set_seqnum(1);
snapInfo.set_filestatus(FileStatus::kFileDeleting);
snapShotFiles.push_back(snapInfo);

std::vector<FileInfo> snapShotFiles2;
EXPECT_CALL(*storage_, ListSnapshotFile(_, _, _))
.Times(1)
.Times(2)
.WillOnce(DoAll(SetArgPointee<2>(snapShotFiles),
Return(StoreStatus::OK)))
.WillOnce(DoAll(SetArgPointee<2>(snapShotFiles2),
Return(StoreStatus::OK)));

EXPECT_CALL(*mockcleanManager_,
Expand All @@ -2393,11 +2397,46 @@ TEST_F(CurveFSTest, CheckSnapShotFileStatus) {
FileStatus fileStatus;
uint32_t progress;
ASSERT_EQ(curvefs_->CheckSnapShotFileStatus("/originalFile",
1, &fileStatus, &progress), StatusCode::kOK);
ASSERT_EQ(fileStatus, FileStatus::kFileDeleting);
1, &fileStatus, &progress), StatusCode::kSnapshotFileNotExists);
ASSERT_EQ(progress, 100);
}

// snapshot file is deleting, task is not exist, delete failed
{
FileInfo originalFile;
originalFile.set_id(1);
originalFile.set_seqnum(1);
originalFile.set_filename("originalFile");
originalFile.set_filetype(FileType::INODE_PAGEFILE);

EXPECT_CALL(*storage_, GetFile(_, _, _))
.Times(2)
.WillRepeatedly(DoAll(SetArgPointee<2>(originalFile),
Return(StoreStatus::OK)));

std::vector<FileInfo> snapShotFiles;
FileInfo snapInfo;
snapInfo.set_seqnum(1);
snapInfo.set_filestatus(FileStatus::kFileDeleting);
snapShotFiles.push_back(snapInfo);
EXPECT_CALL(*storage_, ListSnapshotFile(_, _, _))
.Times(2)
.WillRepeatedly(DoAll(SetArgPointee<2>(snapShotFiles),
Return(StoreStatus::OK)));

EXPECT_CALL(*mockcleanManager_,
GetTask(_))
.Times(1)
.WillOnce(Return(nullptr));

FileStatus fileStatus;
uint32_t progress;
ASSERT_EQ(curvefs_->CheckSnapShotFileStatus("/originalFile",
1, &fileStatus, &progress), StatusCode::kSnapshotFileDeleteError);
ASSERT_EQ(fileStatus, FileStatus::kFileDeleting);
ASSERT_EQ(progress, 0);
}

// snapshot file is deleting, task is PROGRESSING
{
FileInfo originalFile;
Expand Down Expand Up @@ -2475,7 +2514,9 @@ TEST_F(CurveFSTest, CheckSnapShotFileStatus) {
FileStatus fileStatus;
uint32_t progress;
ASSERT_EQ(curvefs_->CheckSnapShotFileStatus("/originalFile",
1, &fileStatus, &progress), StatusCode::kSnapshotFileDeleteError);
1, &fileStatus, &progress), StatusCode::kOK);
ASSERT_EQ(fileStatus, FileStatus::kFileDeleting);
ASSERT_EQ(progress, 50);
}

// snapshot file is deleting, task is SUCCESS
Expand Down
155 changes: 150 additions & 5 deletions test/snapshotcloneserver/test_snapshot_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,11 +501,8 @@ TEST_F(TestSnapshotCoreImpl,
EXPECT_CALL(*client_, DeleteSnapshot(fileName, user, seqNum))
.WillOnce(Return(LIBCURVE_ERROR::OK));

// Return an error once to cover the case where DELETE_ERROR is returned
EXPECT_CALL(*client_, CheckSnapShotStatus(_, _, _, _))
.Times(3)
.WillOnce(DoAll(SetArgPointee<3>(FileStatus::Deleting),
Return(-LIBCURVE_ERROR::DELETE_ERROR)))
.Times(2)
.WillOnce(DoAll(SetArgPointee<3>(FileStatus::Deleting),
Return(LIBCURVE_ERROR::OK)))
.WillOnce(Return(-LIBCURVE_ERROR::NOTEXIST));
Expand Down Expand Up @@ -1713,6 +1710,154 @@ TEST_F(TestSnapshotCoreImpl,
ASSERT_EQ(Status::error, task->GetSnapshotInfo().GetStatus());
}

// Verifies that HandleCreateSnapshotTask finishes the task with
// Status::error when CheckSnapShotStatus returns
// -LIBCURVE_ERROR::DELETE_ERROR after DeleteSnapshot itself returned OK.
// Every mocked step before the status check is set up to succeed, so the
// status check is the only failing call in this scenario.
TEST_F(TestSnapshotCoreImpl,
TestHandleCreateSnapshotTask_CheckSnapShotStatusFailOnDeleteError) {
UUID uuid = "uuid1";
std::string user = "user1";
std::string fileName = "file1";
std::string desc = "snap1";
uint64_t seqNum = 100;

// The task under test starts from a snapshot in the pending state.
SnapshotInfo info(uuid, user, fileName, desc);
info.SetStatus(Status::pending);
auto snapshotInfoMetric = std::make_shared<SnapshotInfoMetric>(uuid);
std::shared_ptr<SnapshotTaskInfo> task =
std::make_shared<SnapshotTaskInfo>(info, snapshotInfoMetric);


// Snapshot creation succeeds and reports seqNum through the third argument.
EXPECT_CALL(*client_, CreateSnapshot(fileName, user, _))
.WillOnce(DoAll(
SetArgPointee<2>(seqNum),
Return(LIBCURVE_ERROR::OK)));

// Snapshot file layout: length is 2 segments, each segment is 2 chunks,
// and each chunk is 2 * option.chunkSplitSize bytes.
FInfo snapInfo;
snapInfo.seqnum = 100;
snapInfo.chunksize = 2 * option.chunkSplitSize;
snapInfo.segmentsize = 2 * snapInfo.chunksize;
snapInfo.length = 2 * snapInfo.segmentsize;
snapInfo.ctime = 10;
EXPECT_CALL(*client_, GetSnapshot(fileName, user, seqNum, _))
.WillOnce(DoAll(
SetArgPointee<3>(snapInfo),
Return(LIBCURVE_ERROR::OK)));


// Exactly two metadata updates are expected, both succeeding.
EXPECT_CALL(*metaStore_, UpdateSnapshot(_))
.Times(2)
.WillRepeatedly(Return(kErrCodeSuccess));

// First segment: chunks 1 and 2.
LogicPoolID lpid1 = 1;
CopysetID cpid1 = 1;
ChunkID chunkId1 = 1;
LogicPoolID lpid2 = 2;
CopysetID cpid2 = 2;
ChunkID chunkId2 = 2;

SegmentInfo segInfo1;
segInfo1.chunkvec.push_back(
ChunkIDInfo(chunkId1, lpid1, cpid1));
segInfo1.chunkvec.push_back(
ChunkIDInfo(chunkId2, lpid2, cpid2));

// Second segment: chunks 3 and 4.
LogicPoolID lpid3 = 3;
CopysetID cpid3 = 3;
ChunkID chunkId3 = 3;
LogicPoolID lpid4 = 4;
CopysetID cpid4 = 4;
ChunkID chunkId4 = 4;

SegmentInfo segInfo2;
segInfo2.chunkvec.push_back(
ChunkIDInfo(chunkId3, lpid3, cpid3));
segInfo2.chunkvec.push_back(
ChunkIDInfo(chunkId4, lpid4, cpid4));

// The two segment lookups return segInfo1 and then segInfo2, in that order.
EXPECT_CALL(*client_, GetSnapshotSegmentInfo(fileName,
user,
seqNum,
_,
_))
.Times(2)
.WillOnce(DoAll(SetArgPointee<4>(segInfo1),
Return(LIBCURVE_ERROR::OK)))
.WillOnce(DoAll(SetArgPointee<4>(segInfo2),
Return(LIBCURVE_ERROR::OK)));

// Each chunk reports a single chunk sequence number (100).
// NOTE(review): Times(4) presumably corresponds to one call per chunk
// (2 segments x 2 chunks) - confirm against SnapshotCoreImpl.
uint64_t chunkSn = 100;
ChunkInfoDetail chunkInfo;
chunkInfo.chunkSn.push_back(chunkSn);
EXPECT_CALL(*client_, GetChunkInfo(_, _))
.Times(4)
.WillRepeatedly(DoAll(SetArgPointee<1>(chunkInfo),
Return(LIBCURVE_ERROR::OK)));

EXPECT_CALL(*dataStore_, PutChunkIndexData(_, _))
.WillOnce(Return(kErrCodeSuccess));

UUID uuid2 = "uuid2";
std::string desc2 = "desc2";

// The snapshot list holds the snapshot being created (seqNum) and a
// second snapshot (seqNum - 1) that is already in the done state.
std::vector<SnapshotInfo> snapInfos;
SnapshotInfo info2(uuid2, user, fileName, desc2);
info.SetSeqNum(seqNum);
info2.SetSeqNum(seqNum - 1);
info2.SetStatus(Status::done);
snapInfos.push_back(info);
snapInfos.push_back(info2);

EXPECT_CALL(*metaStore_, GetSnapshotList(fileName, _))
.Times(2)
.WillRepeatedly(DoAll(
SetArgPointee<1>(snapInfos),
Return(kErrCodeSuccess)));

// Chunk index data returned by the data store contains one chunk data name.
ChunkIndexData indexData;
indexData.PutChunkDataName(ChunkDataName(fileName, 1, 0));
EXPECT_CALL(*dataStore_, GetChunkIndexData(_, _))
.WillOnce(DoAll(
SetArgPointee<1>(indexData),
Return(kErrCodeSuccess)));

// Chunk transfer: 4 init calls, 8 reads, 8 part uploads, 4 completions.
// NOTE(review): the 8 reads/parts presumably come from 4 chunks, each read
// as two chunkSplitSize-sized parts - confirm against SnapshotCoreImpl.
EXPECT_CALL(*dataStore_, DataChunkTranferInit(_, _))
.Times(4)
.WillRepeatedly(Return(kErrCodeSuccess));


// Each mocked read sets the closure's return code to OK and runs the
// closure inside the mock action before the call itself returns OK.
EXPECT_CALL(*client_, ReadChunkSnapshot(_, _, _, _, _, _))
.Times(8)
.WillRepeatedly(DoAll(
Invoke([](ChunkIDInfo cidinfo,
uint64_t seq,
uint64_t offset,
uint64_t len,
char *buf,
SnapCloneClosure* scc){
scc->SetRetCode(LIBCURVE_ERROR::OK);
scc->Run();
}),
Return(LIBCURVE_ERROR::OK)));

EXPECT_CALL(*dataStore_, DataChunkTranferAddPart(_, _, _, _, _))
.Times(8)
.WillRepeatedly(Return(kErrCodeSuccess));


EXPECT_CALL(*dataStore_, DataChunkTranferComplete(_, _))
.Times(4)
.WillRepeatedly(Return(kErrCodeSuccess));

// The delete request itself is accepted...
EXPECT_CALL(*client_, DeleteSnapshot(fileName, user, seqNum))
.WillOnce(Return(LIBCURVE_ERROR::OK));

// ...but the single status check reports DELETE_ERROR. WillOnce without a
// repeated action means a second status check would not match this setup.
EXPECT_CALL(*client_, CheckSnapShotStatus(_, _, _, _))
.WillOnce(Return(-LIBCURVE_ERROR::DELETE_ERROR));

core_->HandleCreateSnapshotTask(task);

// The task must be finished and the snapshot marked as error.
ASSERT_TRUE(task->IsFinish());
ASSERT_EQ(Status::error, task->GetSnapshotInfo().GetStatus());
}

TEST_F(TestSnapshotCoreImpl,
TestHandleCreateSnapshotTask_CheckSnapShotStatusFailOnFileStatusError) {
UUID uuid = "uuid1";
Expand Down Expand Up @@ -1854,7 +1999,7 @@ TEST_F(TestSnapshotCoreImpl,

EXPECT_CALL(*client_, CheckSnapShotStatus(_, _, _, _))
.WillOnce(DoAll(SetArgPointee<3>(FileStatus::Created),
Return(-LIBCURVE_ERROR::DELETE_ERROR)));
Return(LIBCURVE_ERROR::OK)));

core_->HandleCreateSnapshotTask(task);

Expand Down

0 comments on commit 12c23c0

Please sign in to comment.