From aa64bfc36439ff453d4566daef4032bb9c749897 Mon Sep 17 00:00:00 2001
From: ayuanzhang
Date: Tue, 12 Nov 2024 11:23:32 +0800
Subject: [PATCH 01/10] init

---
 be/src/io/fs/s3_file_system.h                 |    4 +-
 be/src/olap/olap_define.h                     |    1 +
 be/src/olap/rowset/beta_rowset.cpp            |   77 +
 be/src/olap/rowset/beta_rowset.h              |    2 +
 be/src/olap/rowset/rowset.h                   |    4 +
 be/src/olap/rowset/rowset_meta.h              |    2 +
 be/src/olap/single_replica_compaction.cpp     |    2 +-
 be/src/olap/snapshot_manager.cpp              |   69 +-
 be/src/olap/snapshot_manager.h                |    3 +-
 be/src/olap/tablet.cpp                        |   12 +
 be/src/olap/tablet.h                          |    2 +
 be/src/olap/tablet_meta.cpp                   |    9 +
 be/src/olap/tablet_meta.h                     |    2 +
 be/src/olap/task/engine_clone_task.cpp        |    2 +-
 .../task/engine_storage_migration_task.cpp    |    2 +-
 be/src/runtime/snapshot_loader.cpp            |  159 +-
 be/src/runtime/snapshot_loader.h              |    2 -
 .../Backup-and-Restore/RESTORE.md             |    2 +
 .../apache/doris/analysis/RestoreStmt.java    |   35 +
 .../apache/doris/backup/BackupHandler.java    |    6 +-
 .../org/apache/doris/backup/BackupJob.java    |   29 +-
 .../apache/doris/backup/BackupJobInfo.java    |   37 +
 .../org/apache/doris/backup/BackupMeta.java   |   39 +-
 .../org/apache/doris/backup/RestoreJob.java   |  244 ++-
 .../org/apache/doris/catalog/OlapTable.java   |    6 +-
 .../apache/doris/catalog/PartitionInfo.java   |    8 +-
 .../org/apache/doris/catalog/ResourceMgr.java |    7 +
 .../org/apache/doris/catalog/S3Resource.java  |   37 +
 .../java/org/apache/doris/policy/Policy.java  |    4 +
 .../org/apache/doris/policy/PolicyMgr.java    |    2 +-
 .../apache/doris/policy/StoragePolicy.java    |   57 +-
 .../doris/service/FrontendServiceImpl.java    |   15 +
 .../doris/backup/BackupHandlerTest.java       |    4 +-
 .../apache/doris/backup/RestoreJobTest.java   |    6 +-
 .../test_backup_restore_cold_data.groovy      | 1384 +++++++++++++++++
 35 files changed, 2215 insertions(+), 61 deletions(-)
 create mode 100644 regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy

diff --git a/be/src/io/fs/s3_file_system.h b/be/src/io/fs/s3_file_system.h
index 0da142881fe353..19d4801c27b38f 100644
--- a/be/src/io/fs/s3_file_system.h
+++ b/be/src/io/fs/s3_file_system.h
@@ -103,7 +103,9 @@ class S3FileSystem final : public RemoteFileSystem {
             return path;
         } else {
             // path with no schema
-            return _root_path / path;
+            return std::filesystem::path(
+                           fmt::format("s3://{}/{}", _s3_conf.bucket, _s3_conf.prefix)) /
+                   path;
         }
     }
 
diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h
index e0e1d919a5048d..ee6c012cfd3088 100644
--- a/be/src/olap/olap_define.h
+++ b/be/src/olap/olap_define.h
@@ -97,6 +97,7 @@ static const std::string ERROR_LOG_PREFIX = "error_log";
 static const std::string PENDING_DELTA_PREFIX = "pending_delta";
 static const std::string INCREMENTAL_DELTA_PREFIX = "incremental_delta";
 static const std::string CLONE_PREFIX = "clone";
+static const std::string REMOTE_FILE_INFO = "remote_file_info";
 
 // define paths
 static inline std::string remote_tablet_path(int64_t tablet_id) {
diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp
index d07b0b2254c123..35e394cfcdb5bc 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -355,6 +355,85 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row
     return Status::OK();
 }
 
+// Download every segment file of this remote rowset, together with its inverted index
+// files, from `fs` into the local directory `dir`.
+// If any file fails to download, the local files fetched so far are removed again and the
+// error of the failed download is returned.
+Status BetaRowset::download(io::RemoteFileSystem* fs, const std::string& dir) {
+    if (is_local()) {
+        DCHECK(false) << _rowset_meta->tablet_id() << ' ' << rowset_id();
+        return Status::InternalError("should be remote rowset. tablet_id={} rowset_id={}",
+                                     _rowset_meta->tablet_id(), rowset_id().to_string());
+    }
+
+    if (num_segments() < 1) {
+        return Status::OK();
+    }
+
+    Status status;
+    std::vector<std::string> downloaded_files;
+    Defer remove_downloaded_files {[&]() { // clear download files if errors happen
+        if (!status.ok()) {
+            LOG(WARNING) << "will delete download success files due to error " << status;
+            std::vector<io::Path> paths;
+            for (auto& file : downloaded_files) {
+                paths.emplace_back(file);
+                LOG(WARNING) << "will delete download success file " << file << " due to error";
+            }
+            // The files were written to local disk, so they have to be removed through the
+            // local file system rather than through the remote `fs`.
+            static_cast<void>(io::global_local_filesystem()->batch_delete(paths));
+            LOG(WARNING) << "done delete download success files due to error " << status;
+        }
+    }};
+
+    // Fetch one file and keep the result in `status`: the deferred cleanup above only runs
+    // when `status` holds the error, so a bare RETURN_IF_ERROR on `fs->download()` would
+    // skip it.
+    auto download_one = [&](const std::string& remote_file, const std::string& local_file) {
+        status = fs->download(remote_file, local_file);
+        if (status.ok()) {
+            downloaded_files.push_back(local_file);
+        }
+        return status;
+    };
+
+    for (int i = 0; i < num_segments(); ++i) {
+        // Note: Here we use relative path for remote.
+        auto remote_seg_path =
+                remote_segment_path(_rowset_meta->tablet_id(), rowset_id().to_string(), i);
+        auto local_seg_path = segment_file_path(dir, rowset_id(), i);
+        RETURN_IF_ERROR(download_one(remote_seg_path, local_seg_path));
+
+        if (_schema->get_inverted_index_storage_format() != InvertedIndexStorageFormatPB::V1) {
+            if (_schema->has_inverted_index()) {
+                RETURN_IF_ERROR(download_one(
+                        InvertedIndexDescriptor::get_index_file_name(remote_seg_path),
+                        InvertedIndexDescriptor::get_index_file_name(local_seg_path)));
+            }
+        } else {
+            for (const auto& index : _schema->indexes()) {
+                if (index.index_type() != IndexType::INVERTED) {
+                    continue;
+                }
+
+                auto index_id = index.index_id();
+                std::string inverted_index_src_file = InvertedIndexDescriptor::get_index_file_name(
+                        remote_seg_path, index_id, index.get_index_suffix());
+                std::string inverted_index_dst_file_path =
+                        InvertedIndexDescriptor::get_index_file_name(local_seg_path, index_id,
+                                                                     index.get_index_suffix());
+                RETURN_IF_ERROR(
+                        download_one(inverted_index_src_file, inverted_index_dst_file_path));
+                LOG(INFO) << "success to download. from=" << inverted_index_src_file << ", "
+                          << "to=" << inverted_index_dst_file_path;
+            }
+        }
+    }
+
+    return Status::OK();
+}
+
 Status BetaRowset::upload_to(io::RemoteFileSystem* dest_fs, const RowsetId& new_rowset_id) {
     DCHECK(is_local());
     if (num_segments() < 1) {
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index ed30f76d45033b..1c2e1051aa4038 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -76,6 +76,8 @@ class BetaRowset final : public Rowset {
 
     Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) override;
 
+    Status download(io::RemoteFileSystem* fs, const std::string& dir) override;
+
     Status upload_to(io::RemoteFileSystem* dest_fs, const RowsetId& new_rowset_id) override;
 
     // only applicable to alpha rowset, no op here
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index a7617779014132..62d6759de97ae5 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -213,6 +213,10 @@ class Rowset : public std::enable_shared_from_this<Rowset> {
     // copy all files to `dir`
     virtual Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) = 0;
 
+    virtual Status download(io::RemoteFileSystem* fs, const std::string& dir) {
+        return Status::OK();
+    }
+
     virtual Status upload_to(io::RemoteFileSystem* dest_fs, const RowsetId& new_rowset_id) {
         return Status::OK();
     }
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 24e7dfbefb73b6..df526dbd321962 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -107,6 +107,8 @@ class RowsetMeta {
         _fs = std::move(fs);
     }
 
+    void
clear_resource_id(); + const std::string& resource_id() const { return _rowset_meta_pb.resource_id(); } bool is_local() const { return !_rowset_meta_pb.has_resource_id(); } diff --git a/be/src/olap/single_replica_compaction.cpp b/be/src/olap/single_replica_compaction.cpp index fdccc78816f09b..269a968488600b 100644 --- a/be/src/olap/single_replica_compaction.cpp +++ b/be/src/olap/single_replica_compaction.cpp @@ -322,7 +322,7 @@ Status SingleReplicaCompaction::_fetch_rowset(const TReplicaInfo& addr, const st // change all rowset ids because they maybe its id same with local rowset auto olap_st = SnapshotManager::instance()->convert_rowset_ids( local_path, _tablet->tablet_id(), _tablet->replica_id(), _tablet->table_id(), - _tablet->partition_id(), _tablet->schema_hash()); + _tablet->partition_id(), _tablet->schema_hash(), 0); if (!olap_st.ok()) { LOG(WARNING) << "fail to convert rowset ids, path=" << local_path << ", tablet_id=" << _tablet->tablet_id() << ", error=" << olap_st; diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 6b97591014a25a..2eb39ff0827669 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -38,6 +38,7 @@ #include "common/config.h" #include "common/logging.h" #include "common/status.h" +#include "io/fs/file_writer.h" #include "io/fs/local_file_system.h" #include "olap/data_dir.h" #include "olap/olap_common.h" @@ -146,7 +147,8 @@ Status SnapshotManager::release_snapshot(const string& snapshot_path) { Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t tablet_id, int64_t replica_id, int64_t table_id, - int64_t partition_id, const int32_t& schema_hash) { + int64_t partition_id, const int32_t& schema_hash, + int64_t storage_policy_id) { SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); Status res = Status::OK(); // check clone dir existed @@ -181,6 +183,7 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t 
new_tablet_meta_pb.set_tablet_id(tablet_id); *new_tablet_meta_pb.mutable_tablet_uid() = TabletUid::gen_uid().to_proto(); new_tablet_meta_pb.set_replica_id(replica_id); + new_tablet_meta_pb.set_storage_policy_id(storage_policy_id); if (table_id > 0) { new_tablet_meta_pb.set_table_id(table_id); } @@ -493,6 +496,7 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet "missed version is a cooldowned rowset, must make full " "snapshot. missed_version={}, tablet_id={}", missed_version, ref_tablet->tablet_id()); + //todozy break; } consistent_rowsets.push_back(rowset); @@ -524,8 +528,8 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet LOG(WARNING) << "currently not support backup tablet with cooldowned remote " "data. tablet=" << request.tablet_id; - return Status::NotSupported( - "currently not support backup tablet with cooldowned remote data"); + // return Status::NotSupported( + // "currently not support backup tablet with cooldowned remote data"); } /// not all missing versions are found, fall back to full snapshot. res = Status::OK(); // reset res @@ -598,6 +602,10 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet } std::vector rs_metas; + RowsetMetaSharedPtr rsm; + bool have_remote_file = false; + io::FileWriterPtr file_writer; + for (auto& rs : consistent_rowsets) { if (rs->is_local()) { // local rowset @@ -605,12 +613,56 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet if (!res.ok()) { break; } + rsm = rs->rowset_meta(); + } else { + std::string rowset_meta_str; + RowsetMetaPB rs_meta_pb; + rs->rowset_meta()->to_rowset_pb(&rs_meta_pb); + rs_meta_pb.SerializeToString(&rowset_meta_str); + + RowsetMetaSharedPtr rowset_meta(new RowsetMeta()); + rowset_meta->init(rowset_meta_str); + + rsm = rowset_meta; + + // save_remote_file info + // tableid|storage_policy_id| + // rowset_id|num_segments|has_inverted_index| + // ...... 
+ // rowset_id|num_segments|has_inverted_index + { + // write file + std::string delimeter = "|"; + + if (!have_remote_file) { + auto romote_file_info = + fmt::format("{}/{}", schema_full_path, REMOTE_FILE_INFO); + RETURN_IF_ERROR(io::global_local_filesystem()->create_file(romote_file_info, + &file_writer)); + RETURN_IF_ERROR(file_writer->append( + std::to_string(rs->rowset_meta()->tablet_id()))); + RETURN_IF_ERROR(file_writer->append(delimeter)); + RETURN_IF_ERROR(file_writer->append( + std::to_string(ref_tablet->tablet_meta()->storage_policy_id()))); + have_remote_file = true; + } + RETURN_IF_ERROR(file_writer->append(delimeter)); + RETURN_IF_ERROR(file_writer->append(rs->rowset_id().to_string())); + RETURN_IF_ERROR(file_writer->append(delimeter)); + RETURN_IF_ERROR(file_writer->append(std::to_string(rs->num_segments()))); + RETURN_IF_ERROR(file_writer->append(delimeter)); + RETURN_IF_ERROR(file_writer->append( + std::to_string(rs->tablet_schema()->has_inverted_index()))); + } } - rs_metas.push_back(rs->rowset_meta()); + rs_metas.push_back(rsm); VLOG_NOTICE << "add rowset meta to clone list. " - << " start version " << rs->rowset_meta()->start_version() - << " end version " << rs->rowset_meta()->end_version() << " empty " - << rs->rowset_meta()->empty(); + << " start version " << rsm->start_version() << " end version " + << rsm->end_version() << " empty " << rsm->empty(); + } + + if (have_remote_file) { + RETURN_IF_ERROR(file_writer->close()); } if (!res.ok()) { LOG(WARNING) << "fail to create hard link. 
path=" << snapshot_id_path
@@ -628,6 +680,9 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet
             new_tablet_meta->revise_delete_bitmap_unlocked(delete_bitmap_snapshot);
         }
 
+        //clear cooldown meta
+        new_tablet_meta->revise_clear_resource_id();
+
         if (snapshot_version == g_Types_constants.TSNAPSHOT_REQ_VERSION2) {
             res = new_tablet_meta->save(header_path);
             if (res.ok() && request.__isset.is_copy_tablet_task && request.is_copy_tablet_task) {
diff --git a/be/src/olap/snapshot_manager.h b/be/src/olap/snapshot_manager.h
index 78b9db8659b3e9..5e218db28f82e7 100644
--- a/be/src/olap/snapshot_manager.h
+++ b/be/src/olap/snapshot_manager.h
@@ -55,7 +55,8 @@ class SnapshotManager {
     static SnapshotManager* instance();
 
     Status convert_rowset_ids(const std::string& clone_dir, int64_t tablet_id, int64_t replica_id,
-                              int64_t table_id, int64_t partition_id, const int32_t& schema_hash);
+                              int64_t table_id, int64_t partition_id, const int32_t& schema_hash,
+                              int64_t storage_policy_id);
 
 private:
     SnapshotManager() : _snapshot_base_id(0) {
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 7c85e8238f8f36..98ea3f3706af33 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2231,6 +2231,14 @@ Status Tablet::cooldown() {
     return Status::OK();
 }
 
+// Download the files of `rowset` into local directory `dir`, reading them from the remote
+// file system bound to this tablet's storage policy.
+Status Tablet::download(RowsetSharedPtr rowset, const std::string& dir) {
+    std::shared_ptr<io::RemoteFileSystem> remote_fs;
+    RETURN_IF_ERROR(get_remote_file_system(storage_policy_id(), &remote_fs));
+    return rowset->download(remote_fs.get(), dir);
+}
+
 // hold SHARED `cooldown_conf_lock`
 Status Tablet::_cooldown_data() {
     DCHECK(_cooldown_replica_id == replica_id());
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 5ca2248c5b5ac2..cc078938f9dcd0 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -390,6 +390,8 @@ class Tablet : public BaseTablet {
     // Cooldown to remote fs.
Status cooldown();
 
+    Status download(RowsetSharedPtr rowset, const std::string& dir);
+
     RowsetSharedPtr pick_cooldown_rowset();
 
     bool need_cooldown(int64_t* cooldown_timestamp, size_t* file_size);
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index a5324c9a6edd2f..2711534412b319 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -837,6 +837,18 @@ void TabletMeta::revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas) {
     _stale_rs_metas.clear();
 }
 
+// Clear the resource id on every rowset meta held by this tablet meta, active and stale.
+void TabletMeta::revise_clear_resource_id() {
+    // Bind by reference instead of copying each element per iteration (presumably these
+    // are RowsetMetaSharedPtr, where a copy costs an atomic ref-count update).
+    for (const auto& rs : _rs_metas) {
+        rs->clear_resource_id();
+    }
+    for (const auto& rs : _stale_rs_metas) {
+        rs->clear_resource_id();
+    }
+}
+
 // This method should call after revise_rs_metas, since new rs_metas might be a subset
 // of original tablet, we should revise the delete_bitmap according to current rowset.
 //
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index a79cdd272962da..01a3ddd2e7c73d 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -187,6 +187,8 @@ class TabletMeta {
     void modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add,
                          const std::vector<RowsetMetaSharedPtr>& to_delete,
                          bool same_version = false);
+
+    void revise_clear_resource_id();
     void revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas);
     void revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap);
 
diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp
index 0b077076e574cb..6e5a15d9e3bb79 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -407,7 +407,7 @@ Status EngineCloneTask::_make_and_download_snapshots(DataDir& data_dir,
                 // change all rowset ids because they maybe its id same with local rowset
                 status = SnapshotManager::instance()->convert_rowset_ids(
                         local_data_path, _clone_req.tablet_id, _clone_req.replica_id,
-                        _clone_req.table_id, _clone_req.partition_id, _clone_req.schema_hash);
+                        _clone_req.table_id, _clone_req.partition_id, _clone_req.schema_hash, 0);
             } else {
LOG_WARNING("failed to download snapshot from remote BE")
                         .tag("url", _mask_token(remote_url_prefix))
diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp
index 218922069c7bd6..5033c67b2dc38f 100644
--- a/be/src/olap/task/engine_storage_migration_task.cpp
+++ b/be/src/olap/task/engine_storage_migration_task.cpp
@@ -158,7 +158,7 @@ Status EngineStorageMigrationTask::_gen_and_write_header_to_hdr_file(
     // rowset create time is useful when load tablet from meta to check which tablet is the tablet to load
     return SnapshotManager::instance()->convert_rowset_ids(
             full_path, tablet_id, _tablet->replica_id(), _tablet->table_id(),
-            _tablet->partition_id(), schema_hash);
+            _tablet->partition_id(), schema_hash, 0);
 }
 
 Status EngineStorageMigrationTask::_reload_tablet(const std::string& full_path) {
diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp
index d4891bb383839e..89eb378b6372d3 100644
--- a/be/src/runtime/snapshot_loader.cpp
+++ b/be/src/runtime/snapshot_loader.cpp
@@ -41,6 +41,7 @@
 #include "gutil/strings/split.h"
 #include "http/http_client.h"
 #include "io/fs/broker_file_system.h"
+#include "io/fs/file_reader.h"
 #include "io/fs/file_system.h"
 #include "io/fs/hdfs_file_system.h"
 #include "io/fs/local_file_system.h"
@@ -51,6 +52,7 @@
 #include "olap/data_dir.h"
 #include "olap/snapshot_manager.h"
 #include "olap/storage_engine.h"
+#include "olap/storage_policy.h"
 #include "olap/tablet.h"
 #include "olap/tablet_manager.h"
 #include "runtime/client_cache.h"
@@ -100,6 +102,179 @@ Status SnapshotLoader::init(TStorageBackendType::type type, const std::string& l
 
 SnapshotLoader::~SnapshotLoader() = default;
 
+// Return true if `str` ends with `match`. File-local: it replaces the former
+// SnapshotLoader::_end_with member.
+static bool _end_with(std::string_view str, std::string_view match) {
+    return str.size() >= match.size() &&
+           str.compare(str.size() - match.size(), match.size(), match) == 0;
+}
+
+// Collect the names of the inverted index files (`*.idx`) in remote directory `dir` whose
+// file name starts with the rowset id `rowset`, and append them to `remote_files`.
+static Status list_segment_inverted_index_file(io::RemoteFileSystem* cold_fs,
+                                               const std::string& dir, const std::string& rowset,
+                                               std::vector<std::string>* remote_files) {
+    bool exists = true;
+    std::vector<io::FileInfo> files;
+    RETURN_IF_ERROR(cold_fs->list(dir, true, &files, &exists));
+    for (const auto& file : files) {
+        std::string file_name = io::Path(file.file_name).filename();
+        // compare() checks the prefix in place, without the temporary that substr() creates
+        if (file_name.compare(0, rowset.length(), rowset) == 0 && _end_with(file_name, ".idx")) {
+            remote_files->push_back(file_name);
+        }
+    }
+
+    return Status::OK();
+}
+
+// Copy one cold-data file into the backup repository: download `remote_seg_path` from
+// `cold_fs` to the temporary local file `local_seg_path`, upload it with its md5 checksum
+// to `dest_seg_path` on `dest_fs`, then delete the local copy.
+static Status download_and_upload_one_file(io::RemoteFileSystem& dest_fs,
+                                           io::RemoteFileSystem* cold_fs,
+                                           const std::string& remote_seg_path,
+                                           const std::string& local_seg_path,
+                                           const std::string& dest_seg_path) {
+    RETURN_IF_ERROR(cold_fs->download(remote_seg_path, local_seg_path));
+
+    // calc md5sum of localfile
+    std::string md5sum;
+    Status st = io::global_local_filesystem()->md5sum(local_seg_path, &md5sum);
+    if (st.ok()) {
+        st = upload_with_checksum(dest_fs, local_seg_path, dest_seg_path, md5sum);
+    }
+
+    // Delete the local copy even when the checksum or the upload failed, so that a failed
+    // attempt does not leave the downloaded file behind on local disk.
+    Status delete_st = io::global_local_filesystem()->delete_file(local_seg_path);
+    return st.ok() ? delete_st : st;
+}
+
+// Upload all files of one remote rowset to the backup repository: its `segments` segment
+// files and, when `have_inverted_index` is non-zero, its inverted index files.
+static Status upload_remote_rowset(io::RemoteFileSystem& dest_fs, int64_t tablet_id,
+                                   const std::string& local_path, const std::string& dest_path,
+                                   io::RemoteFileSystem* cold_fs, const std::string& rowset,
+                                   int segments, int have_inverted_index) {
+    const std::string remote_dir = remote_tablet_path(tablet_id);
+
+    for (int i = 0; i < segments; i++) {
+        const std::string seg_name = fmt::format("{}_{}.dat", rowset, i);
+        RETURN_IF_ERROR(download_and_upload_one_file(
+                dest_fs, cold_fs, fmt::format("{}/{}", remote_dir, seg_name),
+                fmt::format("{}/{}", local_path, seg_name),
+                fmt::format("{}/{}", dest_path, seg_name)));
+    }
+
+    if (!have_inverted_index) {
+        return Status::OK();
+    }
+
+    std::vector<std::string> remote_index_files;
+    RETURN_IF_ERROR(
+            list_segment_inverted_index_file(cold_fs, remote_dir, rowset, &remote_index_files));
+
+    for (const auto& index_file : remote_index_files) {
+        RETURN_IF_ERROR(download_and_upload_one_file(
+                dest_fs, cold_fs, fmt::format("{}/{}", remote_dir, index_file),
+                fmt::format("{}/{}", local_path, index_file),
+                fmt::format("{}/{}", dest_path, index_file)));
+    }
+    return Status::OK();
+}
+
+// Parse `text` as a base-10 integer into `*value`. Returns false if `text` is empty, is
+// followed by characters that are not part of the number, or does not fit into int64_t.
+static bool parse_remote_file_info_number(const std::string& text, int64_t* value) {
+    try {
+        size_t consumed = 0;
+        *value = std::stoll(text, &consumed);
+        return consumed == text.size();
+    } catch (const std::exception&) {
+        return false;
+    }
+}
+
+// Upload the cold data described by the info file `remote_file` in `local_path`.
+//
+// The info file is written when the snapshot is made and has the layout
+//   tablet_id|storage_policy_id|rowset_id|num_segments|has_inverted_index|rowset_id|...
+// i.e. two header fields followed by one (rowset_id, num_segments, has_inverted_index)
+// triple per remote rowset. Each listed rowset is copied from the remote file system of
+// the storage policy to `dest_path` on `dest_fs`.
+//
+// Returns an error if the file is malformed or was written for another tablet.
+static Status upload_remote_file(io::RemoteFileSystem& dest_fs, int64_t tablet_id,
+                                 const std::string& local_path, const std::string& dest_path,
+                                 const std::string& remote_file) {
+    const std::string info_path = local_path + '/' + remote_file;
+    io::FileReaderSPtr file_reader;
+    RETURN_IF_ERROR(io::global_local_filesystem()->open_file(info_path, &file_reader));
+
+    // Read into a std::string so that the buffer is released on every return path.
+    std::string content(file_reader->size(), '\0');
+    size_t bytes_read = 0;
+    RETURN_IF_ERROR(file_reader->read_at(0, {content.data(), content.size()}, &bytes_read));
+    if (bytes_read != content.size()) {
+        return Status::InternalError("failed to read {}: expect {} bytes, got {}", info_path,
+                                     content.size(), bytes_read);
+    }
+
+    // Split on '|' up front: every field index is then bounds-checked by the count test
+    // below instead of relying on find() never returning npos in the middle of an entry.
+    std::vector<std::string> fields;
+    size_t begin = 0;
+    while (true) {
+        const size_t pos = content.find('|', begin);
+        if (pos == std::string::npos) {
+            fields.push_back(content.substr(begin));
+            break;
+        }
+        fields.push_back(content.substr(begin, pos - begin));
+        begin = pos + 1;
+    }
+    if (fields.size() < 2 || (fields.size() - 2) % 3 != 0) {
+        return Status::InternalError("malformed {}: unexpected field count {}", info_path,
+                                     fields.size());
+    }
+
+    int64_t info_tablet_id = 0;
+    int64_t storage_policy_id = 0;
+    if (!parse_remote_file_info_number(fields[0], &info_tablet_id) ||
+        !parse_remote_file_info_number(fields[1], &storage_policy_id)) {
+        return Status::InternalError("malformed {}: invalid tablet id or storage policy id",
+                                     info_path);
+    }
+    if (info_tablet_id != tablet_id) {
+        return Status::InternalError("Invalid tablet {}", info_tablet_id);
+    }
+
+    std::shared_ptr<io::RemoteFileSystem> colddata_fs;
+    RETURN_IF_ERROR(get_remote_file_system(storage_policy_id, &colddata_fs));
+
+    for (size_t i = 2; i < fields.size(); i += 3) {
+        const std::string& rowset_id = fields[i];
+        int64_t segments = 0;
+        int64_t have_inverted_index = 0;
+        if (rowset_id.empty() || !parse_remote_file_info_number(fields[i + 1], &segments) ||
+            !parse_remote_file_info_number(fields[i + 2], &have_inverted_index) ||
+            segments < 0 || segments > INT32_MAX) {
+            return Status::InternalError("malformed {}: invalid rowset entry at field {}",
+                                         info_path, i);
+        }
+
+        if (segments > 0) {
+            RETURN_IF_ERROR(upload_remote_rowset(dest_fs, tablet_id, local_path, dest_path,
+                                                 colddata_fs.get(), rowset_id,
+                                                 static_cast<int>(segments),
+                                                 have_inverted_index != 0 ? 1 : 0));
+        }
+    }
+
+    return Status::OK();
+}
+
 Status SnapshotLoader::upload(const std::map<std::string, std::string>& src_to_dest_path,
                               std::map<int64_t, std::vector<std::string>>* tablet_files) {
     if (!_remote_fs) {
@@ -150,6 +325,14 @@ Status SnapshotLoader::upload(const std::map<std::string, std::string>& src_to_d
                                           TTaskType::type::UPLOAD));
 
             const std::string& local_file = *it;
+            if (local_file == REMOTE_FILE_INFO) {
+                // Copy the data of the remote rowsets listed in the info file to the repository.
+                // NOTE(review): there is no `continue` here, so the info file itself is uploaded
+                // below like any other snapshot file; SnapshotLoader::download() skips it. Confirm.
+                RETURN_IF_ERROR(upload_remote_file(*_remote_fs, tablet_id, src_path, dest_path,
+                                                   local_file));
+            }
+
             // calc md5sum of localfile
             std::string md5sum;
             RETURN_IF_ERROR(
@@ -269,12 +452,17 @@ Status SnapshotLoader::download(const std::map<std::string, std::string>& src_to
             const FileStat& file_stat = iter.second;
             auto find = std::find(local_files.begin(), local_files.end(), remote_file);
             if (find == local_files.end()) {
+                if (remote_file.compare(REMOTE_FILE_INFO) == 0) {
+                    continue;
+                }
                 // remote file does not exist in local, download it
                 need_download = true;
             } else {
                 if (_end_with(remote_file, ".hdr")) {
                     // this is a header file, download it.
                     need_download = true;
+                } else if (remote_file.compare(REMOTE_FILE_INFO) == 0) {
+                    continue;
                 } else {
                     // check checksum
                     std::string local_md5sum;
@@ -734,7 +922,7 @@ Status SnapshotLoader::move(const std::string& snapshot_path, TabletSharedPtr ta
     // rename the rowset ids and tabletid info in rowset meta
     Status convert_status = SnapshotManager::instance()->convert_rowset_ids(
             snapshot_path, tablet_id, tablet->replica_id(), tablet->table_id(),
-            tablet->partition_id(), schema_hash);
+            tablet->partition_id(), schema_hash, tablet->storage_policy_id());
     if (!convert_status.ok()) {
         std::stringstream ss;
         ss << "failed to convert rowsetids in snapshot: " << snapshot_path
@@ -804,14 +992,6 @@ Status SnapshotLoader::move(const std::string& snapshot_path, TabletSharedPtr ta
     return status;
 }
 
-bool SnapshotLoader::_end_with(const std::string& str, const std::string& match) {
-    if (str.size() >= match.size() &&
-        str.compare(str.size() - match.size(), match.size(), match) == 0) {
-        return true;
-    }
-    return false;
-}
-
 Status SnapshotLoader::_get_tablet_id_and_schema_hash_from_file_path(const std::string& src_path,
                                                                      int64_t* tablet_id,
                                                                      int32_t* schema_hash) {
diff --git a/be/src/runtime/snapshot_loader.h b/be/src/runtime/snapshot_loader.h
index c0d1f0f70864ce..ed8124a6f8788b 100644
--- a/be/src/runtime/snapshot_loader.h
+++ b/be/src/runtime/snapshot_loader.h
@@ -94,8 +94,6 @@ class SnapshotLoader {
     Status _get_existing_files_from_local(const std::string& local_path,
                                           std::vector<std::string>* local_files);
 
-    bool _end_with(const std::string& str, const std::string& match);
-
     Status _replace_tablet_id(const std::string& file_name, int64_t tablet_id,
                               std::string* new_file_name);
 
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/RESTORE.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/RESTORE.md
index 8541500331b7dd..ac860eb0f2a280 100644
---
a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/RESTORE.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/RESTORE.md @@ -60,6 +60,8 @@ PROPERTIES ("key"="value", ...); - "reserve_dynamic_partition_enable" = "true":默认为 false。当该属性为 true 时,恢复的表会保留该表备份之前的'dynamic_partition_enable'属性值。该值不为true时,则恢复出来的表的'dynamic_partition_enable'属性值会设置为false。 - "timeout" = "3600":任务超时时间,默认为一天。单位秒。 - "meta_version" = 40:使用指定的 meta_version 来读取之前备份的元数据。注意,该参数作为临时方案,仅用于恢复老版本 Doris 备份的数据。最新版本的备份数据中已经包含 meta version,无需再指定。 + - "reserve_storage_policy" = "true":指定的恢复的表是否保留冷热分层属性。默认为true,备份集中保存的storage policy和对应的resource信息将在新集群中重建。恢复时数据都会下载到本地,再由降冷策略上传到远程。reserve_storage_policy设置为false,恢复后的表去除了冷热属性, 变为普通表。 + - "storage_resource" = "resource_name":指定恢复后表的冷数据使用的resource。建议在跨集群恢复时指定此属性。注意恢复后的storage policy中的storage_resource属性也会更新为指定的storage_resource。若指定了"reserve_storage_policy"="false",则忽略storage_resource属性。 ### Example diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java index 9585a2e5069237..9b4d06421f1805 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java @@ -18,7 +18,9 @@ package org.apache.doris.analysis; import org.apache.doris.backup.Repository; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.ReplicaAllocation; +import org.apache.doris.catalog.Resource; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.ErrorCode; @@ -44,18 +46,22 @@ public class RestoreStmt extends AbstractBackupStmt { public static final String PROP_CLEAN_TABLES = "clean_tables"; public static final String PROP_CLEAN_PARTITIONS = "clean_partitions"; public static final String PROP_ATOMIC_RESTORE = "atomic_restore"; + public static final String 
PROP_STORAGE_RESOURCE = "storage_resource"; + public static final String PROP_RESERVE_STORAGE_POLICY = "reserve_storage_policy"; private boolean allowLoad = false; private ReplicaAllocation replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; private String backupTimestamp = null; private int metaVersion = -1; private boolean reserveReplica = false; + private boolean reserveStoragePolicy = true; private boolean reserveDynamicPartitionEnable = false; private boolean isLocal = false; private boolean isBeingSynced = false; private boolean isCleanTables = false; private boolean isCleanPartitions = false; private boolean isAtomicRestore = false; + private String storageResource = ""; private byte[] meta = null; private byte[] jobInfo = null; @@ -83,6 +89,10 @@ public String getBackupTimestamp() { return backupTimestamp; } + public String getStorageResource() { + return storageResource; + } + public int getMetaVersion() { return metaVersion; } @@ -91,6 +101,10 @@ public boolean reserveReplica() { return reserveReplica; } + public boolean reserveStoragePolicy() { + return reserveStoragePolicy; + } + public boolean reserveDynamicPartitionEnable() { return reserveDynamicPartitionEnable; } @@ -208,6 +222,27 @@ public void analyzeProperties() throws AnalysisException { // is atomic restore isAtomicRestore = eatBooleanProperty(copiedProperties, PROP_ATOMIC_RESTORE, isAtomicRestore); + if (copiedProperties.containsKey(PROP_STORAGE_RESOURCE)) { + storageResource = copiedProperties.get(PROP_STORAGE_RESOURCE); + Resource localResource = Env.getCurrentEnv().getResourceMgr().getResource(storageResource); + + if (localResource == null) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_COMMON_ERROR, + "Restore storage resource " + storageResource + " is not exist"); + } + + if (localResource.getType() != Resource.ResourceType.S3) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_COMMON_ERROR, + "The type of local resource " + + storageResource + " is not same as restored 
resource"); + } + + copiedProperties.remove(PROP_STORAGE_RESOURCE); + } + + // reserve storage policy + reserveStoragePolicy = eatBooleanProperty(copiedProperties, PROP_RESERVE_STORAGE_POLICY, reserveStoragePolicy); + if (!copiedProperties.isEmpty()) { ErrorReport.reportAnalysisException(ErrorCode.ERR_COMMON_ERROR, "Unknown restore job properties: " + copiedProperties.keySet()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java index a3fd66692a2928..758af5d2b2ed91 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java @@ -460,14 +460,14 @@ private void restore(Repository repository, Database db, RestoreStmt stmt) throw db.getId(), db.getFullName(), jobInfo, stmt.allowLoad(), stmt.getReplicaAlloc(), stmt.getTimeoutMs(), metaVersion, stmt.reserveReplica(), stmt.reserveDynamicPartitionEnable(), stmt.isBeingSynced(), - stmt.isCleanTables(), stmt.isCleanPartitions(), stmt.isAtomicRestore(), - env, Repository.KEEP_ON_LOCAL_REPO_ID, backupMeta); + stmt.isCleanTables(), stmt.isCleanPartitions(), stmt.isAtomicRestore(), stmt.getStorageResource(), + stmt.reserveStoragePolicy(), env, Repository.KEEP_ON_LOCAL_REPO_ID, backupMeta); } else { restoreJob = new RestoreJob(stmt.getLabel(), stmt.getBackupTimestamp(), db.getId(), db.getFullName(), jobInfo, stmt.allowLoad(), stmt.getReplicaAlloc(), stmt.getTimeoutMs(), stmt.getMetaVersion(), stmt.reserveReplica(), stmt.reserveDynamicPartitionEnable(), stmt.isBeingSynced(), stmt.isCleanTables(), stmt.isCleanPartitions(), stmt.isAtomicRestore(), - env, repository.getId()); + stmt.getStorageResource(), stmt.reserveStoragePolicy(), env, repository.getId()); } env.getEditLog().logRestoreJob(restoreJob); diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java 
index dc92e9a07c3c1f..38704462339de0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java @@ -29,6 +29,7 @@ import org.apache.doris.catalog.OdbcTable; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Resource; import org.apache.doris.catalog.Table; @@ -39,6 +40,7 @@ import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.property.S3ClientBEProperties; import org.apache.doris.persist.BarrierLog; +import org.apache.doris.policy.StoragePolicy; import org.apache.doris.task.AgentBatchTask; import org.apache.doris.task.AgentTask; import org.apache.doris.task.AgentTaskExecutor; @@ -58,6 +60,7 @@ import com.google.common.collect.Collections2; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -444,6 +447,7 @@ private void prepareAndSendSnapshotTask() { // copy all related schema at this moment List copiedTables = Lists.newArrayList(); List copiedResources = Lists.newArrayList(); + List copiedStoragePolicys = Lists.newArrayList(); AgentBatchTask batchTask = new AgentBatchTask(Config.backup_restore_batch_task_num_per_rpc); for (TableRef tableRef : tableRefs) { String tblName = tableRef.getName().getTbl(); @@ -461,7 +465,8 @@ private void prepareAndSendSnapshotTask() { if (getContent() == BackupContent.ALL) { prepareSnapshotTaskForOlapTableWithoutLock(db, (OlapTable) tbl, tableRef, batchTask); } - prepareBackupMetaForOlapTableWithoutLock(tableRef, olapTable, copiedTables); + prepareBackupMetaForOlapTableWithoutLock(tableRef, olapTable, copiedTables, + copiedStoragePolicys); break; case VIEW: prepareBackupMetaForViewWithoutLock((View) tbl, 
copiedTables); @@ -490,7 +495,7 @@ private void prepareAndSendSnapshotTask() { return; } - backupMeta = new BackupMeta(copiedTables, copiedResources); + backupMeta = new BackupMeta(copiedTables, copiedResources, copiedStoragePolicys); // send tasks for (AgentTask task : batchTask.getAllTasks()) { @@ -604,7 +609,8 @@ private void checkResourceForOdbcTable(OdbcTable odbcTable) { } private void prepareBackupMetaForOlapTableWithoutLock(TableRef tableRef, OlapTable olapTable, - List
copiedTables) { + List
copiedTables, + List copiedStoragePolicys) { // only copy visible indexes List reservedPartitions = tableRef.getPartitionNames() == null ? null : tableRef.getPartitionNames().getPartitionNames(); @@ -616,6 +622,23 @@ private void prepareBackupMetaForOlapTableWithoutLock(TableRef tableRef, OlapTab removeUnsupportProperties(copiedTbl); copiedTables.add(copiedTbl); + + PartitionInfo partitionInfo = olapTable.getPartitionInfo(); + // classify a table's all partitions by storage policy + for (Long partitionId : olapTable.getPartitionIds()) { + String policyName = partitionInfo.getDataProperty(partitionId).getStoragePolicy(); + if (StringUtils.isEmpty(policyName)) { + continue; + } + + StoragePolicy checkedPolicyCondition = StoragePolicy.ofCheck(policyName); + StoragePolicy storagePolicy = (StoragePolicy) Env.getCurrentEnv().getPolicyMgr() + .getPolicy(checkedPolicyCondition); + + if (storagePolicy != null && !copiedStoragePolicys.contains(storagePolicy)) { + copiedStoragePolicys.add(storagePolicy); + } + } } private void prepareBackupMetaForViewWithoutLock(View view, List
copiedTables) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJobInfo.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJobInfo.java index b918cddef56691..cfbc685b2488f1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJobInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJobInfo.java @@ -28,6 +28,7 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Resource; +import org.apache.doris.catalog.S3Resource; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.catalog.Tablet; @@ -38,6 +39,7 @@ import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.policy.StoragePolicy; import org.apache.doris.thrift.TNetworkAddress; import com.google.common.base.Joiner; @@ -330,6 +332,10 @@ public static class BriefBackupJobInfo { public List odbcTableList = Lists.newArrayList(); @SerializedName("odbc_resource_list") public List odbcResourceList = Lists.newArrayList(); + @SerializedName("s3_resource_list") + public List s3ResourceList = Lists.newArrayList(); + @SerializedName("storage_policy_list") + public List storagePolicyList = Lists.newArrayList(); public static BriefBackupJobInfo fromBackupJobInfo(BackupJobInfo backupJobInfo) { BriefBackupJobInfo briefBackupJobInfo = new BriefBackupJobInfo(); @@ -347,6 +353,8 @@ public static BriefBackupJobInfo fromBackupJobInfo(BackupJobInfo backupJobInfo) briefBackupJobInfo.viewList = backupJobInfo.newBackupObjects.views; briefBackupJobInfo.odbcTableList = backupJobInfo.newBackupObjects.odbcTables; briefBackupJobInfo.odbcResourceList = backupJobInfo.newBackupObjects.odbcResources; + briefBackupJobInfo.s3ResourceList = backupJobInfo.newBackupObjects.s3Resources; + briefBackupJobInfo.storagePolicyList = 
backupJobInfo.newBackupObjects.storagePolicies; return briefBackupJobInfo; } } @@ -365,6 +373,10 @@ public static class NewBackupObjects { public List odbcTables = Lists.newArrayList(); @SerializedName("odbc_resources") public List odbcResources = Lists.newArrayList(); + @SerializedName("s3_resources") + public List s3Resources = Lists.newArrayList(); + @SerializedName("storage_policy") + public List storagePolicies = Lists.newArrayList(); } public static class BackupOlapTableInfo { @@ -483,6 +495,16 @@ public static class BackupOdbcResourceInfo { public String name; } + public static class BackupS3ResourceInfo { + @SerializedName("name") + public String name; + } + + public static class BackupStoragePolicyInfo { + @SerializedName("name") + public String name; + } + // eg: __db_10001/__tbl_10002/__part_10003/__idx_10002/__10004 public String getFilePath(String db, String tbl, String part, String idx, long tabletId) { if (!db.equalsIgnoreCase(dbName)) { @@ -674,6 +696,21 @@ public static BackupJobInfo fromCatalog(long backupTime, String label, String db backupOdbcResourceInfo.name = odbcCatalogResource.getName(); jobInfo.newBackupObjects.odbcResources.add(backupOdbcResourceInfo); } + + if (resource instanceof S3Resource) { + S3Resource s3Resource = (S3Resource) resource; + BackupS3ResourceInfo backupS3ResourceInfo = new BackupS3ResourceInfo(); + backupS3ResourceInfo.name = s3Resource.getName(); + jobInfo.newBackupObjects.s3Resources.add(backupS3ResourceInfo); + } + } + + // storage policies + Collection storagePolicies = backupMeta.getStoragePolicyNameMap().values(); + for (StoragePolicy storagePolicy : storagePolicies) { + BackupStoragePolicyInfo backupStoragePolicyInfo = new BackupStoragePolicyInfo(); + backupStoragePolicyInfo.name = storagePolicy.getName(); + jobInfo.newBackupObjects.storagePolicies.add(backupStoragePolicyInfo); } return jobInfo; diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java 
b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java index 6a973ea45a2221..7acad89a7fa74e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java @@ -17,11 +17,13 @@ package org.apache.doris.backup; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Resource; import org.apache.doris.catalog.Table; import org.apache.doris.common.io.Writable; import org.apache.doris.meta.MetaContext; import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.policy.StoragePolicy; import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; @@ -50,11 +52,14 @@ public class BackupMeta implements Writable { // resource name -> resource @SerializedName(value = "resourceNameMap") private Map resourceNameMap = Maps.newHashMap(); + // storage policy name -> storage policy + @SerializedName(value = "storagePolicyNameMap") + private Map storagePolicyNameMap = Maps.newHashMap(); private BackupMeta() { } - public BackupMeta(List
tables, List resources) { + public BackupMeta(List
tables, List resources, List storagePolicys) { for (Table table : tables) { tblNameMap.put(table.getName(), table); tblIdMap.put(table.getId(), table); @@ -62,6 +67,21 @@ public BackupMeta(List
tables, List resources) { for (Resource resource : resources) { resourceNameMap.put(resource.getName(), resource); } + + for (StoragePolicy policy : storagePolicys) { + storagePolicyNameMap.put(policy.getName(), policy); + + if (resourceNameMap.get(policy.getStorageResource()) != null) { + continue; + } + Resource resource = Env.getCurrentEnv().getResourceMgr() + .getResource(policy.getStorageResource()); + Resource copiedResource = resource == null ? null : resource.clone(); + if (copiedResource == null) { + continue; + } + resourceNameMap.put(policy.getStorageResource(), copiedResource); + } } public Map getTables() { @@ -72,6 +92,10 @@ public Map getResourceNameMap() { return resourceNameMap; } + public Map getStoragePolicyNameMap() { + return storagePolicyNameMap; + } + public Table getTable(String tblName) { return tblNameMap.get(tblName); } @@ -80,6 +104,10 @@ public Resource getResource(String resourceName) { return resourceNameMap.get(resourceName); } + public StoragePolicy getStoragePolicy(String policyName) { + return storagePolicyNameMap.get(policyName); + } + public Table getTable(Long tblId) { return tblIdMap.get(tblId); } @@ -130,6 +158,10 @@ public void write(DataOutput out) throws IOException { for (Resource resource : resourceNameMap.values()) { resource.write(out); } + out.writeInt(storagePolicyNameMap.size()); + for (StoragePolicy storagePolicy : storagePolicyNameMap.values()) { + storagePolicy.write(out); + } } public void readFields(DataInput in) throws IOException { @@ -144,6 +176,11 @@ public void readFields(DataInput in) throws IOException { Resource resource = Resource.read(in); resourceNameMap.put(resource.getName(), resource); } + size = in.readInt(); + for (int i = 0; i < size; i++) { + StoragePolicy policy = StoragePolicy.read(in); + storagePolicyNameMap.put(policy.getName(), policy); + } } public String toJson() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java 
index 1db289dbaa9cb7..23997f621ebfbd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -47,6 +47,7 @@ import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Resource; import org.apache.doris.catalog.ResourceMgr; +import org.apache.doris.catalog.S3Resource; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.catalog.Tablet; @@ -66,6 +67,11 @@ import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.datasource.property.S3ClientBEProperties; +import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.policy.Policy; +import org.apache.doris.policy.PolicyMgr; +import org.apache.doris.policy.PolicyTypeEnum; +import org.apache.doris.policy.StoragePolicy; import org.apache.doris.resource.Tag; import org.apache.doris.task.AgentBatchTask; import org.apache.doris.task.AgentTask; @@ -94,6 +100,7 @@ import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Table.Cell; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -107,6 +114,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -121,6 +129,9 @@ public class RestoreJob extends AbstractJob { private static final String PROP_CLEAN_TABLES = RestoreStmt.PROP_CLEAN_TABLES; private static final String PROP_CLEAN_PARTITIONS = RestoreStmt.PROP_CLEAN_PARTITIONS; private static final String PROP_ATOMIC_RESTORE = RestoreStmt.PROP_ATOMIC_RESTORE; + private static final String PROP_STORAGE_RESOURCE = RestoreStmt.PROP_STORAGE_RESOURCE; + private static final String 
PROP_RESERVE_STORAGE_POLICY = RestoreStmt.PROP_RESERVE_STORAGE_POLICY; + private static final String ATOMIC_RESTORE_TABLE_PREFIX = "__doris_atomic_restore_prefix__"; private static final Logger LOG = LogManager.getLogger(RestoreJob.class); @@ -167,6 +178,7 @@ public enum RestoreJobState { private List> restoredPartitions = Lists.newArrayList(); private List
restoredTbls = Lists.newArrayList(); private List restoredResources = Lists.newArrayList(); + private List storagePolicies = Lists.newArrayList(); // save all restored partitions' version info which are already exist in catalog // table id -> partition id -> (version, version hash) @@ -193,7 +205,10 @@ public enum RestoreJobState { private boolean isCleanPartitions = false; // Whether to restore the data into a temp table, and then replace the origin one. private boolean isAtomicRestore = false; - + // the target storage resource + private String storageResource = ""; + // whether to reserve storage policy + private boolean reserveStoragePolicy = false; // restore properties private Map properties = Maps.newHashMap(); @@ -211,7 +226,8 @@ public RestoreJob(JobType jobType) { public RestoreJob(String label, String backupTs, long dbId, String dbName, BackupJobInfo jobInfo, boolean allowLoad, ReplicaAllocation replicaAlloc, long timeoutMs, int metaVersion, boolean reserveReplica, boolean reserveDynamicPartitionEnable, boolean isBeingSynced, boolean isCleanTables, - boolean isCleanPartitions, boolean isAtomicRestore, Env env, long repoId) { + boolean isCleanPartitions, boolean isAtomicRestore, String storageResource, + boolean reserveStoragePolicy, Env env, long repoId) { super(JobType.RESTORE, label, dbId, dbName, timeoutMs, env, repoId); this.backupTimestamp = backupTs; this.jobInfo = jobInfo; @@ -229,21 +245,26 @@ public RestoreJob(String label, String backupTs, long dbId, String dbName, Backu this.isCleanTables = isCleanTables; this.isCleanPartitions = isCleanPartitions; this.isAtomicRestore = isAtomicRestore; + this.storageResource = storageResource; + this.reserveStoragePolicy = reserveStoragePolicy; properties.put(PROP_RESERVE_REPLICA, String.valueOf(reserveReplica)); properties.put(PROP_RESERVE_DYNAMIC_PARTITION_ENABLE, String.valueOf(reserveDynamicPartitionEnable)); properties.put(PROP_IS_BEING_SYNCED, String.valueOf(isBeingSynced)); 
properties.put(PROP_CLEAN_TABLES, String.valueOf(isCleanTables)); properties.put(PROP_CLEAN_PARTITIONS, String.valueOf(isCleanPartitions)); properties.put(PROP_ATOMIC_RESTORE, String.valueOf(isAtomicRestore)); + properties.put(PROP_STORAGE_RESOURCE, storageResource); + properties.put(PROP_RESERVE_STORAGE_POLICY, String.valueOf(reserveStoragePolicy)); } public RestoreJob(String label, String backupTs, long dbId, String dbName, BackupJobInfo jobInfo, boolean allowLoad, ReplicaAllocation replicaAlloc, long timeoutMs, int metaVersion, boolean reserveReplica, boolean reserveDynamicPartitionEnable, boolean isBeingSynced, boolean isCleanTables, - boolean isCleanPartitions, boolean isAtomicRestore, Env env, long repoId, BackupMeta backupMeta) { + boolean isCleanPartitions, boolean isAtomicRestore, String storageResource, + boolean reserveStoragePolicy, Env env, long repoId, BackupMeta backupMeta) { this(label, backupTs, dbId, dbName, jobInfo, allowLoad, replicaAlloc, timeoutMs, metaVersion, reserveReplica, - reserveDynamicPartitionEnable, isBeingSynced, isCleanTables, isCleanPartitions, isAtomicRestore, env, - repoId); + reserveDynamicPartitionEnable, isBeingSynced, isCleanTables, isCleanPartitions, isAtomicRestore, + storageResource, reserveStoragePolicy, env, repoId); this.backupMeta = backupMeta; } @@ -624,6 +645,32 @@ private void checkAndPrepareMeta() { } } + for (BackupJobInfo.BackupS3ResourceInfo backupS3ResourceInfo : jobInfo.newBackupObjects.s3Resources) { + Resource resource = Env.getCurrentEnv().getResourceMgr().getResource(StringUtils.isNotEmpty(storageResource) + ? 
storageResource : backupS3ResourceInfo.name); + if (resource == null) { + continue; + } + if (resource.getType() != Resource.ResourceType.S3) { + status = new Status(ErrCode.COMMON_ERROR, + "The local resource " + resource.getName() + + " with the same name but a different type of backup meta."); + return; + } + } + + for (BackupJobInfo.BackupStoragePolicyInfo backupStoragePolicyInfo : jobInfo.newBackupObjects.storagePolicies) { + String backupStoragePoliceName = backupStoragePolicyInfo.name; + Optional localPolicy = Env.getCurrentEnv().getPolicyMgr().findPolicy(backupStoragePoliceName, + PolicyTypeEnum.STORAGE); + if (localPolicy.isPresent() && localPolicy.get().getType() != PolicyTypeEnum.STORAGE) { + status = new Status(ErrCode.COMMON_ERROR, + "The local policy " + backupStoragePoliceName + + " with the same name but a different type of backup meta."); + return; + } + } + // the new tablets -> { local tablet, schema hash, storage medium }, used in atomic restore. Map tabletBases = new HashMap<>(); @@ -680,6 +727,16 @@ private void checkAndPrepareMeta() { BackupPartitionInfo backupPartInfo = partitionEntry.getValue(); Partition localPartition = localOlapTbl.getPartition(partitionName); Partition remotePartition = remoteOlapTbl.getPartition(partitionName); + + String policyName = remoteOlapTbl.getPartitionInfo() + .getDataProperty(remotePartition.getId()).getStoragePolicy(); + if (StringUtils.isNotEmpty(policyName)) { + status = new Status(ErrCode.COMMON_ERROR, "Can't restore remote partition " + + partitionName + " in table " + remoteTbl.getName() + " with storage policy " + + policyName + " when local table " + localTbl.getName() + " exist." + + " Please drop old table and restore again."); + return; + } if (localPartition != null) { // Partition already exist. 
PartitionInfo localPartInfo = localOlapTbl.getPartitionInfo(); @@ -766,7 +823,8 @@ private void checkAndPrepareMeta() { // reset all ids in this table String srcDbName = jobInfo.dbName; - Status st = remoteOlapTbl.resetIdsForRestore(env, db, replicaAlloc, reserveReplica, srcDbName); + Status st = remoteOlapTbl.resetIdsForRestore(env, db, replicaAlloc, + reserveReplica, reserveStoragePolicy, srcDbName); if (!st.ok()) { status = st; return; @@ -855,6 +913,18 @@ private void checkAndPrepareMeta() { if (isAtomicRestore && !restoredPartitions.isEmpty()) { throw new RuntimeException("atomic restore is set, but the restored partitions is not empty"); } + + // check and restore resources + checkAndRestoreResources(); + if (!status.ok()) { + return; + } + // check and restore storage policies, should before createReplicas to get storage_policy_id + checkAndRestoreStoragePolicies(); + if (!status.ok()) { + return; + } + for (Pair entry : restoredPartitions) { OlapTable localTbl = (OlapTable) db.getTableNullable(entry.first); Preconditions.checkNotNull(localTbl, localTbl.getName()); @@ -908,11 +978,6 @@ private void checkAndPrepareMeta() { db.readUnlock(); } - // check and restore resources - checkAndRestoreResources(); - if (!status.ok()) { - return; - } LOG.debug("finished to restore resources. 
{}", this.jobId); // Send create replica task to BE outside the db lock @@ -1206,7 +1271,7 @@ private void checkAndRestoreResources() { } else { try { // restore resource - resourceMgr.createResource(remoteOdbcResource, false); + resourceMgr.createResource(remoteOdbcResource); } catch (DdlException e) { status = new Status(ErrCode.COMMON_ERROR, e.getMessage()); return; @@ -1214,6 +1279,107 @@ private void checkAndRestoreResources() { restoredResources.add(remoteOdbcResource); } } + + if (!reserveStoragePolicy) { + return; + } + + for (BackupJobInfo.BackupS3ResourceInfo backupS3ResourceInfo : jobInfo.newBackupObjects.s3Resources) { + String backupResourceName = backupS3ResourceInfo.name; + Resource localResource = resourceMgr.getResource(StringUtils.isNotEmpty(storageResource) + ? storageResource : backupResourceName); + S3Resource remoteS3Resource = (S3Resource) backupMeta.getResource(backupResourceName); + + if (StringUtils.isNotEmpty(storageResource)) { + if (localResource != null) { + if (localResource.getType() != Resource.ResourceType.S3) { + status = new Status(ErrCode.COMMON_ERROR, "The type of local resource " + + backupResourceName + " is not same as restored resource"); + return; + } + S3Resource localS3Resource = (S3Resource) localResource; + if (localS3Resource.getProperty(S3Properties.ENDPOINT) + .equals(remoteS3Resource.getProperty(S3Properties.ENDPOINT)) + && localS3Resource.getProperty(S3Properties.BUCKET) + .equals(remoteS3Resource.getProperty(S3Properties.BUCKET)) + && localS3Resource.getProperty(S3Properties.ROOT_PATH) + .equals(remoteS3Resource.getProperty(S3Properties.ROOT_PATH))) { + status = new Status(ErrCode.COMMON_ERROR, "local S3 resource " + + storageResource + " root path " + localS3Resource.getProperty(S3Properties.ROOT_PATH) + + " should not same as restored resource root path"); + return; + } + } else { + status = new Status(ErrCode.COMMON_ERROR, + "The local resource " + storageResource + " is not exist."); + return; + } + } else { 
+ if (localResource != null) { + if (localResource.getType() != Resource.ResourceType.S3) { + status = new Status(ErrCode.COMMON_ERROR, "The type of local resource " + + backupResourceName + " is not same as restored resource"); + return; + } + S3Resource localS3Resource = (S3Resource) localResource; + if (localS3Resource.getSignature(BackupHandler.SIGNATURE_VERSION) + != remoteS3Resource.getSignature(BackupHandler.SIGNATURE_VERSION)) { + status = new Status(ErrCode.COMMON_ERROR, "S3 resource " + + jobInfo.getAliasByOriginNameIfSet(backupResourceName) + + " already exist but with different properties"); + return; + } + } else { + try { + // restore resource + resourceMgr.createResource(remoteS3Resource); + } catch (DdlException e) { + status = new Status(ErrCode.COMMON_ERROR, e.getMessage()); + return; + } + restoredResources.add(remoteS3Resource); + } + } + } + } + + private void checkAndRestoreStoragePolicies() { + if (!reserveStoragePolicy) { + return; + } + PolicyMgr policyMgr = Env.getCurrentEnv().getPolicyMgr(); + for (BackupJobInfo.BackupStoragePolicyInfo backupStoragePolicyInfo : jobInfo.newBackupObjects.storagePolicies) { + String backupStoragePoliceName = backupStoragePolicyInfo.name; + Optional localPolicy = policyMgr.findPolicy(backupStoragePoliceName, + PolicyTypeEnum.STORAGE); + StoragePolicy backupStoargePolicy = backupMeta.getStoragePolicy(backupStoragePoliceName); + + // use specified storageResource + if (StringUtils.isNotEmpty(storageResource)) { + backupStoargePolicy.setStorageResource(storageResource); + } + if (localPolicy.isPresent()) { + StoragePolicy localStoargePolicy = (StoragePolicy) localPolicy.get(); + // storage policy name and resource name should be same + if (localStoargePolicy.getSignature(BackupHandler.SIGNATURE_VERSION) + != backupStoargePolicy.getSignature(BackupHandler.SIGNATURE_VERSION)) { + status = new Status(ErrCode.COMMON_ERROR, "Storage policy " + + jobInfo.getAliasByOriginNameIfSet(backupStoragePoliceName) + + " already 
exist but with different properties"); + return; + } + } else { + // restore storage policy + try { + policyMgr.replayCreate(backupStoargePolicy); + Env.getCurrentEnv().getEditLog().logCreatePolicy(backupStoargePolicy); + } catch (Exception e) { + status = new Status(ErrCode.COMMON_ERROR, "failed to restore storage policy: " + e); + return; + } + storagePolicies.add(backupStoargePolicy); + } + } } private boolean genFileMappingWhenBackupReplicasEqual(PartitionInfo localPartInfo, Partition localPartition, @@ -1272,6 +1438,11 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc Env.getCurrentInvertedIndex().addTablet(restoreTablet.getId(), tabletMeta); for (Replica restoreReplica : restoreTablet.getReplicas()) { Env.getCurrentInvertedIndex().addReplica(restoreTablet.getId(), restoreReplica); + String storagePolicy = ""; + if (reserveStoragePolicy) { + storagePolicy = localTbl.getPartitionInfo() + .getDataProperty(restorePart.getId()).getStoragePolicy(); + } CreateReplicaTask task = new CreateReplicaTask(restoreReplica.getBackendId(), dbId, localTbl.getId(), restorePart.getId(), restoredIdx.getId(), restoreTablet.getId(), restoreReplica.getId(), indexMeta.getShortKeyColumnCount(), @@ -1284,7 +1455,8 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc localTbl.getPartitionInfo().getTabletType(restorePart.getId()), null, localTbl.getCompressionType(), - localTbl.getEnableUniqueKeyMergeOnWrite(), localTbl.getStoragePolicy(), + localTbl.getEnableUniqueKeyMergeOnWrite(), + storagePolicy, localTbl.disableAutoCompaction(), localTbl.enableSingleReplicaCompaction(), localTbl.skipWriteIndexOnLoad(), @@ -1302,7 +1474,8 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc if (baseTabletRef != null) { // ensure this replica is bound to the same backend disk as the origin table's replica. 
task.setBaseTablet(baseTabletRef.tabletId, baseTabletRef.schemaHash); - LOG.info("set base tablet {} for replica {} in restore job {}, tablet id={}", - baseTabletRef.tabletId, restoreReplica.getId(), jobId, restoreTablet.getId()); + LOG.info("set base tablet {} for replica {} in restore job {}, tablet id={}," + + " storage_policy={}", baseTabletRef.tabletId, restoreReplica.getId(), jobId, + restoreTablet.getId(), storagePolicy); } batchTask.addTask(task); @@ -1485,6 +1658,22 @@ private void replayCheckAndPrepareMeta() { } } + // restored resource + ResourceMgr resourceMgr = Env.getCurrentEnv().getResourceMgr(); + for (Resource resource : restoredResources) { + resourceMgr.replayCreateResource(resource); + } + + // restored storage policy + PolicyMgr policyMgr = Env.getCurrentEnv().getPolicyMgr(); + for (StoragePolicy storagePolicy : storagePolicies) { + Optional localPolicy = policyMgr.findPolicy(storagePolicy.getPolicyName(), + PolicyTypeEnum.STORAGE); + if (!localPolicy.isPresent()) { + policyMgr.replayCreate(storagePolicy); + } + } + // restored partitions for (Pair entry : restoredPartitions) { OlapTable localTbl = (OlapTable) db.getTableNullable(entry.first); @@ -1556,12 +1745,6 @@ private void replayCheckAndPrepareMeta() { } } - // restored resource - ResourceMgr resourceMgr = Env.getCurrentEnv().getResourceMgr(); - for (Resource resource : restoredResources) { - resourceMgr.replayCreateResource(resource); - } - LOG.info("replay check and prepare meta. 
{}", this); } @@ -2034,6 +2217,7 @@ private Status allTabletCommitted(boolean isReplay) { restoredPartitions.clear(); restoredTbls.clear(); restoredResources.clear(); + storagePolicies.clear(); // release snapshot before clearing snapshotInfos releaseSnapshots(); @@ -2289,6 +2473,13 @@ private void cancelInternal(boolean isReplay) { LOG.info("remove restored resource when cancelled: {}", resource.getName()); resourceMgr.dropResource(resource); } + + // remove restored storage policy + PolicyMgr policyMgr = Env.getCurrentEnv().getPolicyMgr(); + for (StoragePolicy storagePolicy : storagePolicies) { + LOG.info("remove restored storage polciy when cancelled: {}", storagePolicy.getName()); + policyMgr.replayDrop(storagePolicy); + } } if (!isReplay) { @@ -2546,6 +2737,11 @@ private void writeOthers(DataOutput out) throws IOException { resource.write(out); } + out.writeInt(storagePolicies.size()); + for (StoragePolicy policy : storagePolicies) { + policy.write(out); + } + // write properties out.writeInt(properties.size()); for (Map.Entry entry : properties.entrySet()) { @@ -2644,6 +2840,12 @@ private void readOthers(DataInput in) throws IOException { restoredResources.add(Resource.read(in)); } + // restored storage policy + size = in.readInt(); + for (int i = 0; i < size; i++) { + storagePolicies.add(StoragePolicy.read(in)); + } + // read properties size = in.readInt(); for (int i = 0; i < size; i++) { @@ -2657,6 +2859,8 @@ private void readOthers(DataInput in) throws IOException { isCleanTables = Boolean.parseBoolean(properties.get(PROP_CLEAN_TABLES)); isCleanPartitions = Boolean.parseBoolean(properties.get(PROP_CLEAN_PARTITIONS)); isAtomicRestore = Boolean.parseBoolean(properties.get(PROP_ATOMIC_RESTORE)); + storageResource = properties.get(PROP_STORAGE_RESOURCE); + reserveStoragePolicy = Boolean.parseBoolean(properties.get(PROP_RESERVE_STORAGE_POLICY)); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 9b95b1b20ec6ec..68b8e8aaa92750 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -565,7 +565,7 @@ public void resetVersionForRestore() { } public Status resetIdsForRestore(Env env, Database db, ReplicaAllocation restoreReplicaAlloc, - boolean reserveReplica, String srcDbName) { + boolean reserveReplica, boolean reserveStoragePolicy, String srcDbName) { // ATTN: The meta of the restore may come from different clusters, so the // original ID in the meta may conflict with the ID of the new cluster. For // example, if a newly allocated ID happens to be the same as an original ID, @@ -614,7 +614,7 @@ public Status resetIdsForRestore(Env env, Database db, ReplicaAllocation restore boolean isSinglePartition = partitionInfo.getType() != PartitionType.RANGE && partitionInfo.getType() != PartitionType.LIST; partitionInfo.resetPartitionIdForRestore(partitionMap, - reserveReplica ? null : restoreReplicaAlloc, isSinglePartition); + reserveReplica ? null : restoreReplicaAlloc, reserveStoragePolicy, isSinglePartition); // for each partition, reset rollup index map Map nextIndexs = Maps.newHashMap(); @@ -1682,7 +1682,7 @@ public OlapTable selectiveCopy(Collection reservedPartitions, IndexExtSt // set storage medium to HDD for backup job, because we want that the backuped table // can be able to restored to another Doris cluster without SSD disk. // But for other operation such as truncate table, keep the origin storage medium. 
- copied.getPartitionInfo().setDataProperty(partition.getId(), new DataProperty(TStorageMedium.HDD)); + copied.getPartitionInfo().getDataProperty(partition.getId()).setStorageMedium(TStorageMedium.HDD); } for (MaterializedIndex idx : partition.getMaterializedIndices(extState)) { idx.setState(IndexState.NORMAL); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java index 8cf1d664f5293c..8b4a2c78ec2900 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java @@ -351,7 +351,7 @@ public void moveFromTempToFormal(long tempPartitionId) { public void resetPartitionIdForRestore( Map partitionIdMap, - ReplicaAllocation restoreReplicaAlloc, boolean isSinglePartitioned) { + ReplicaAllocation restoreReplicaAlloc, boolean reserveStoragePolicy, boolean isSinglePartitioned) { Map origIdToDataProperty = idToDataProperty; Map origIdToReplicaAllocation = idToReplicaAllocation; Map origIdToItem = idToItem; @@ -364,7 +364,8 @@ public void resetPartitionIdForRestore( idToStoragePolicy = Maps.newHashMap(); for (Map.Entry entry : partitionIdMap.entrySet()) { - idToDataProperty.put(entry.getKey(), origIdToDataProperty.get(entry.getValue())); + idToDataProperty.put(entry.getKey(), reserveStoragePolicy + ? origIdToDataProperty.get(entry.getValue()) : DataProperty.DEFAULT_HDD_DATA_PROPERTY); idToReplicaAllocation.put(entry.getKey(), restoreReplicaAlloc == null ? origIdToReplicaAllocation.get(entry.getValue()) : restoreReplicaAlloc); @@ -372,7 +373,8 @@ public void resetPartitionIdForRestore( idToItem.put(entry.getKey(), origIdToItem.get(entry.getValue())); } idToInMemory.put(entry.getKey(), origIdToInMemory.get(entry.getValue())); - idToStoragePolicy.put(entry.getKey(), origIdToStoragePolicy.get(entry.getValue())); + idToStoragePolicy.put(entry.getKey(), reserveStoragePolicy + ? 
origIdToStoragePolicy.get(entry.getValue()) : "");
         }
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ResourceMgr.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ResourceMgr.java
index b6631ab519b474..c33007b2a0a838 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ResourceMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ResourceMgr.java
@@ -81,6 +81,13 @@ public void createResource(CreateResourceStmt stmt) throws DdlException {
         }
     }
 
+    public void createResource(Resource resource) throws DdlException {
+        if (createResource(resource, false)) {
+            Env.getCurrentEnv().getEditLog().logCreateResource(resource);
+            LOG.info("Create resource success. Resource: {}", resource.getName());
+        }
+    }
+
     // Return true if the resource is truly added,
     // otherwise, return false or throw exception.
     public boolean createResource(Resource resource, boolean ifNotExists) throws DdlException {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java
index 1dd09cbf981621..677f6d370032e3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java
@@ -34,11 +34,13 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.io.UnsupportedEncodingException;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.zip.Adler32;
 
 /**
  * S3 resource
@@ -250,4 +252,46 @@ protected void getProcNodeData(BaseProcResult result) {
         }
         readUnlock();
     }
+
+    /**
+     * Computes an Adler-32 based signature over this resource's name, type and properties.
+     *
+     * @param signatureVersion version value mixed into the checksum before any field
+     * @return the absolute value of the checksum truncated to int, or -1 on an encoding error
+     */
+    public int getSignature(int signatureVersion) {
+        Adler32 adler32 = new Adler32();
+        adler32.update(signatureVersion);
+        final String charsetName = "UTF-8";
+
+        try {
+            // resource name
+            adler32.update(name.getBytes(charsetName));
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("signature. resource name: {}", name);
+            }
+            // type
+            adler32.update(type.name().getBytes(charsetName));
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("signature. resource type: {}", type.name());
+            }
+            // configs: Adler-32 is order-sensitive, so visit the entries in sorted key order
+            for (Map.Entry<String, String> config : new java.util.TreeMap<String, String>(properties).entrySet()) {
+                adler32.update(config.getKey().getBytes(charsetName));
+                adler32.update(config.getValue().getBytes(charsetName));
+                if (LOG.isDebugEnabled()) {
+                    // log the key only: values may hold credentials such as AWS_SECRET_KEY
+                    LOG.debug("signature. resource config key: {}", config.getKey());
+                }
+            }
+        } catch (UnsupportedEncodingException e) {
+            LOG.error("encoding error", e);
+            return -1;
+        }
+
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("signature: {}", Math.abs((int) adler32.getValue()));
+        }
+        return Math.abs((int) adler32.getValue());
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/policy/Policy.java b/fe/fe-core/src/main/java/org/apache/doris/policy/Policy.java
index b06cd19d0cf8c2..b1052ce2e1cb6c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/policy/Policy.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/policy/Policy.java
@@ -102,6 +102,10 @@ public Policy(long id, final PolicyTypeEnum type, final String policyName) {
         this.version = 0;
     }
 
+    public String getName() {
+        return policyName;
+    }
+
     /**
      * Trans stmt to Policy.
**/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java b/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java index 7ca5d4fcbeac76..344c333c4966d9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java @@ -247,7 +247,7 @@ public List getCopiedPoliciesByType(PolicyTypeEnum policyType) { } } - private List getPoliciesByType(PolicyTypeEnum policyType) { + public List getPoliciesByType(PolicyTypeEnum policyType) { if (typeToPolicyMap == null) { return new ArrayList<>(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/policy/StoragePolicy.java b/fe/fe-core/src/main/java/org/apache/doris/policy/StoragePolicy.java index e18495c50d0fd5..72c99484dd279c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/policy/StoragePolicy.java +++ b/fe/fe-core/src/main/java/org/apache/doris/policy/StoragePolicy.java @@ -24,8 +24,12 @@ import org.apache.doris.catalog.ScalarType; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; +import org.apache.doris.common.io.Text; +import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.persist.gson.GsonPostProcessable; +import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.qe.ShowResultSetMetaData; import com.google.common.base.Strings; @@ -36,18 +40,22 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.time.LocalDateTime; import java.time.format.DateTimeParseException; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.zip.Adler32; /** * Save policy for storage migration. 
**/ @Data -public class StoragePolicy extends Policy { +public class StoragePolicy extends Policy implements Writable, GsonPostProcessable { public static final String DEFAULT_STORAGE_POLICY_NAME = "default_storage_policy"; public static boolean checkDefaultStoragePolicyValid(final String storagePolicyName, Optional defaultPolicy) @@ -381,4 +389,51 @@ public boolean removeResourceReference() { } return false; } + + @Override + public void write(DataOutput out) throws IOException { + String json = GsonUtils.GSON.toJson(this); + Text.writeString(out, json); + } + + /** + * Read Policy from file. + **/ + public static StoragePolicy read(DataInput in) throws IOException { + String json = Text.readString(in); + return GsonUtils.GSON.fromJson(json, StoragePolicy.class); + } + + public int getSignature(int signatureVersion) { + Adler32 adler32 = new Adler32(); + adler32.update(signatureVersion); + final String charsetName = "UTF-8"; + + //ignore check id, version, cooldownTimestampMs, cooldownTtl + try { + // policy name + adler32.update(policyName.getBytes(charsetName)); + if (LOG.isDebugEnabled()) { + LOG.debug("signature. policy name: {}", policyName); + } + // storageResource name + adler32.update(storageResource.getBytes(charsetName)); + if (LOG.isDebugEnabled()) { + LOG.debug("signature. storageResource name: {}", storageResource); + } + // type + adler32.update(String.valueOf(getType()).getBytes(charsetName)); + if (LOG.isDebugEnabled()) { + LOG.debug("signature. 
type : {}", getType()); + } + } catch (UnsupportedEncodingException e) { + LOG.error("encoding error", e); + return -1; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("signature: {}", Math.abs((int) adler32.getValue())); + } + return Math.abs((int) adler32.getValue()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index d83ff7e08156d1..b9e4be8e118527 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -29,6 +29,8 @@ import org.apache.doris.analysis.TableRef; import org.apache.doris.analysis.TypeDef; import org.apache.doris.analysis.UserIdentity; +import org.apache.doris.backup.AbstractJob; +import org.apache.doris.backup.BackupJob; import org.apache.doris.backup.Snapshot; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; @@ -277,6 +279,19 @@ public TConfirmUnusedRemoteFilesResult confirmUnusedRemoteFiles(TConfirmUnusedRe LOG.warn("tablet {} not found", info.tablet_id); return; } + + List jobs = Env.getCurrentEnv().getBackupHandler() + .getJobs(tabletMeta.getDbId(), label -> true); + + List runningBackupJobs = jobs.stream().filter(job -> job instanceof BackupJob) + .filter(job -> !((BackupJob) job).isDone()) + .map(job -> (BackupJob) job).collect(Collectors.toList()); + + if (runningBackupJobs.size() > 0) { + LOG.warn("Backup is running on this tablet {} ", info.tablet_id); + return; + } + Tablet tablet; int replicaNum; try { diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/BackupHandlerTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/BackupHandlerTest.java index 97e689b697256c..5258aea0815524 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/BackupHandlerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/BackupHandlerTest.java @@ -43,6 +43,7 
@@ import org.apache.doris.common.jmockit.Deencapsulation; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.persist.EditLog; +import org.apache.doris.policy.StoragePolicy; import org.apache.doris.task.DirMoveTask; import org.apache.doris.task.DownloadTask; import org.apache.doris.task.SnapshotTask; @@ -212,7 +213,8 @@ public Status getSnapshotInfoFile(String label, String backupTimestamp, List tbls = Lists.newArrayList(); tbls.add(tbl); List resources = Lists.newArrayList(); - BackupMeta backupMeta = new BackupMeta(tbls, resources); + List storagePolicys = Lists.newArrayList(); + BackupMeta backupMeta = new BackupMeta(tbls, resources, storagePolicys); Map snapshotInfos = Maps.newHashMap(); for (Partition part : tbl.getPartitions()) { for (MaterializedIndex idx : part.getMaterializedIndices(IndexExtState.VISIBLE)) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java index 7e8e55eea327c4..4904f26ca7f4a4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java @@ -42,6 +42,7 @@ import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.fs.FileSystemFactory; import org.apache.doris.persist.EditLog; +import org.apache.doris.policy.StoragePolicy; import org.apache.doris.resource.Tag; import org.apache.doris.system.SystemInfoService; import org.apache.doris.thrift.TStorageMedium; @@ -251,13 +252,14 @@ boolean await(long timeout, TimeUnit unit) { db.dropTable(expectedRestoreTbl.getName()); job = new RestoreJob(label, "2018-01-01 01:01:01", db.getId(), db.getFullName(), jobInfo, false, - new ReplicaAllocation((short) 3), 100000, -1, false, false, false, false, false, false, + new ReplicaAllocation((short) 3), 100000, -1, false, false, false, false, false, false, null, false, env, repo.getId()); List
tbls = Lists.newArrayList(); List resources = Lists.newArrayList(); + List storagePolicies = Lists.newArrayList(); tbls.add(expectedRestoreTbl); - backupMeta = new BackupMeta(tbls, resources); + backupMeta = new BackupMeta(tbls, resources, storagePolicies); } @Test diff --git a/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy b/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy new file mode 100644 index 00000000000000..11d890a20ef019 --- /dev/null +++ b/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy @@ -0,0 +1,1384 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_backup_cooldown", "backup_cooldown_data") { + + String suiteName = "test_backup_cooldown" + String resource_name1 = "resource_${suiteName}_1" + String policy_name1 = "policy_${suiteName}_1" + String resource_name2 = "resource_${suiteName}_2" + String policy_name2 = "policy_${suiteName}_2" + String dbName = "${suiteName}_db" + String tableName = "${suiteName}_table" + String snapshotName = "${suiteName}_snapshot" + String repoName = "${suiteName}_repo" + + def syncer = getSyncer() + syncer.createS3Repository(repoName) + + + + sql """ + CREATE RESOURCE IF NOT EXISTS "${resource_name1}" + PROPERTIES( + "type"="s3", + "AWS_ENDPOINT" = "${getS3Endpoint()}", + "AWS_REGION" = "${getS3Region()}", + "AWS_ROOT_PATH" = "regression/cooldown1", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_MAX_CONNECTIONS" = "50", + "AWS_REQUEST_TIMEOUT_MS" = "3000", + "AWS_CONNECTION_TIMEOUT_MS" = "1000", + "AWS_BUCKET" = "${getS3BucketName()}", + "s3_validity_check" = "true" + ); + """ + + sql """ + CREATE RESOURCE IF NOT EXISTS "${resource_name2}" + PROPERTIES( + "type"="s3", + "AWS_ENDPOINT" = "${getS3Endpoint()}", + "AWS_REGION" = "${getS3Region()}", + "AWS_ROOT_PATH" = "regression/cooldown2", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_MAX_CONNECTIONS" = "50", + "AWS_REQUEST_TIMEOUT_MS" = "3000", + "AWS_CONNECTION_TIMEOUT_MS" = "1000", + "AWS_BUCKET" = "${getS3BucketName()}", + "s3_validity_check" = "true" + ); + """ + + sql """ + CREATE STORAGE POLICY IF NOT EXISTS ${policy_name1} + 
PROPERTIES( + "storage_resource" = "${resource_name1}", + "cooldown_ttl" = "10" + ) + """ + + sql """ + CREATE STORAGE POLICY IF NOT EXISTS ${policy_name2} + PROPERTIES( + "storage_resource" = "${resource_name2}", + "cooldown_ttl" = "10" + ) + """ + + //generate_cooldown_task_interval_sec default is 20 + + sql "CREATE DATABASE IF NOT EXISTS ${dbName}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + + sql """ + CREATE TABLE ${dbName}.${tableName} + ( + k1 BIGINT, + v1 VARCHAR(48), + INDEX idx1 (v1) USING INVERTED PROPERTIES("parser" = "english") + ) + DUPLICATE KEY(k1) + PARTITION BY RANGE(`k1`) + ( + PARTITION p201701 VALUES [(0), (3)) ("storage_policy" = "${policy_name1}"), + PARTITION `p201702` VALUES LESS THAN (6)("storage_policy" = "${policy_name2}"), + PARTITION `p2018` VALUES [(6),(100)) + ) + DISTRIBUTED BY HASH (k1) BUCKETS 3 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + List values = [] + for (int i = 1; i <= 10; ++i) { + values.add("(${i}, ${i})") + } + sql "INSERT INTO ${dbName}.${tableName} VALUES ${values.join(",")}" + def result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + + // wait cooldown + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + int count = 0; + while (sqlResult.contains("0.00")) { + if (++count >= 120) { // 10min + logger.error('cooldown task is timeouted') + throw new Exception("cooldown task is timeouted after 10 mins") + } + Thread.sleep(5000) + + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + + + } + + assertNotEquals('0.000 ', result[0][5].toString()) + + sql """ + BACKUP SNAPSHOT ${dbName}.${snapshotName} + TO `${repoName}` + ON (${tableName}) + """ + + syncer.waitSnapshotFinish(dbName) + + def snapshot = syncer.getSnapshotTimestamp(repoName, snapshotName) + assertTrue(snapshot != null) + + sql "DROP TABLE ${dbName}.${tableName}" + + sql """ + drop 
storage policy ${policy_name1}; + """ + + sql """ + drop resource ${resource_name1}; + """ + + sql """ + drop storage policy ${policy_name2}; + """ + + sql """ + drop resource ${resource_name2}; + """ + + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // wait cooldown + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + count = 0; + while (sqlResult.contains("0.00")) { + if (++count >= 120) { // 10min + logger.error('cooldown task is timeouted') + throw new Exception("cooldown task is timeouted after 10 mins") + } + Thread.sleep(5000) + + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + + + } + + //cleanup + sql "DROP TABLE ${dbName}.${tableName} FORCE" + sql "DROP DATABASE ${dbName} FORCE" + sql "DROP REPOSITORY `${repoName}`" + + sql """ + drop storage policy ${policy_name1}; + """ + + sql """ + drop resource ${resource_name1}; + """ + + sql """ + drop storage policy ${policy_name2}; + """ + + sql """ + drop resource ${resource_name2}; + """ +} + +// test restore back to old instance +suite("test_backup_cooldown_1", "backup_cooldown_data") { + + String suiteName = "test_backup_cooldown_1" + String resource_name1 = "resource_${suiteName}_1" + String policy_name1 = "policy_${suiteName}_1" + String resource_name2 = "resource_${suiteName}_2" + String policy_name2 = "policy_${suiteName}_2" + String dbName = "${suiteName}_db" + String tableName = "${suiteName}_table" + String snapshotName = "${suiteName}_snapshot" + String repoName = "${suiteName}_repo" + def found = 0 + def records + def result + def row + + def syncer = getSyncer() + syncer.createS3Repository(repoName) + + + + sql """ + CREATE 
RESOURCE IF NOT EXISTS "${resource_name1}" + PROPERTIES( + "type"="s3", + "AWS_ENDPOINT" = "${getS3Endpoint()}", + "AWS_REGION" = "${getS3Region()}", + "AWS_ROOT_PATH" = "regression/cooldown1", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_MAX_CONNECTIONS" = "50", + "AWS_REQUEST_TIMEOUT_MS" = "3000", + "AWS_CONNECTION_TIMEOUT_MS" = "1000", + "AWS_BUCKET" = "${getS3BucketName()}", + "s3_validity_check" = "true" + ); + """ + + sql """ + CREATE RESOURCE IF NOT EXISTS "${resource_name2}" + PROPERTIES( + "type"="s3", + "AWS_ENDPOINT" = "${getS3Endpoint()}", + "AWS_REGION" = "${getS3Region()}", + "AWS_ROOT_PATH" = "regression/cooldown2", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_MAX_CONNECTIONS" = "50", + "AWS_REQUEST_TIMEOUT_MS" = "3000", + "AWS_CONNECTION_TIMEOUT_MS" = "1000", + "AWS_BUCKET" = "${getS3BucketName()}", + "s3_validity_check" = "true" + ); + """ + + sql """ + CREATE STORAGE POLICY IF NOT EXISTS ${policy_name1} + PROPERTIES( + "storage_resource" = "${resource_name1}", + "cooldown_ttl" = "10" + ) + """ + + sql """ + CREATE STORAGE POLICY IF NOT EXISTS ${policy_name2} + PROPERTIES( + "storage_resource" = "${resource_name2}", + "cooldown_ttl" = "10" + ) + """ + + //generate_cooldown_task_interval_sec default is 20 + + sql "CREATE DATABASE IF NOT EXISTS ${dbName}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + + sql """ + CREATE TABLE ${dbName}.${tableName} + ( + k1 BIGINT, + v1 VARCHAR(48), + INDEX idx1 (v1) USING INVERTED PROPERTIES("parser" = "english") + ) + DUPLICATE KEY(k1) + PARTITION BY RANGE(`k1`) + ( + PARTITION p201701 VALUES [(0), (3)) ("storage_policy" = "${policy_name1}"), + PARTITION `p201702` VALUES LESS THAN (6)("storage_policy" = "${policy_name2}"), + PARTITION `p2018` VALUES [(6),(100)) + ) + DISTRIBUTED BY HASH (k1) BUCKETS 3 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + List values = [] + for (int i = 1; i <= 10; ++i) { + 
values.add("(${i}, ${i})")
+    }
+    sql "INSERT INTO ${dbName}.${tableName} VALUES ${values.join(",")}"
+    result = sql "SELECT * FROM ${dbName}.${tableName}"
+    assertEquals(result.size(), values.size());
+
+
+    // wait cooldown
+    result = sql "show data FROM ${dbName}.${tableName}"
+    sqlResult = result[0][5].toString();
+    int count = 0;
+    while (sqlResult.contains("0.00")) {
+        if (++count >= 120) {  // 10min
+            logger.error('cooldown task is timeouted')
+            throw new Exception("cooldown task is timeouted after 10 mins")
+        }
+        Thread.sleep(5000)
+
+        result = sql "show data FROM ${dbName}.${tableName}"
+        sqlResult = result[0][5].toString();
+    }
+
+    assertNotEquals('0.000 ', result[0][5].toString())
+
+    sql """
+        BACKUP SNAPSHOT ${dbName}.${snapshotName}
+        TO `${repoName}`
+        ON (${tableName})
+    """
+
+    syncer.waitSnapshotFinish(dbName)
+
+    def snapshot = syncer.getSnapshotTimestamp(repoName, snapshotName)
+    assertTrue(snapshot != null)
+
+    // 1 The old table still exists
+    // 1.1 restore without the property: expected to fail; restoring a table with cooldown (hot/cold) properties into an existing table is not supported.
+    // 1.2 restore with ("reserve_storage_policy"="true"): expected to fail; restoring a table with cooldown (hot/cold) properties into an existing table is not supported.
+    // 1.3 restore with ("reserve_storage_policy"="false"): expected to succeed, and the data does not cool down. NOTE(review): the step 1.3 assertions further down expect "Can't restore remote partition" - confirm which is intended.
+
+    // 2 Drop the old table
+    // 2.1 restore without the property: expected to succeed, and the data cools down
+    // 2.2 restore with ("reserve_storage_policy"="true"): expected to succeed, and the data cools down
+    // 2.3 restore with ("reserve_storage_policy"="false"): expected to succeed, and the data does not cool down
+
+
+    // 3 Drop the resource and the policy
+    // 3.1 restore without the property: expected to succeed, and the data cools down
+    // 3.2 restore with ("reserve_storage_policy"="true"): expected to succeed, and the data cools down
+    // 3.3 restore with ("reserve_storage_policy"="false"): expected to succeed, and the data does not cool down
+
+
+    // 1.
old table exist + // 1.1 restore normal fail + // 1.2 restore with("reserve_storage_policy"="true") fail + // 1.3 restore with("reserve_storage_policy"="false") success and don't cooldown + logger.info(" ====================================== 1.1 ==================================== ") + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + // restore failed + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + assertTrue(row.Status.contains("Can't restore remote partition")) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + + logger.info(" ====================================== 1.2 ==================================== ") + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_storage_policy"="true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + // restore failed + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + assertTrue(row.Status.contains("Can't restore remote partition")) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + + logger.info(" ====================================== 1.3 ==================================== ") + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_storage_policy"="false" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + // restore failed + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + 
assertTrue(row.Status.contains("Can't restore remote partition")) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + + // 2. drop old table + // 2.1 restore normal success and cooldown + // 2.2 restore with ("reserve_storage_policy"="true")success and cooldown + // 2.3 restore with ("reserve_storage_policy"="false")success and don't cooldown + logger.info(" ====================================== 2.1 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // wait cooldown + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + count = 0; + while (sqlResult.contains("0.00")) { + if (++count >= 120) { // 10min + logger.error('cooldown task is timeouted') + throw new Exception("cooldown task is timeouted after 10 mins") + } + Thread.sleep(5000) + + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + } + assertNotEquals('0.000 ', result[0][5].toString()) + + + logger.info(" ====================================== 2.2 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_storage_policy"="true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // wait cooldown + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = 
result[0][5].toString(); + count = 0; + while (sqlResult.contains("0.00")) { + if (++count >= 120) { // 10min + logger.error('cooldown task is timeouted') + throw new Exception("cooldown task is timeouted after 10 mins") + } + Thread.sleep(5000) + + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + } + assertNotEquals('0.000 ', result[0][5].toString()) + + + logger.info(" ====================================== 2.3 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_storage_policy"="false" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // check table don't have storage_policy + records = sql_return_maparray "show storage policy using" + found = 0 + for (def res2 : records) { + if (res2.Database.equals(dbName) && res2.Table.equals(tableName)) { + found = 1 + break + } + } + assertEquals(found, 0) + + + // 3. 
drop old table and resource and policy + // 3.1 restore normal success and cooldown + // 3.2 restore with("reserve_storage_policy"="true")success and cooldown + // 3.3 restore with("reserve_storage_policy"="false")success and don't cooldown + logger.info(" ====================================== 3.1 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop resource ${resource_name1}; + """ + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // wait cooldown + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + count = 0; + while (sqlResult.contains("0.00")) { + if (++count >= 120) { // 10min + logger.error('cooldown task is timeouted') + throw new Exception("cooldown task is timeouted after 10 mins") + } + Thread.sleep(5000) + + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + } + assertNotEquals('0.000 ', result[0][5].toString()) + + + logger.info(" ====================================== 3.2 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop resource ${resource_name1}; + """ + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_storage_policy"="true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // wait cooldown + result 
= sql "show data FROM ${dbName}.${tableName}"
+    sqlResult = result[0][5].toString();
+    count = 0;
+    while (sqlResult.contains("0.00")) {
+        if (++count >= 120) {  // 10min
+            logger.error('cooldown task is timeouted')
+            throw new Exception("cooldown task is timeouted after 10 mins")
+        }
+        Thread.sleep(5000)
+
+        result = sql "show data FROM ${dbName}.${tableName}"
+        sqlResult = result[0][5].toString();
+    }
+    assertNotEquals('0.000 ', result[0][5].toString())
+
+
+    logger.info(" ====================================== 3.3 ==================================== ")
+    sql "DROP TABLE IF EXISTS ${dbName}.${tableName}"
+    try_sql """
+        drop storage policy ${policy_name1};
+    """
+    try_sql """
+        drop resource ${resource_name1};
+    """
+    sql """
+        RESTORE SNAPSHOT ${dbName}.${snapshotName}
+        FROM `${repoName}`
+        ON ( `${tableName}`)
+        PROPERTIES
+        (
+            "backup_timestamp" = "${snapshot}",
+            "reserve_replica" = "true",
+            "reserve_storage_policy"="false"
+        )
+    """
+
+    syncer.waitAllRestoreFinish(dbName)
+
+    result = sql "SELECT * FROM ${dbName}.${tableName}"
+    assertEquals(result.size(), values.size());
+
+    // check table don't have storage_policy
+    records = sql_return_maparray "show storage policy using"
+    found = 0
+    for (def res2 : records) {
+        if (res2.Database.equals(dbName) && res2.Table.equals(tableName)) {
+            found = 1
+            break
+        }
+    }
+    assertEquals(found, 0)
+
+    // check storage policy ${policy_name1} not exist
+    records = sql_return_maparray "show storage policy"
+    found = 0
+    for (def res2 : records) {
+        if (res2.PolicyName.equals(policy_name1)) {
+            found = 1
+            break
+        }
+    }
+    assertEquals(found, 0)
+
+    // check resource ${resource_name1} not exist (resources are listed by SHOW RESOURCES, not SHOW STORAGE POLICY)
+    records = sql_return_maparray "show resources"
+    found = 0
+    for (def res2 : records) {
+        if (res2.Name.equals(resource_name1)) {
+            found = 1
+            break
+        }
+    }
+    assertEquals(found, 0)
+
+
+    // 4.
alter policy and success + logger.info(" ====================================== 4.1 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + sql """ + ALTER STORAGE POLICY ${policy_name2} PROPERTIES ("cooldown_ttl" = "11"); + """ + + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // wait cooldown + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + count = 0; + while (sqlResult.contains("0.00")) { + if (++count >= 120) { // 10min + logger.error('cooldown task is timeouted') + throw new Exception("cooldown task is timeouted after 10 mins") + } + Thread.sleep(5000) + + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + } + assertNotEquals('0.000 ', result[0][5].toString()) + + // check storage policy ${policy_name2} exist + records = sql_return_maparray "show storage policy" + found = 0 + for (def res2 : records) { + if (res2.PolicyName.equals(policy_name2) && res2.CooldownTtl.equals("11")) { + found = 1 + break + } + } + assertEquals(found, 1) + + + + //cleanup + sql "DROP TABLE IF EXISTS ${dbName}.${tableName} FORCE" + sql "DROP DATABASE ${dbName} FORCE" + sql "DROP REPOSITORY `${repoName}`" + + sql """ + drop storage policy ${policy_name1}; + """ + + sql """ + drop resource ${resource_name1}; + """ + + sql """ + drop storage policy ${policy_name2}; + """ + + sql """ + drop resource ${resource_name2}; + """ +} + + + + + +// test restore back to a new instance +suite("test_backup_cooldown_2", "backup_cooldown_data") { + + String suiteName = "test_backup_cooldown_2" + String resource_name1 = "resource_${suiteName}_1" + String policy_name1 = 
"policy_${suiteName}_1" + String resource_name2 = "resource_${suiteName}_2" + String resource_new_name = "resource_${suiteName}_new" + String policy_name2 = "policy_${suiteName}_2" + String dbName = "${suiteName}_db" + String tableName = "${suiteName}_table" + String snapshotName = "${suiteName}_snapshot" + String repoName = "${suiteName}_repo" + def found = 0 + def records + def syncer = getSyncer() + def result + syncer.createS3Repository(repoName) + + + + sql """ + CREATE RESOURCE IF NOT EXISTS "${resource_name1}" + PROPERTIES( + "type"="s3", + "AWS_ENDPOINT" = "${getS3Endpoint()}", + "AWS_REGION" = "${getS3Region()}", + "AWS_ROOT_PATH" = "regression/cooldown1", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_MAX_CONNECTIONS" = "50", + "AWS_REQUEST_TIMEOUT_MS" = "3000", + "AWS_CONNECTION_TIMEOUT_MS" = "1000", + "AWS_BUCKET" = "${getS3BucketName()}", + "s3_validity_check" = "true" + ); + """ + + sql """ + CREATE RESOURCE IF NOT EXISTS "${resource_name2}" + PROPERTIES( + "type"="s3", + "AWS_ENDPOINT" = "${getS3Endpoint()}", + "AWS_REGION" = "${getS3Region()}", + "AWS_ROOT_PATH" = "regression/cooldown2", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_MAX_CONNECTIONS" = "50", + "AWS_REQUEST_TIMEOUT_MS" = "3000", + "AWS_CONNECTION_TIMEOUT_MS" = "1000", + "AWS_BUCKET" = "${getS3BucketName()}", + "s3_validity_check" = "true" + ); + """ + + sql """ + CREATE RESOURCE IF NOT EXISTS "${resource_new_name}" + PROPERTIES( + "type"="s3", + "AWS_ENDPOINT" = "${getS3Endpoint()}", + "AWS_REGION" = "${getS3Region()}", + "AWS_ROOT_PATH" = "regression/cooldown3", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_MAX_CONNECTIONS" = "50", + "AWS_REQUEST_TIMEOUT_MS" = "3000", + "AWS_CONNECTION_TIMEOUT_MS" = "1000", + "AWS_BUCKET" = "${getS3BucketName()}", + "s3_validity_check" = "true" + ); + """ + + sql """ + CREATE STORAGE POLICY IF NOT EXISTS ${policy_name1} + PROPERTIES( + 
"storage_resource" = "${resource_name1}", + "cooldown_ttl" = "10" + ) + """ + + sql """ + CREATE STORAGE POLICY IF NOT EXISTS ${policy_name2} + PROPERTIES( + "storage_resource" = "${resource_name2}", + "cooldown_ttl" = "10" + ) + """ + + //generate_cooldown_task_interval_sec default is 20 + + sql "CREATE DATABASE IF NOT EXISTS ${dbName}" + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + + sql """ + CREATE TABLE ${dbName}.${tableName} + ( + k1 BIGINT, + v1 VARCHAR(48), + INDEX idx1 (v1) USING INVERTED PROPERTIES("parser" = "english") + ) + DUPLICATE KEY(k1) + PARTITION BY RANGE(`k1`) + ( + PARTITION p201701 VALUES [(0), (3)) ("storage_policy" = "${policy_name1}"), + PARTITION `p201702` VALUES LESS THAN (6)("storage_policy" = "${policy_name2}"), + PARTITION `p2018` VALUES [(6),(100)) + ) + DISTRIBUTED BY HASH (k1) BUCKETS 3 + PROPERTIES( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + List values = [] + for (int i = 1; i <= 10; ++i) { + values.add("(${i}, ${i})") + } + sql "INSERT INTO ${dbName}.${tableName} VALUES ${values.join(",")}" + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + + // wait cooldown + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + int count = 0; + while (sqlResult.contains("0.00")) { + if (++count >= 120) { // 10min + logger.error('cooldown task is timeouted') + throw new Exception("cooldown task is timeouted after 10 mins") + } + Thread.sleep(5000) + + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + } + assertNotEquals('0.000 ', result[0][5].toString()) + + sql """ + BACKUP SNAPSHOT ${dbName}.${snapshotName} + TO `${repoName}` + ON (${tableName}) + """ + + syncer.waitSnapshotFinish(dbName) + + def snapshot = syncer.getSnapshotTimestamp(repoName, snapshotName) + assertTrue(snapshot != null) + + // 1 老表存在的情况 + // 1.1 restore 指定 ("storage_resource"="resource_name_exist"), 
预期失败,不支持将冷热属性的表恢复到已存在的表中。 + // 1.2 restore 指定 ("storage_resource"="resource_name_not_exist"), 预期失败,resource不存在 + // 1.3 restore 指定 ("storage_resource"="resource_new_name"), 预期失败,不支持将冷热属性的表恢复到已存在的表中。 + + + // 2 删除表 + // 2.1 restore 指定 ("storage_resource"="resource_name_exist"), 预期失败,resource路径需不一致 + // 2.2 restore 指定 ("storage_resource"="resource_name_not_exist"), 预期失败,resource不存在 + // 2.3 restore 指定 ("storage_resource"="resource_new_name"), storage policy 存在失败 + + + // 3 删除表和policy + // 3.1 restore 指定 ("storage_resource"="resource_name_not_exist"), 预期失败,resource不存在 + // 3.2 restore 指定 ("storage_resource"="resource_new_name"), 成功 + + + + // 4 删除表和policy 同时指定storage_resource和reserve_storage_policy + // 4.1 restore 指定 ("storage_resource"="resource_name_not_exist", "reserve_storage_policy"="true"), 预期失败,resource不存在 + // 4.2 restore 指定 ("storage_resource"="resource_name_not_exist", "reserve_storage_policy"="false"), 预期失败,resource不存在 + // 4.3 restore 指定 ("storage_resource"="resource_new_name", "reserve_storage_policy"="true"), 预期成功,且落冷 + // 4.4 restore 指定 ("storage_resource"="resource_new_name", "reserve_storage_policy"="false"), 预期成功,且不落冷 + + + + + + // 1 old table exist + // 1.1 restore with ("storage_resource"="resource_name1") fail + // 1.2 restore with ("storage_resource"="resource_name_not_exist") fail + // 1.3 restore with ("storage_resource"="resource_new_name") fail + logger.info(" ====================================== 1.1 ==================================== ") + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="${resource_name1}" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // restore failed + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + 
assertTrue(row.Status.contains("Can't restore remote partition")) + + + + + logger.info(" ====================================== 1.2 ==================================== ") + def fail_restore_1 = try_sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="resource_name_not_exist" + ) + """ + + logger.info("fail_restore_1: ${fail_restore_1}") + + assertEquals(fail_restore_1, null) + + logger.info(" ====================================== 1.3 ==================================== ") + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="${resource_new_name}" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // restore failed + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + assertTrue(row.Status.contains("Can't restore remote partition")) + + + + + // 2 drop old table + // 2.1 restore with ("storage_resource"="resource_name_exist")fail + // 2.2 restore with ("storage_resource"="resource_name_not_exist") fail + // 2.3 restore with ("storage_resource"="resource_new_name")fail + logger.info(" ====================================== 2.1 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="${resource_name1}" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + // restore failed + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + 
assertTrue(row.Status.contains("should not same as restored resource root path")) + + + + + logger.info(" ====================================== 2.2 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + def fail_restore_2 = try_sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="resource_name_not_exist" + ) + """ + + assertEquals(fail_restore_2, null) + + + logger.info(" ====================================== 2.3 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="${resource_new_name}" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + // restore failed + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + assertTrue(row.Status.contains("already exist but with different properties")) + + // 3 drop table and resource and policy + // 3.1 restore with ("storage_resource"="resource_name_not_exist") fail + // 3.2 restore with ("storage_resource"="resource_new_name") success and cooldown + logger.info(" ====================================== 3.1 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop storage policy ${policy_name2}; + """ + def fail_restore_3 = try_sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="resource_name_not_exist" + ) + """ + + assertEquals(fail_restore_3, null) + + logger.info(" 
====================================== 3.2 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop storage policy ${policy_name2}; + """ + + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="${resource_new_name}" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // wait cooldown + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + count = 0; + while (sqlResult.contains("0.00")) { + if (++count >= 120) { // 10min + logger.error('cooldown task is timeouted') + throw new Exception("cooldown task is timeouted after 10 mins") + } + Thread.sleep(5000) + + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + } + assertNotEquals('0.000 ', result[0][5].toString()) + + // check plocy_name1 storage_resource change to resource_new_name + records = sql_return_maparray "show storage policy" + found = 0 + for (def res2 : records) { + if (res2.StorageResource.equals(resource_new_name) && res2.PolicyName.equals(policy_name1)) { + found = 1 + break + } + } + assertEquals(found, 1) + + // check plocy_name2 storage_resource change to resource_new_name + records = sql_return_maparray "show storage policy" + found = 0 + for (def res2 : records) { + if (res2.StorageResource.equals(resource_new_name) && res2.PolicyName.equals(policy_name2)) { + found = 1 + break + } + } + assertEquals(found, 1) + + + + // 4 drop table/resource/policy, set both storage_resource and reserve_storage_policy + // 4.1 restore with ("storage_resource"="resource_name_not_exist", "reserve_storage_policy"="true") fail + // 4.2 restore with 
("storage_resource"="resource_name_not_exist", "reserve_storage_policy"="false") fail + // 4.3 restore with ("storage_resource"="resource_new_name", "reserve_storage_policy"="true") success and cooldown + // 4.4 restore with ("storage_resource"="resource_new_name", "reserve_storage_policy"="false") success and don't cooldown + logger.info(" ====================================== 4.1 ==================================== ") + sql "DROP TABLE if exists ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop storage policy ${policy_name2}; + """ + def fail_restore_4 = try_sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="resource_name_not_exist", + "reserve_storage_policy"="true" + ) + """ + + assertEquals(fail_restore_4, null) + + + logger.info(" ====================================== 4.2 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop storage policy ${policy_name2}; + """ + + def fail_restore_5 = try_sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="resource_name_not_exist", + "reserve_storage_policy"="false" + ) + """ + + assertEquals(fail_restore_5, null) + + + logger.info(" ====================================== 4.3 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop storage policy ${policy_name2}; + """ + + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + 
"reserve_replica" = "true", + "storage_resource"="${resource_new_name}", + "reserve_storage_policy"="true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // check plocy_name1 storage_resource change to resource_new_name + records = sql_return_maparray "show storage policy" + found = 0 + for (def res2 : records) { + if (res2.StorageResource.equals(resource_new_name) && res2.PolicyName.equals(policy_name1)) { + found = 1 + break + } + } + assertEquals(found, 1) + + // check plocy_name2 storage_resource change to resource_new_name + records = sql_return_maparray "show storage policy" + found = 0 + for (def res2 : records) { + if (res2.StorageResource.equals(resource_new_name) && res2.PolicyName.equals(policy_name2)) { + found = 1 + break + } + } + assertEquals(found, 1) + + // wait cooldown + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + count = 0; + while (sqlResult.contains("0.00")) { + if (++count >= 120) { // 10min + logger.error('cooldown task is timeouted') + throw new Exception("cooldown task is timeouted after 10 mins") + } + Thread.sleep(5000) + + result = sql "show data FROM ${dbName}.${tableName}" + sqlResult = result[0][5].toString(); + } + assertNotEquals('0.000 ', result[0][5].toString()) + + + + logger.info(" ====================================== 4.4 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop storage policy ${policy_name2}; + """ + + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "storage_resource"="${resource_new_name}", + "reserve_storage_policy"="false" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM 
${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + + // check table don't have storage_policy + records = sql_return_maparray "show storage policy using" + found = 0 + for (def res2 : records) { + if (res2.Database.equals(dbName) && res2.Table.equals(tableName)) { + found = 1 + break + } + } + assertEquals(found, 0) + + + //cleanup + sql "DROP TABLE IF EXISTS ${dbName}.${tableName} FORCE" + sql "DROP DATABASE ${dbName} FORCE" + sql "DROP REPOSITORY `${repoName}`" + + try_sql """ + drop storage policy ${policy_name1}; + """ + + try_sql """ + drop resource ${resource_name1}; + """ + + try_sql """ + drop storage policy ${policy_name2}; + """ + + try_sql """ + drop resource ${resource_name2}; + """ +} \ No newline at end of file From 1cf5f7ca21625e1d1a01cded8ce935959ac10ebe Mon Sep 17 00:00:00 2001 From: ayuanzhang Date: Tue, 12 Nov 2024 14:07:37 +0800 Subject: [PATCH 02/10] fix inverted index --- be/src/olap/rowset/beta_rowset.cpp | 41 ++++++------------- be/src/olap/rowset/rowset_meta.h | 2 +- be/src/olap/tablet_schema.h | 8 ++++ be/src/runtime/snapshot_loader.cpp | 2 +- .../apache/doris/policy/DropPolicyLog.java | 5 +++ .../org/apache/doris/policy/PolicyMgr.java | 5 +++ 6 files changed, 32 insertions(+), 31 deletions(-) diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 35e394cfcdb5bc..0815d1f83eb63e 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -392,40 +392,23 @@ Status BetaRowset::download(io::RemoteFileSystem* fs, const std::string& dir) { linked_success_files.push_back(local_seg_path); - if (_schema->get_inverted_index_storage_format() != InvertedIndexStorageFormatPB::V1) { - if (_schema->has_inverted_index()) { - std::string inverted_index_src_file = - InvertedIndexDescriptor::get_index_file_name(remote_seg_path); - - std::string inverted_index_dst_file_path = - InvertedIndexDescriptor::get_index_file_name(local_seg_path); - - RETURN_IF_ERROR( - 
fs->download(inverted_index_src_file, inverted_index_dst_file_path)); - - linked_success_files.push_back(inverted_index_dst_file_path); + for (const auto& index : _schema->indexes()) { + if (index.index_type() != IndexType::INVERTED) { + continue; } - } else { - for (const auto& index : _schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); - std::string inverted_index_src_file = InvertedIndexDescriptor::get_index_file_name( - remote_seg_path, index_id, index.get_index_suffix()); + auto index_id = index.index_id(); + std::string inverted_index_src_file = + InvertedIndexDescriptor::get_index_file_name(remote_seg_path, index_id); - std::string inverted_index_dst_file_path = - InvertedIndexDescriptor::get_index_file_name(local_seg_path, index_id, - index.get_index_suffix()); + std::string inverted_index_dst_file_path = + InvertedIndexDescriptor::get_index_file_name(local_seg_path, index_id); - RETURN_IF_ERROR( - fs->download(inverted_index_src_file, inverted_index_dst_file_path)); + RETURN_IF_ERROR(fs->download(inverted_index_src_file, inverted_index_dst_file_path)); - linked_success_files.push_back(inverted_index_dst_file_path); - LOG(INFO) << "success to download. from=" << inverted_index_src_file << ", " - << "to=" << inverted_index_dst_file_path; - } + linked_success_files.push_back(inverted_index_dst_file_path); + LOG(INFO) << "success to download. 
from=" << inverted_index_src_file << ", " + << "to=" << inverted_index_dst_file_path; } } diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h index df526dbd321962..d600a7771b3a19 100644 --- a/be/src/olap/rowset/rowset_meta.h +++ b/be/src/olap/rowset/rowset_meta.h @@ -107,7 +107,7 @@ class RowsetMeta { _fs = std::move(fs); } - void clear_resource_id(); + void clear_resource_id() { _rowset_meta_pb.clear_resource_id(); } const std::string& resource_id() const { return _rowset_meta_pb.resource_id(); } diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 2ffbfb0ffe8ee3..216812eb594485 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -269,6 +269,14 @@ class TabletSchema { segment_v2::CompressionTypePB compression_type() const { return _compression_type; } const std::vector& indexes() const { return _indexes; } + bool has_inverted_index() const { + for (const auto& index : _indexes) { + if (index.index_type() == IndexType::INVERTED) { + return true; + } + } + return false; + } std::vector get_indexes_for_column(int32_t col_unique_id) const; bool has_inverted_index(int32_t col_unique_id) const; bool has_inverted_index_with_index_id(int64_t index_id) const; diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index 89eb378b6372d3..b0017e61102f78 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -138,7 +138,7 @@ static Status download_and_upload_one_file(io::RemoteFileSystem& dest_fs, std::string md5sum; RETURN_IF_ERROR(io::global_local_filesystem()->md5sum(local_seg_path, &md5sum)); - RETURN_IF_ERROR(upload_with_checksum(dest_fs, local_seg_path, dest_seg_path, md5sum)); + RETURN_IF_ERROR(dest_fs.upload_with_checksum(local_seg_path, dest_seg_path, md5sum)); //delete local file RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(local_seg_path)); diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/policy/DropPolicyLog.java b/fe/fe-core/src/main/java/org/apache/doris/policy/DropPolicyLog.java index 9b58e5b4d99512..88537630dd87f7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/policy/DropPolicyLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/policy/DropPolicyLog.java @@ -61,6 +61,11 @@ public class DropPolicyLog implements Writable { @SerializedName(value = "roleName") private String roleName; + public DropPolicyLog(PolicyTypeEnum type, String policyName) { + this.type = type; + this.policyName = policyName; + } + /** * Generate delete logs through stmt. **/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java b/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java index 344c333c4966d9..0d3ccc1bc31a68 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java @@ -275,6 +275,11 @@ private void unprotectedAdd(Policy policy) { } + public void replayDrop(StoragePolicy policy) { + DropPolicyLog log = new DropPolicyLog(policy.getType(), policy.getPolicyName()); + replayDrop(log); + } + public void replayDrop(DropPolicyLog log) { unprotectedDrop(log); LOG.info("replay drop policy log: {}", log); From 881a3834cec5be333c820aff9acb0242901c3156 Mon Sep 17 00:00:00 2001 From: ayuanzhang Date: Wed, 13 Nov 2024 21:59:24 +0800 Subject: [PATCH 03/10] update test_backup_restore_cold_data.groovy --- .../java/org/apache/doris/backup/RestoreJob.java | 3 +-- .../java/org/apache/doris/catalog/S3Resource.java | 4 +++- .../test_backup_restore_cold_data.groovy | 12 ++++++------ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 23997f621ebfbd..d3f33285d4e7a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -1474,8 +1474,7 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc if (baseTabletRef != null) { // ensure this replica is bound to the same backend disk as the origin table's replica. task.setBaseTablet(baseTabletRef.tabletId, baseTabletRef.schemaHash); - LOG.info("set base tablet {} for replica {} in restore job {}, tablet id={}," + - " storage_policy={}", + LOG.info("set base tablet {} for replica {} in restore job {}, tablet id={},", baseTabletRef.tabletId, restoreReplica.getId(), jobId, restoreTablet.getId()); } batchTask.addTask(task); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java index 677f6d370032e3..5275d4888b6055 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java @@ -128,7 +128,9 @@ private static boolean pingS3(CloudCredentialWithEndpoint credential, String buc Map propertiesPing = new HashMap<>(); propertiesPing.put(S3Properties.Env.ACCESS_KEY, credential.getAccessKey()); propertiesPing.put(S3Properties.Env.SECRET_KEY, credential.getSecretKey()); - propertiesPing.put(S3Properties.Env.TOKEN, credential.getSessionToken()); + if (credential.getSessionToken() != null) { + propertiesPing.put(S3Properties.Env.TOKEN, credential.getSessionToken()); + } propertiesPing.put(S3Properties.Env.ENDPOINT, credential.getEndpoint()); propertiesPing.put(S3Properties.Env.REGION, credential.getRegion()); propertiesPing.put(PropertyConverter.USE_PATH_STYLE, diff --git a/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy b/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy index 11d890a20ef019..8c41e2126768a5 100644 --- a/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy +++ 
b/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy @@ -377,15 +377,15 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { // 1.3 restore 指定 ("reserve_storage_policy"="false"), 预期成功,且不落冷 // 2 删除老表 - // 1.1 restore 不指定 预期成功,且落冷 - // 1.2 restore 指定 ("reserve_storage_policy"="true")预期成功,且落冷 - // 1.3 restore 指定 ("reserve_storage_policy"="false")预期成功,且不落冷 + // 2.1 restore 不指定 预期成功,且落冷 + // 2.2 restore 指定 ("reserve_storage_policy"="true")预期成功,且落冷 + // 2.3 restore 指定 ("reserve_storage_policy"="false")预期成功,且不落冷 // 3 删除resource 和 policy - // 1.1 restore 不指定 预期成功,且落冷 - // 1.2 restore 指定 ("reserve_storage_policy"="true")预期成功,且落冷 - // 1.3 restore 指定 ("reserve_storage_policy"="false")预期成功,且不落冷 + // 2.1 restore 不指定 预期成功,且落冷 + // 2.2 restore 指定 ("reserve_storage_policy"="true")预期成功,且落冷 + // 2.3 restore 指定 ("reserve_storage_policy"="false")预期成功,且不落冷 // 1. old table exist From 98fbda19be112ea5a9d242bec02ee58bf24a1221 Mon Sep 17 00:00:00 2001 From: ayuanzhang Date: Mon, 18 Nov 2024 16:28:18 +0800 Subject: [PATCH 04/10] use new storage policy id --- .../org/apache/doris/backup/RestoreJob.java | 3 +-- .../org/apache/doris/policy/PolicyMgr.java | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index d3f33285d4e7a8..152e2811aa8d64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -1372,8 +1372,7 @@ private void checkAndRestoreStoragePolicies() { } else { // restore storage policy try { - policyMgr.replayCreate(backupStoargePolicy); - Env.getCurrentEnv().getEditLog().logCreatePolicy(backupStoargePolicy); + policyMgr.createStoragePolicy(backupStoargePolicy); } catch (Exception e) { LOG.error("restore user property fail should not happen", e); } diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java b/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java index 0d3ccc1bc31a68..1d7a1061a57e4d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java @@ -117,6 +117,24 @@ public void createDefaultStoragePolicy() { LOG.info("Create default storage success."); } + /** + * Create policy through StoragePolicy. + **/ + public void createStoragePolicy(StoragePolicy storagePolicy) throws UserException { + Map pros = Maps.newConcurrentMap(); + if (storagePolicy.getCooldownTimestampMs() != -1) { + pros.put(StoragePolicy.COOLDOWN_DATETIME, String.valueOf(storagePolicy.getCooldownTimestampMs())); + } + if (storagePolicy.getCooldownTtl() != -1) { + pros.put(StoragePolicy.COOLDOWN_TTL, String.valueOf(storagePolicy.getCooldownTtl())); + } + pros.put(StoragePolicy.STORAGE_RESOURCE, storagePolicy.getStorageResource()); + + CreatePolicyStmt stmt = new CreatePolicyStmt(storagePolicy.getType(), true, + storagePolicy.getPolicyName(), pros); + createPolicy(stmt); + } + /** * Create policy through stmt. 
**/ From 8acf68e1badd3186db98f9ce07a0fa2f5b15a144 Mon Sep 17 00:00:00 2001 From: ayuanzhang Date: Mon, 18 Nov 2024 16:35:30 +0800 Subject: [PATCH 05/10] update FeMetaVersion.VERSION_130 --- .../java/org/apache/doris/common/FeMetaVersion.java | 5 ++++- .../main/java/org/apache/doris/backup/BackupMeta.java | 11 +++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java b/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java index b1e42d343adf0e..0a016d3595a2b3 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java @@ -67,8 +67,11 @@ public final class FeMetaVersion { // For AnalysisInfo public static final int VERSION_123 = 123; + // For BackupMeta storage policy + public static final int VERSION_124 = 124; + // note: when increment meta version, should assign the latest version to VERSION_CURRENT - public static final int VERSION_CURRENT = VERSION_123; + public static final int VERSION_CURRENT = VERSION_124; // all logs meta version should >= the minimum version, so that we could remove many if clause, for example // if (FE_METAVERSION < VERSION_94) ... 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java index 7acad89a7fa74e..9a2f4e37a23138 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Resource; import org.apache.doris.catalog.Table; +import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.io.Writable; import org.apache.doris.meta.MetaContext; import org.apache.doris.persist.gson.GsonUtils; @@ -176,10 +177,12 @@ public void readFields(DataInput in) throws IOException { Resource resource = Resource.read(in); resourceNameMap.put(resource.getName(), resource); } - size = in.readInt(); - for (int i = 0; i < size; i++) { - StoragePolicy policy = StoragePolicy.read(in); - storagePolicyNameMap.put(policy.getName(), policy); + if (Env.getCurrentEnvJournalVersion() >= FeMetaVersion.VERSION_124) { + size = in.readInt(); + for (int i = 0; i < size; i++) { + StoragePolicy policy = StoragePolicy.read(in); + storagePolicyNameMap.put(policy.getName(), policy); + } } } From c65ea6252f7cb7e0504df88612356bc36bbb5eeb Mon Sep 17 00:00:00 2001 From: ayuanzhang Date: Fri, 29 Nov 2024 09:36:53 +0800 Subject: [PATCH 06/10] fix cooldown_meta_id remove remote_file_info --- be/src/olap/olap_define.h | 1 - be/src/olap/rowset/beta_rowset.cpp | 60 ------ be/src/olap/rowset/beta_rowset.h | 2 - be/src/olap/rowset/rowset.h | 4 - be/src/olap/single_replica_compaction.cpp | 2 +- be/src/olap/snapshot_manager.cpp | 77 ++----- be/src/olap/snapshot_manager.h | 2 +- be/src/olap/tablet.cpp | 12 -- be/src/olap/tablet.h | 2 - be/src/olap/tablet_meta.cpp | 9 - be/src/olap/tablet_meta.h | 2 - be/src/olap/task/engine_clone_task.cpp | 2 +- .../task/engine_storage_migration_task.cpp | 2 +- be/src/runtime/snapshot_loader.cpp | 79 +++----- 
.../org/apache/doris/backup/BackupJob.java | 9 + .../org/apache/doris/backup/BackupMeta.java | 7 +- .../org/apache/doris/backup/RestoreJob.java | 11 +- .../apache/doris/catalog/PartitionInfo.java | 4 +- .../doris/service/FrontendServiceImpl.java | 1 + .../test_backup_restore_cold_data.groovy | 188 +++++++++++++++--- 20 files changed, 225 insertions(+), 251 deletions(-) diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index ee6c012cfd3088..e0e1d919a5048d 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -97,7 +97,6 @@ static const std::string ERROR_LOG_PREFIX = "error_log"; static const std::string PENDING_DELTA_PREFIX = "pending_delta"; static const std::string INCREMENTAL_DELTA_PREFIX = "incremental_delta"; static const std::string CLONE_PREFIX = "clone"; -static const std::string REMOTE_FILE_INFO = "remote_file_info"; // define paths static inline std::string remote_tablet_path(int64_t tablet_id) { diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 0815d1f83eb63e..d07b0b2254c123 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -355,66 +355,6 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row return Status::OK(); } -Status BetaRowset::download(io::RemoteFileSystem* fs, const std::string& dir) { - if (is_local()) { - DCHECK(false) << _rowset_meta->tablet_id() << ' ' << rowset_id(); - return Status::InternalError("should be remote rowset. 
tablet_id={} rowset_id={}", - _rowset_meta->tablet_id(), rowset_id().to_string()); - } - - if (num_segments() < 1) { - return Status::OK(); - } - - Status status; - std::vector linked_success_files; - Defer remove_linked_files {[&]() { // clear download files if errors happen - if (!status.ok()) { - LOG(WARNING) << "will delete download success files due to error " << status; - std::vector paths; - for (auto& file : linked_success_files) { - paths.emplace_back(file); - LOG(WARNING) << "will delete download success file " << file << " due to error"; - } - static_cast(fs->batch_delete(paths)); - LOG(WARNING) << "done delete download success files due to error " << status; - } - }}; - - for (int i = 0; i < num_segments(); ++i) { - // Note: Here we use relative path for remote. - auto remote_seg_path = - remote_segment_path(_rowset_meta->tablet_id(), rowset_id().to_string(), i); - - auto local_seg_path = segment_file_path(dir, rowset_id(), i); - - RETURN_IF_ERROR(fs->download(remote_seg_path, local_seg_path)); - - linked_success_files.push_back(local_seg_path); - - for (const auto& index : _schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - - auto index_id = index.index_id(); - std::string inverted_index_src_file = - InvertedIndexDescriptor::get_index_file_name(remote_seg_path, index_id); - - std::string inverted_index_dst_file_path = - InvertedIndexDescriptor::get_index_file_name(local_seg_path, index_id); - - RETURN_IF_ERROR(fs->download(inverted_index_src_file, inverted_index_dst_file_path)); - - linked_success_files.push_back(inverted_index_dst_file_path); - LOG(INFO) << "success to download. 
from=" << inverted_index_src_file << ", " - << "to=" << inverted_index_dst_file_path; - } - } - - return Status::OK(); -} - Status BetaRowset::upload_to(io::RemoteFileSystem* dest_fs, const RowsetId& new_rowset_id) { DCHECK(is_local()); if (num_segments() < 1) { diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index 1c2e1051aa4038..ed30f76d45033b 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -76,8 +76,6 @@ class BetaRowset final : public Rowset { Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) override; - Status download(io::RemoteFileSystem* fs, const std::string& dir) override; - Status upload_to(io::RemoteFileSystem* dest_fs, const RowsetId& new_rowset_id) override; // only applicable to alpha rowset, no op here diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h index 62d6759de97ae5..a7617779014132 100644 --- a/be/src/olap/rowset/rowset.h +++ b/be/src/olap/rowset/rowset.h @@ -213,10 +213,6 @@ class Rowset : public std::enable_shared_from_this { // copy all files to `dir` virtual Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) = 0; - virtual Status download(io::RemoteFileSystem* fs, const std::string& dir) { - return Status::OK(); - } - virtual Status upload_to(io::RemoteFileSystem* dest_fs, const RowsetId& new_rowset_id) { return Status::OK(); } diff --git a/be/src/olap/single_replica_compaction.cpp b/be/src/olap/single_replica_compaction.cpp index 269a968488600b..5c5ff5a1121cb4 100644 --- a/be/src/olap/single_replica_compaction.cpp +++ b/be/src/olap/single_replica_compaction.cpp @@ -322,7 +322,7 @@ Status SingleReplicaCompaction::_fetch_rowset(const TReplicaInfo& addr, const st // change all rowset ids because they maybe its id same with local rowset auto olap_st = SnapshotManager::instance()->convert_rowset_ids( local_path, _tablet->tablet_id(), _tablet->replica_id(), _tablet->table_id(), - 
_tablet->partition_id(), _tablet->schema_hash(), 0); + _tablet->partition_id(), _tablet->schema_hash(), false, 0); if (!olap_st.ok()) { LOG(WARNING) << "fail to convert rowset ids, path=" << local_path << ", tablet_id=" << _tablet->tablet_id() << ", error=" << olap_st; diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 2eb39ff0827669..2af6cee6b5253d 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -148,7 +148,7 @@ Status SnapshotManager::release_snapshot(const string& snapshot_path) { Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t tablet_id, int64_t replica_id, int64_t table_id, int64_t partition_id, const int32_t& schema_hash, - int64_t storage_policy_id) { + bool is_restore, int64_t storage_policy_id) { SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); Status res = Status::OK(); // check clone dir existed @@ -183,7 +183,10 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t new_tablet_meta_pb.set_tablet_id(tablet_id); *new_tablet_meta_pb.mutable_tablet_uid() = TabletUid::gen_uid().to_proto(); new_tablet_meta_pb.set_replica_id(replica_id); - new_tablet_meta_pb.set_storage_policy_id(storage_policy_id); + if (is_restore) { + new_tablet_meta_pb.set_storage_policy_id(storage_policy_id); + new_tablet_meta_pb.clear_cooldown_meta_id(); + } if (table_id > 0) { new_tablet_meta_pb.set_table_id(table_id); } @@ -215,6 +218,9 @@ Status SnapshotManager::convert_rowset_ids(const std::string& clone_dir, int64_t } else { // remote rowset *rowset_meta = visible_rowset; + if (is_restore) { + rowset_meta->clear_resource_id(); + } } rowset_meta->set_tablet_id(tablet_id); @@ -496,7 +502,6 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet "missed version is a cooldowned rowset, must make full " "snapshot. 
missed_version={}, tablet_id={}", missed_version, ref_tablet->tablet_id()); - //todozy break; } consistent_rowsets.push_back(rowset); @@ -525,11 +530,8 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet if (!is_single_rowset_clone && (!res.ok() || request.missing_version.empty())) { if (!request.__isset.missing_version && ref_tablet->tablet_meta()->cooldown_meta_id().initialized()) { - LOG(WARNING) << "currently not support backup tablet with cooldowned remote " - "data. tablet=" - << request.tablet_id; - // return Status::NotSupported( - // "currently not support backup tablet with cooldowned remote data"); + LOG(INFO) << "Backup tablet with cooldowned remote data. tablet=" + << request.tablet_id; } /// not all missing versions are found, fall back to full snapshot. res = Status::OK(); // reset res @@ -602,10 +604,6 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet } std::vector rs_metas; - RowsetMetaSharedPtr rsm; - bool have_remote_file = false; - io::FileWriterPtr file_writer; - for (auto& rs : consistent_rowsets) { if (rs->is_local()) { // local rowset @@ -613,56 +611,12 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet if (!res.ok()) { break; } - rsm = rs->rowset_meta(); - } else { - std::string rowset_meta_str; - RowsetMetaPB rs_meta_pb; - rs->rowset_meta()->to_rowset_pb(&rs_meta_pb); - rs_meta_pb.SerializeToString(&rowset_meta_str); - - RowsetMetaSharedPtr rowset_meta(new RowsetMeta()); - rowset_meta->init(rowset_meta_str); - - rsm = rowset_meta; - - // save_remote_file info - // tableid|storage_policy_id| - // rowset_id|num_segments|has_inverted_index| - // ...... 
- // rowset_id|num_segments|has_inverted_index - { - // write file - std::string delimeter = "|"; - - if (!have_remote_file) { - auto romote_file_info = - fmt::format("{}/{}", schema_full_path, REMOTE_FILE_INFO); - RETURN_IF_ERROR(io::global_local_filesystem()->create_file(romote_file_info, - &file_writer)); - RETURN_IF_ERROR(file_writer->append( - std::to_string(rs->rowset_meta()->tablet_id()))); - RETURN_IF_ERROR(file_writer->append(delimeter)); - RETURN_IF_ERROR(file_writer->append( - std::to_string(ref_tablet->tablet_meta()->storage_policy_id()))); - have_remote_file = true; - } - RETURN_IF_ERROR(file_writer->append(delimeter)); - RETURN_IF_ERROR(file_writer->append(rs->rowset_id().to_string())); - RETURN_IF_ERROR(file_writer->append(delimeter)); - RETURN_IF_ERROR(file_writer->append(std::to_string(rs->num_segments()))); - RETURN_IF_ERROR(file_writer->append(delimeter)); - RETURN_IF_ERROR(file_writer->append( - std::to_string(rs->tablet_schema()->has_inverted_index()))); - } } - rs_metas.push_back(rsm); + rs_metas.push_back(rs->rowset_meta()); VLOG_NOTICE << "add rowset meta to clone list. " - << " start version " << rsm->start_version() << " end version " - << rsm->end_version() << " empty " << rsm->empty(); - } - - if (have_remote_file) { - RETURN_IF_ERROR(file_writer->close()); + << " start version " << rs->rowset_meta()->start_version() + << " end version " << rs->rowset_meta()->end_version() << " empty " + << rs->rowset_meta()->empty(); } if (!res.ok()) { LOG(WARNING) << "fail to create hard link. 
path=" << snapshot_id_path @@ -680,9 +634,6 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet new_tablet_meta->revise_delete_bitmap_unlocked(delete_bitmap_snapshot); } - //clear cooldown meta - new_tablet_meta->revise_clear_resource_id(); - if (snapshot_version == g_Types_constants.TSNAPSHOT_REQ_VERSION2) { res = new_tablet_meta->save(header_path); if (res.ok() && request.__isset.is_copy_tablet_task && request.is_copy_tablet_task) { diff --git a/be/src/olap/snapshot_manager.h b/be/src/olap/snapshot_manager.h index 5e218db28f82e7..9b042277fde8c2 100644 --- a/be/src/olap/snapshot_manager.h +++ b/be/src/olap/snapshot_manager.h @@ -56,7 +56,7 @@ class SnapshotManager { Status convert_rowset_ids(const std::string& clone_dir, int64_t tablet_id, int64_t replica_id, int64_t table_id, int64_t partition_id, const int32_t& schema_hash, - int64_t storage_policy_id); + bool is_restore, int64_t storage_policy_id); private: SnapshotManager() : _snapshot_base_id(0) { diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 98ea3f3706af33..7c85e8238f8f36 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -2231,18 +2231,6 @@ Status Tablet::cooldown() { return Status::OK(); } -Status Tablet::download(RowsetSharedPtr rowset, const std::string& dir) { - std::shared_ptr dest_fs; - RETURN_IF_ERROR(get_remote_file_system(storage_policy_id(), &dest_fs)); - Status st; - - if (st = rowset->download(dest_fs.get(), dir); !st.ok()) { - return st; - } - - return Status::OK(); -} - // hold SHARED `cooldown_conf_lock` Status Tablet::_cooldown_data() { DCHECK(_cooldown_replica_id == replica_id()); diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index cc078938f9dcd0..5ca2248c5b5ac2 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -390,8 +390,6 @@ class Tablet : public BaseTablet { // Cooldown to remote fs. 
Status cooldown(); - Status download(RowsetSharedPtr rowset, const std::string& dir); - RowsetSharedPtr pick_cooldown_rowset(); bool need_cooldown(int64_t* cooldown_timestamp, size_t* file_size); diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 2711534412b319..a5324c9a6edd2f 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -837,15 +837,6 @@ void TabletMeta::revise_rs_metas(std::vector&& rs_metas) { _stale_rs_metas.clear(); } -void TabletMeta::revise_clear_resource_id() { - for (auto rs : _rs_metas) { - rs->clear_resource_id(); - } - for (auto rs : _stale_rs_metas) { - rs->clear_resource_id(); - } -} - // This method should call after revise_rs_metas, since new rs_metas might be a subset // of original tablet, we should revise the delete_bitmap according to current rowset. // diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 01a3ddd2e7c73d..a79cdd272962da 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -187,8 +187,6 @@ class TabletMeta { void modify_rs_metas(const std::vector& to_add, const std::vector& to_delete, bool same_version = false); - - void revise_clear_resource_id(); void revise_rs_metas(std::vector&& rs_metas); void revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap); diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 6e5a15d9e3bb79..cc816b25308e68 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -407,7 +407,7 @@ Status EngineCloneTask::_make_and_download_snapshots(DataDir& data_dir, // change all rowset ids because they maybe its id same with local rowset status = SnapshotManager::instance()->convert_rowset_ids( local_data_path, _clone_req.tablet_id, _clone_req.replica_id, - _clone_req.table_id, _clone_req.partition_id, _clone_req.schema_hash, 0); + _clone_req.table_id, _clone_req.partition_id, _clone_req.schema_hash, false, 0); } else { 
LOG_WARNING("failed to download snapshot from remote BE") .tag("url", _mask_token(remote_url_prefix)) diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp index 5033c67b2dc38f..c85109c9a084e1 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -158,7 +158,7 @@ Status EngineStorageMigrationTask::_gen_and_write_header_to_hdr_file( // rowset create time is useful when load tablet from meta to check which tablet is the tablet to load return SnapshotManager::instance()->convert_rowset_ids( full_path, tablet_id, _tablet->replica_id(), _tablet->table_id(), - _tablet->partition_id(), schema_hash, 0); + _tablet->partition_id(), schema_hash, false, 0); } Status EngineStorageMigrationTask::_reload_tablet(const std::string& full_path) { diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index b0017e61102f78..f650c067e1ab87 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -50,6 +50,7 @@ #include "io/fs/s3_file_system.h" #include "io/hdfs_builder.h" #include "olap/data_dir.h" +#include "olap/olap_define.h" #include "olap/snapshot_manager.h" #include "olap/storage_engine.h" #include "olap/storage_policy.h" @@ -148,15 +149,15 @@ static Status download_and_upload_one_file(io::RemoteFileSystem& dest_fs, static Status upload_remote_rowset(io::RemoteFileSystem& dest_fs, int64_t tablet_id, const std::string& local_path, const std::string& dest_path, - io::RemoteFileSystem* cold_fs, const std::string& rowset, + io::RemoteFileSystem* cold_fs, const std::string& rowset_id, int segments, int have_inverted_index) { Status res = Status::OK(); for (int i = 0; i < segments; i++) { std::string remote_seg_path = - fmt::format("{}/{}_{}.dat", remote_tablet_path(tablet_id), rowset, i); - std::string local_seg_path = fmt::format("{}/{}_{}.dat", local_path, rowset, i); - std::string 
dest_seg_path = fmt::format("{}/{}_{}.dat", dest_path, rowset, i); + fmt::format("{}/{}_{}.dat", remote_tablet_path(tablet_id), rowset_id, i); + std::string local_seg_path = fmt::format("{}/{}_{}.dat", local_path, rowset_id, i); + std::string dest_seg_path = fmt::format("{}/{}_{}.dat", dest_path, rowset_id, i); RETURN_IF_ERROR(download_and_upload_one_file(dest_fs, cold_fs, remote_seg_path, local_seg_path, dest_seg_path)); @@ -167,8 +168,8 @@ static Status upload_remote_rowset(io::RemoteFileSystem& dest_fs, int64_t tablet } std::vector remote_index_files; - RETURN_IF_ERROR(list_segment_inverted_index_file(cold_fs, remote_tablet_path(tablet_id), rowset, - &remote_index_files)); + RETURN_IF_ERROR(list_segment_inverted_index_file(cold_fs, remote_tablet_path(tablet_id), + rowset_id, &remote_index_files)); for (auto& index_file : remote_index_files) { std::string remote_index_path = @@ -182,53 +183,43 @@ static Status upload_remote_rowset(io::RemoteFileSystem& dest_fs, int64_t tablet return res; } +/* + * get the cooldown data info from the hdr file, download the cooldown data and + * upload it to remote storage. 
+*/ static Status upload_remote_file(io::RemoteFileSystem& dest_fs, int64_t tablet_id, const std::string& local_path, const std::string& dest_path, - const std::string& remote_file) { - io::FileReaderSPtr file_reader; + const std::string& hdr_file) { Status res = Status::OK(); - std::string full_remote_path = local_path + '/' + remote_file; - RETURN_IF_ERROR(io::global_local_filesystem()->open_file(full_remote_path, &file_reader)); - size_t bytes_read = 0; - char* buff = (char*)malloc(file_reader->size() + 1); - RETURN_IF_ERROR(file_reader->read_at(0, {buff, file_reader->size()}, &bytes_read)); - string str(buff, file_reader->size()); - size_t start = 0; - string delimiter = "|"; - size_t end = str.find(delimiter); - int64_t tablet_id_tmp = std::stol(str.substr(start, end - start)); - start = end + delimiter.length(); - - if (tablet_id_tmp != tablet_id) { - return Status::InternalError("Invalid tablet {}", tablet_id_tmp); + auto tablet_meta = std::make_shared(); + res = tablet_meta->create_from_file(local_path + "/" + hdr_file); + if (!res.ok()) { + return Status::Error( + "fail to load tablet_meta. 
file_path={}", local_path + "/" + hdr_file); + } + + if (tablet_meta->tablet_id() != tablet_id) { + return Status::InternalError("Invalid tablet {}", tablet_meta->tablet_id()); } - end = str.find(delimiter, start); // - int64_t storage_policy_id = std::stol(str.substr(start, end - start)); - start = end + delimiter.length(); + if (!tablet_meta->cooldown_meta_id().initialized()) { + return res; + } string rowset_id; int segments; int have_inverted_index; std::shared_ptr colddata_fs; - RETURN_IF_ERROR(get_remote_file_system(storage_policy_id, &colddata_fs)); - - while (end != std::string::npos) { - end = str.find(delimiter, start); // - rowset_id = str.substr(start, end - start); - start = end + delimiter.length(); + RETURN_IF_ERROR(get_remote_file_system(tablet_meta->storage_policy_id(), &colddata_fs)); - end = str.find(delimiter, start); - segments = std::stoi(str.substr(start, end - start)); - start = end + delimiter.length(); + for (auto rowset_meta : tablet_meta->all_rs_metas()) { + rowset_id = rowset_meta->rowset_id().to_string(); + segments = rowset_meta->num_segments(); + have_inverted_index = rowset_meta->tablet_schema()->has_inverted_index(); - end = str.find(delimiter, start); - have_inverted_index = std::stoi(str.substr(start, end - start)); - start = end + delimiter.length(); - - if (segments > 0) { + if (segments > 0 && !rowset_meta->is_local()) { RETURN_IF_ERROR(upload_remote_rowset(dest_fs, tablet_id, local_path, dest_path, colddata_fs.get(), rowset_id, segments, have_inverted_index)); @@ -288,10 +279,9 @@ Status SnapshotLoader::upload(const std::map& src_to_d TTaskType::type::UPLOAD)); const std::string& local_file = *it; - if (local_file.compare("remote_file_info") == 0) { + if (_end_with(local_file, ".hdr")) { RETURN_IF_ERROR(upload_remote_file(*_remote_fs, tablet_id, src_path, dest_path, local_file)); - // continue; } // calc md5sum of localfile @@ -413,17 +403,12 @@ Status SnapshotLoader::download(const std::map& src_to const FileStat& file_stat 
= iter.second; auto find = std::find(local_files.begin(), local_files.end(), remote_file); if (find == local_files.end()) { - if (remote_file.compare(REMOTE_FILE_INFO) == 0) { - continue; - } // remote file does not exist in local, download it need_download = true; } else { if (_end_with(remote_file, ".hdr")) { // this is a header file, download it. need_download = true; - } else if (remote_file.compare(REMOTE_FILE_INFO) == 0) { - continue; } else { // check checksum std::string local_md5sum; @@ -883,7 +868,7 @@ Status SnapshotLoader::move(const std::string& snapshot_path, TabletSharedPtr ta // rename the rowset ids and tabletid info in rowset meta Status convert_status = SnapshotManager::instance()->convert_rowset_ids( snapshot_path, tablet_id, tablet->replica_id(), tablet->table_id(), - tablet->partition_id(), schema_hash, tablet->storage_policy_id()); + tablet->partition_id(), schema_hash, true, tablet->storage_policy_id()); if (!convert_status.ok()) { std::stringstream ss; ss << "failed to convert rowsetids in snapshot: " << snapshot_path diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java index 38704462339de0..6155905d23aa64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJob.java @@ -467,6 +467,15 @@ private void prepareAndSendSnapshotTask() { } prepareBackupMetaForOlapTableWithoutLock(tableRef, olapTable, copiedTables, copiedStoragePolicys); + for (StoragePolicy policy : copiedStoragePolicys) { + Resource resource = Env.getCurrentEnv().getResourceMgr() + .getResource(policy.getStorageResource()); + if (resource.getType() != Resource.ResourceType.S3) { + status = new Status(ErrCode.COMMON_ERROR, + "backup job only support S3 type storage policy:" + resource.getType()); + return; + } + } break; case VIEW: prepareBackupMetaForViewWithoutLock((View) tbl, copiedTables); diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java index 9a2f4e37a23138..03f31d4a56f335 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java @@ -60,7 +60,7 @@ public class BackupMeta implements Writable { private BackupMeta() { } - public BackupMeta(List
tables, List resources, List storagePolicys) { + public BackupMeta(List
tables, List resources, List storagePolicies) { for (Table table : tables) { tblNameMap.put(table.getName(), table); tblIdMap.put(table.getId(), table); @@ -69,7 +69,7 @@ public BackupMeta(List
tables, List resources, List tables, List resources, List entry : partitionIdMap.entrySet()) { - idToDataProperty.put(entry.getKey(), reserveStoragePolicy - ? origIdToDataProperty.get(entry.getValue()) : DataProperty.DEFAULT_HDD_DATA_PROPERTY); + idToDataProperty.put(entry.getKey(), reserveStoragePolicy ? origIdToDataProperty.get(entry.getValue()) : + new DataProperty(DataProperty.DEFAULT_STORAGE_MEDIUM)); idToReplicaAllocation.put(entry.getKey(), restoreReplicaAlloc == null ? origIdToReplicaAllocation.get(entry.getValue()) : restoreReplicaAlloc); diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index b9e4be8e118527..897de106760f46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -285,6 +285,7 @@ public TConfirmUnusedRemoteFilesResult confirmUnusedRemoteFiles(TConfirmUnusedRe List runningBackupJobs = jobs.stream().filter(job -> job instanceof BackupJob) .filter(job -> !((BackupJob) job).isDone()) + .filter(job -> ((BackupJob) job).getBackupMeta().getTable((info.tablet_id)) != null) .map(job -> (BackupJob) job).collect(Collectors.toList()); if (runningBackupJobs.size() > 0) { diff --git a/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy b/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy index 8c41e2126768a5..b99e3e86b47f4d 100644 --- a/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy +++ b/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy @@ -170,18 +170,10 @@ suite("test_backup_cooldown", "backup_cooldown_data") { drop storage policy ${policy_name1}; """ - sql """ - drop resource ${resource_name1}; - """ - sql """ drop storage policy ${policy_name2}; """ - sql """ - drop resource ${resource_name2}; - """ - sql """ RESTORE 
SNAPSHOT ${dbName}.${snapshotName} FROM `${repoName}` @@ -382,14 +374,19 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { // 2.3 restore 指定 ("reserve_storage_policy"="false")预期成功,且不落冷 - // 3 删除resource 和 policy - // 2.1 restore 不指定 预期成功,且落冷 - // 2.2 restore 指定 ("reserve_storage_policy"="true")预期成功,且落冷 - // 2.3 restore 指定 ("reserve_storage_policy"="false")预期成功,且不落冷 + // 3 删除老表和policy + // 3.1 restore 不指定 预期成功,且落冷 + // 3.2 restore 指定 ("reserve_storage_policy"="true")预期成功,且落冷 + // 3.3 restore 指定 ("reserve_storage_policy"="false")预期成功,且不落冷 + + // 4 删除老表和resource、policy + // 4.1 restore 不指定 预期失败,resource不存在 + // 4.2 restore 指定 ("reserve_storage_policy"="true")预期失败,resource不存在 + // 4.3 restore 指定 ("reserve_storage_policy"="false")预期成功,且不落冷 // 1. old table exist - // 1.1 restore normal fail + // 1.1 restore normal fail // 1.2 restore with("reserve_storage_policy"="true") fail // 1.3 restore with("reserve_storage_policy"="false") success and don't cooldown logger.info(" ====================================== 1.1 ==================================== ") @@ -408,7 +405,7 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { // restore failed records = sql_return_maparray "SHOW restore FROM ${dbName}" - row = records[records.size() - 1] + row = records[records.size() - 1] assertTrue(row.Status.contains("Can't restore remote partition")) result = sql "SELECT * FROM ${dbName}.${tableName}" @@ -569,7 +566,7 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { assertEquals(found, 0) - // 3. drop old table and resource and policy + // 3. 
drop old table and policy // 3.1 restore normal success and cooldown // 3.2 restore with("reserve_storage_policy"="true")success and cooldown // 3.3 restore with("reserve_storage_policy"="false")success and don't cooldown @@ -578,9 +575,6 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { try_sql """ drop storage policy ${policy_name1}; """ - try_sql """ - drop resource ${resource_name1}; - """ sql """ RESTORE SNAPSHOT ${dbName}.${snapshotName} FROM `${repoName}` @@ -619,9 +613,6 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { try_sql """ drop storage policy ${policy_name1}; """ - try_sql """ - drop resource ${resource_name1}; - """ sql """ RESTORE SNAPSHOT ${dbName}.${snapshotName} FROM `${repoName}` @@ -661,6 +652,121 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { try_sql """ drop storage policy ${policy_name1}; """ + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_storage_policy"="false" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + result = sql "SELECT * FROM ${dbName}.${tableName}" + assertEquals(result.size(), values.size()); + + // check table don't have storage_policy + records = sql_return_maparray "show storage policy using" + found = 0 + for (def res2 : records) { + if (res2.Database.equals(dbName) && res2.Table.equals(tableName)) { + found = 1 + break + } + } + assertEquals(found, 0) + + // check storage policy ${policy_name1} not exist + records = sql_return_maparray "show storage policy" + found = 0 + for (def res2 : records) { + if (res2.PolicyName.equals(policy_name1)) { + found = 1 + break + } + } + assertEquals(found, 0) + + // check resource ${resource_name1} not exist + records = sql_return_maparray "show storage policy" + found = 0 + for (def res2 : records) { + if (res2.Name.equals(resource_name1)) { + found = 1 + break + } + } + assertEquals(found, 0) + + 
+ // 4. drop old table and resource and policy + // 4.1 restore normal fail + // 4.2 restore with("reserve_storage_policy"="true") fail + // 4.3 restore with("reserve_storage_policy"="false")success and don't cooldown + logger.info(" ====================================== 4.1 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop resource ${resource_name1}; + """ + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + // restore is expected to fail because the storage resource no longer exists + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + assertTrue(row.Status.contains("is not exist")) + + + logger.info(" ====================================== 4.2 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ + try_sql """ + drop resource ${resource_name1}; + """ + sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + ON ( `${tableName}`) + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true", + "reserve_storage_policy"="true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + + // restore is expected to fail because the storage resource no longer exists + records = sql_return_maparray "SHOW restore FROM ${dbName}" + row = records[records.size() - 1] + assertTrue(row.Status.contains("is not exist")) + + + logger.info(" ====================================== 4.3 ==================================== ") + sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop storage policy ${policy_name1}; + """ try_sql """ drop resource ${resource_name1}; """ @@ -689,7 +795,7 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") {
found = 1 break } - } + } assertEquals(found, 0) // check storage policy ${policy_name1} not exist @@ -700,7 +806,7 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { found = 1 break } - } + } assertEquals(found, 0) // check resource ${resource_name1} not exist @@ -711,13 +817,31 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { found = 1 break } - } + } assertEquals(found, 0) - - // 4. alter policy and success - logger.info(" ====================================== 4.1 ==================================== ") + // 5. alter policy and success + logger.info(" ====================================== 5.1 ==================================== ") sql "DROP TABLE IF EXISTS ${dbName}.${tableName}" + try_sql """ + drop resource ${resource_name1}; + """ + sql """ + CREATE RESOURCE IF NOT EXISTS "${resource_name1}" + PROPERTIES( + "type"="s3", + "AWS_ENDPOINT" = "${getS3Endpoint()}", + "AWS_REGION" = "${getS3Region()}", + "AWS_ROOT_PATH" = "regression/cooldown1", + "AWS_ACCESS_KEY" = "${getS3AK()}", + "AWS_SECRET_KEY" = "${getS3SK()}", + "AWS_MAX_CONNECTIONS" = "50", + "AWS_REQUEST_TIMEOUT_MS" = "3000", + "AWS_CONNECTION_TIMEOUT_MS" = "1000", + "AWS_BUCKET" = "${getS3BucketName()}", + "s3_validity_check" = "true" + ); + """ sql """ ALTER STORAGE POLICY ${policy_name2} PROPERTIES ("cooldown_ttl" = "11"); """ @@ -762,7 +886,7 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { found = 1 break } - } + } assertEquals(found, 1) @@ -968,8 +1092,6 @@ suite("test_backup_cooldown_2", "backup_cooldown_data") { - - // 1 old table exist // 1.1 restore with ("storage_resource"="resource_name1") fail // 1.2 restore with ("storage_resource"="resource_name_not_exist") fail @@ -1184,7 +1306,7 @@ suite("test_backup_cooldown_2", "backup_cooldown_data") { found = 1 break } - } + } assertEquals(found, 1) // check plocy_name2 storage_resource change to resource_new_name @@ -1300,7 +1422,7 @@ suite("test_backup_cooldown_2", "backup_cooldown_data") { found = 1 break } - } 
+ } assertEquals(found, 1) // wait cooldown @@ -1357,7 +1479,7 @@ suite("test_backup_cooldown_2", "backup_cooldown_data") { found = 1 break } - } + } assertEquals(found, 0) From 64a040dac71a074bb8dfd2e3033164bb627e8883 Mon Sep 17 00:00:00 2001 From: ayuanzhang Date: Wed, 4 Dec 2024 15:47:51 +0800 Subject: [PATCH 07/10] fix local_files_with_checksum fix review2 --- be/src/olap/snapshot_manager.cpp | 2 +- be/src/runtime/snapshot_loader.cpp | 110 ++++++++++-------- .../doris/service/FrontendServiceImpl.java | 12 +- 3 files changed, 66 insertions(+), 58 deletions(-) diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 2af6cee6b5253d..041f494f7bb6cc 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -531,7 +531,7 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet if (!request.__isset.missing_version && ref_tablet->tablet_meta()->cooldown_meta_id().initialized()) { LOG(INFO) << "Backup tablet with cooldowned remote data. tablet=" - << request.tablet_id; + << request.tablet_id; } /// not all missing versions are found, fall back to full snapshot. 
res = Status::OK(); // reset res diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index f650c067e1ab87..d703b03b638474 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -128,16 +128,45 @@ static Status list_segment_inverted_index_file(io::RemoteFileSystem* cold_fs, return Status::OK(); } -static Status download_and_upload_one_file(io::RemoteFileSystem& dest_fs, - io::RemoteFileSystem* cold_fs, - const std::string& remote_seg_path, - const std::string& local_seg_path, - const std::string& dest_seg_path) { +static Status check_need_upload(const std::string& src_path, const std::string& local_file, + std::map& remote_files, std::string* md5sum, + bool* need_upload) { + // calc md5sum of localfile + RETURN_IF_ERROR(io::global_local_filesystem()->md5sum(src_path + "/" + local_file, md5sum)); + VLOG_CRITICAL << "get file checksum: " << local_file << ": " << *md5sum; + + // check if this local file need upload + auto find = remote_files.find(local_file); + if (find != remote_files.end()) { + if (*md5sum != find->second.md5) { + // remote storage file exist, but with different checksum + LOG(WARNING) << "remote file checksum is invalid. 
remote: " << find->first + << ", local: " << *md5sum; + // TODO(cmy): save these files and delete them later + *need_upload = true; + } + } else { + *need_upload = true; + } + + return Status::OK(); +} + +static Status download_and_upload_one_cold_file( + io::RemoteFileSystem& dest_fs, io::RemoteFileSystem* cold_fs, + const std::string& remote_seg_path, const std::string& local_seg_path, + const std::string& dest_seg_path, const std::string& local_path, + const std::string& local_file, std::map& remote_files) { RETURN_IF_ERROR(cold_fs->download(remote_seg_path, local_seg_path)); - // calc md5sum of localfile + bool need_upload = false; std::string md5sum; - RETURN_IF_ERROR(io::global_local_filesystem()->md5sum(local_seg_path, &md5sum)); + RETURN_IF_ERROR(check_need_upload(local_path, local_file, remote_files, &md5sum, &need_upload)); + + if (!need_upload) { + VLOG_CRITICAL << "cold file exist in remote path, no need to upload: " << local_file; + return Status::OK(); + } RETURN_IF_ERROR(dest_fs.upload_with_checksum(local_seg_path, dest_seg_path, md5sum)); @@ -147,20 +176,23 @@ static Status download_and_upload_one_file(io::RemoteFileSystem& dest_fs, return Status::OK(); } -static Status upload_remote_rowset(io::RemoteFileSystem& dest_fs, int64_t tablet_id, - const std::string& local_path, const std::string& dest_path, - io::RemoteFileSystem* cold_fs, const std::string& rowset_id, - int segments, int have_inverted_index) { +static Status upload_remote_cold_rowset(io::RemoteFileSystem& dest_fs, int64_t tablet_id, + const std::string& local_path, const std::string& dest_path, + io::RemoteFileSystem* cold_fs, const std::string& rowset_id, + int segments, int have_inverted_index, + std::map& remote_files) { Status res = Status::OK(); for (int i = 0; i < segments; i++) { + std::string local_file = fmt::format("{}_{}.dat", rowset_id, i); std::string remote_seg_path = fmt::format("{}/{}_{}.dat", remote_tablet_path(tablet_id), rowset_id, i); std::string local_seg_path = 
fmt::format("{}/{}_{}.dat", local_path, rowset_id, i); std::string dest_seg_path = fmt::format("{}/{}_{}.dat", dest_path, rowset_id, i); - RETURN_IF_ERROR(download_and_upload_one_file(dest_fs, cold_fs, remote_seg_path, - local_seg_path, dest_seg_path)); + RETURN_IF_ERROR(download_and_upload_one_cold_file(dest_fs, cold_fs, remote_seg_path, + local_seg_path, dest_seg_path, local_path, + local_file, remote_files)); } if (!have_inverted_index) { @@ -177,8 +209,9 @@ static Status upload_remote_rowset(io::RemoteFileSystem& dest_fs, int64_t tablet std::string local_seg_path = fmt::format("{}/{}", local_path, index_file); std::string dest_seg_path = fmt::format("{}/{}", dest_path, index_file); - RETURN_IF_ERROR(download_and_upload_one_file(dest_fs, cold_fs, remote_index_path, - local_seg_path, dest_seg_path)); + RETURN_IF_ERROR(download_and_upload_one_cold_file(dest_fs, cold_fs, remote_index_path, + local_seg_path, dest_seg_path, local_path, + index_file, remote_files)); } return res; } @@ -187,16 +220,17 @@ static Status upload_remote_rowset(io::RemoteFileSystem& dest_fs, int64_t tablet * get the cooldown data info from the hdr file, download the cooldown data and * upload it to remote storage. */ -static Status upload_remote_file(io::RemoteFileSystem& dest_fs, int64_t tablet_id, - const std::string& local_path, const std::string& dest_path, - const std::string& hdr_file) { +static Status upload_remote_cold_file(io::RemoteFileSystem& dest_fs, int64_t tablet_id, + const std::string& local_path, const std::string& dest_path, + std::map& remote_files) { Status res = Status::OK(); + std::string hdr_file = local_path + "/" + std::to_string(tablet_id) + ".hdr"; auto tablet_meta = std::make_shared(); - res = tablet_meta->create_from_file(local_path + "/" + hdr_file); + res = tablet_meta->create_from_file(hdr_file); if (!res.ok()) { return Status::Error( - "fail to load tablet_meta. file_path={}", local_path + "/" + hdr_file); + "fail to load tablet_meta. 
file_path={}", hdr_file); } if (tablet_meta->tablet_id() != tablet_id) { @@ -220,9 +254,9 @@ static Status upload_remote_file(io::RemoteFileSystem& dest_fs, int64_t tablet_i have_inverted_index = rowset_meta->tablet_schema()->has_inverted_index(); if (segments > 0 && !rowset_meta->is_local()) { - RETURN_IF_ERROR(upload_remote_rowset(dest_fs, tablet_id, local_path, dest_path, - colddata_fs.get(), rowset_id, segments, - have_inverted_index)); + RETURN_IF_ERROR(upload_remote_cold_rowset(dest_fs, tablet_id, local_path, dest_path, + colddata_fs.get(), rowset_id, segments, + have_inverted_index, remote_files)); } } @@ -279,33 +313,11 @@ Status SnapshotLoader::upload(const std::map& src_to_d TTaskType::type::UPLOAD)); const std::string& local_file = *it; - if (_end_with(local_file, ".hdr")) { - RETURN_IF_ERROR(upload_remote_file(*_remote_fs, tablet_id, src_path, dest_path, - local_file)); - } - - // calc md5sum of localfile + bool need_upload = false; std::string md5sum; RETURN_IF_ERROR( - io::global_local_filesystem()->md5sum(src_path + "/" + local_file, &md5sum)); - VLOG_CRITICAL << "get file checksum: " << local_file << ": " << md5sum; + check_need_upload(src_path, local_file, remote_files, &md5sum, &need_upload)); local_files_with_checksum.push_back(local_file + "." + md5sum); - - // check if this local file need upload - bool need_upload = false; - auto find = remote_files.find(local_file); - if (find != remote_files.end()) { - if (md5sum != find->second.md5) { - // remote storage file exist, but with different checksum - LOG(WARNING) << "remote file checksum is invalid. 
remote: " << find->first - << ", local: " << md5sum; - // TODO(cmy): save these files and delete them later - need_upload = true; - } - } else { - need_upload = true; - } - if (!need_upload) { VLOG_CRITICAL << "file exist in remote path, no need to upload: " << local_file; continue; @@ -318,6 +330,10 @@ Status SnapshotLoader::upload(const std::map& src_to_d _remote_fs->upload_with_checksum(full_local_file, full_remote_file, md5sum)); } // end for each tablet's local files + // 2.4. upload cooldown data files + RETURN_IF_ERROR( + upload_remote_cold_file(*_remote_fs, tablet_id, src_path, dest_path, remote_files)); + tablet_files->emplace(tablet_id, local_files_with_checksum); finished_num++; LOG(INFO) << "finished to write tablet to remote. local path: " << src_path diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index 897de106760f46..51879a8bc95e38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -29,7 +29,6 @@ import org.apache.doris.analysis.TableRef; import org.apache.doris.analysis.TypeDef; import org.apache.doris.analysis.UserIdentity; -import org.apache.doris.backup.AbstractJob; import org.apache.doris.backup.BackupJob; import org.apache.doris.backup.Snapshot; import org.apache.doris.catalog.Column; @@ -280,15 +279,8 @@ public TConfirmUnusedRemoteFilesResult confirmUnusedRemoteFiles(TConfirmUnusedRe return; } - List jobs = Env.getCurrentEnv().getBackupHandler() - .getJobs(tabletMeta.getDbId(), label -> true); - - List runningBackupJobs = jobs.stream().filter(job -> job instanceof BackupJob) - .filter(job -> !((BackupJob) job).isDone()) - .filter(job -> ((BackupJob) job).getBackupMeta().getTable((info.tablet_id)) != null) - .map(job -> (BackupJob) job).collect(Collectors.toList()); - - if (runningBackupJobs.size() > 0) { + 
BackupJob backupJob = (BackupJob) Env.getCurrentEnv().getBackupHandler().getJob(tabletMeta.getDbId()); + if (!backupJob.isDone() && backupJob.getBackupMeta().getTable((tabletMeta.getTableId())) != null) { LOG.warn("Backup is running on this tablet {} ", info.tablet_id); return; } From 84ac5a12af9d153296558917e766a4f68e8db502 Mon Sep 17 00:00:00 2001 From: ayuanzhang Date: Fri, 6 Dec 2024 14:15:25 +0800 Subject: [PATCH 08/10] fix testcase timeout --- .../doris/service/FrontendServiceImpl.java | 3 +- .../test_backup_restore_cold_data.groovy | 44 +++++++++---------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index 51879a8bc95e38..4bb339a1ab7bd7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -280,7 +280,8 @@ public TConfirmUnusedRemoteFilesResult confirmUnusedRemoteFiles(TConfirmUnusedRe } BackupJob backupJob = (BackupJob) Env.getCurrentEnv().getBackupHandler().getJob(tabletMeta.getDbId()); - if (!backupJob.isDone() && backupJob.getBackupMeta().getTable((tabletMeta.getTableId())) != null) { + if (backupJob != null && !backupJob.isDone() + && backupJob.getBackupMeta().getTable((tabletMeta.getTableId())) != null) { LOG.warn("Backup is running on this tablet {} ", info.tablet_id); return; } diff --git a/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy b/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy index b99e3e86b47f4d..84345575d8d3fe 100644 --- a/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy +++ b/regression-test/suites/backup_restore/test_backup_restore_cold_data.groovy @@ -139,9 +139,9 @@ suite("test_backup_cooldown", "backup_cooldown_data") { sqlResult = result[0][5].toString(); int count = 0; 
while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) @@ -195,9 +195,9 @@ suite("test_backup_cooldown", "backup_cooldown_data") { sqlResult = result[0][5].toString(); count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) @@ -340,9 +340,9 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { sqlResult = result[0][5].toString(); int count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) @@ -487,9 +487,9 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { sqlResult = result[0][5].toString(); count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) @@ -523,9 +523,9 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { sqlResult = result[0][5].toString(); count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") }
Thread.sleep(5000) @@ -596,9 +596,9 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { sqlResult = result[0][5].toString(); count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) @@ -635,9 +635,9 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { sqlResult = result[0][5].toString(); count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) @@ -867,9 +867,9 @@ suite("test_backup_cooldown_1", "backup_cooldown_data") { sqlResult = result[0][5].toString(); count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) @@ -1044,9 +1044,9 @@ suite("test_backup_cooldown_2", "backup_cooldown_data") { sqlResult = result[0][5].toString(); int count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) @@ -1287,9 +1287,9 @@ suite("test_backup_cooldown_2", "backup_cooldown_data") { sqlResult = result[0][5].toString(); count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') 
- throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) @@ -1430,9 +1430,9 @@ suite("test_backup_cooldown_2", "backup_cooldown_data") { sqlResult = result[0][5].toString(); count = 0; while (sqlResult.contains("0.00")) { - if (++count >= 120) { // 10min + if (++count >= 360) { // 30min logger.error('cooldown task is timeouted') - throw new Exception("cooldown task is timeouted after 10 mins") + throw new Exception("cooldown task is timeouted after 30 mins") } Thread.sleep(5000) From a8ec081d376b31005871c2413840ba92aa9efabb Mon Sep 17 00:00:00 2001 From: ayuanzhang Date: Sun, 8 Dec 2024 12:35:20 +0800 Subject: [PATCH 09/10] fix job instanceof BackupJob --- .../apache/doris/service/FrontendServiceImpl.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index 4bb339a1ab7bd7..ea7348db7f0ffc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -29,6 +29,7 @@ import org.apache.doris.analysis.TableRef; import org.apache.doris.analysis.TypeDef; import org.apache.doris.analysis.UserIdentity; +import org.apache.doris.backup.AbstractJob; import org.apache.doris.backup.BackupJob; import org.apache.doris.backup.Snapshot; import org.apache.doris.catalog.Column; @@ -279,11 +280,14 @@ public TConfirmUnusedRemoteFilesResult confirmUnusedRemoteFiles(TConfirmUnusedRe return; } - BackupJob backupJob = (BackupJob) Env.getCurrentEnv().getBackupHandler().getJob(tabletMeta.getDbId()); - if (backupJob != null && !backupJob.isDone() - && backupJob.getBackupMeta().getTable((tabletMeta.getTableId())) != null) { - LOG.warn("Backup is running on this tablet {} ", info.tablet_id); - return; + 
AbstractJob job = Env.getCurrentEnv().getBackupHandler().getJob(tabletMeta.getDbId()); + if (job != null && job instanceof BackupJob) { + BackupJob backupJob = (BackupJob) job; + if (!backupJob.isDone() + && backupJob.getBackupMeta().getTable((tabletMeta.getTableId())) != null) { + LOG.warn("Backup is running on this tablet {} ", info.tablet_id); + return; + } } Tablet tablet; From 4643b5da68b623e5547430a7f48647f6becb6d6c Mon Sep 17 00:00:00 2001 From: ayuanzhang Date: Wed, 11 Dec 2024 16:17:15 +0800 Subject: [PATCH 10/10] save storage policy to jobinfo instead of BackupMeta --- be/src/olap/tablet_schema.h | 2 +- .../apache/doris/common/FeMetaVersion.java | 5 +---- .../apache/doris/backup/BackupJobInfo.java | 13 +++--------- .../org/apache/doris/backup/BackupMeta.java | 17 ---------------- .../org/apache/doris/backup/RestoreJob.java | 20 ++++--------------- 5 files changed, 9 insertions(+), 48 deletions(-) diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 216812eb594485..a4e88c9cc75e32 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -269,7 +269,7 @@ class TabletSchema { segment_v2::CompressionTypePB compression_type() const { return _compression_type; } const std::vector& indexes() const { return _indexes; } - bool has_inverted_index() const { + [[nodiscard]] bool has_inverted_index() const { for (const auto& index : _indexes) { if (index.index_type() == IndexType::INVERTED) { return true; diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java b/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java index 0a016d3595a2b3..b1e42d343adf0e 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java @@ -67,11 +67,8 @@ public final class FeMetaVersion { // For AnalysisInfo public static final int VERSION_123 = 123; - // For BackupMeta storage policy - public static final int
VERSION_124 = 124; - // note: when increment meta version, should assign the latest version to VERSION_CURRENT - public static final int VERSION_CURRENT = VERSION_124; + public static final int VERSION_CURRENT = VERSION_123; // all logs meta version should >= the minimum version, so that we could remove many if clause, for example // if (FE_METAVERSION < VERSION_94) ... diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJobInfo.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJobInfo.java index cfbc685b2488f1..a63c6f234cc07e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJobInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupJobInfo.java @@ -335,7 +335,7 @@ public static class BriefBackupJobInfo { @SerializedName("s3_resource_list") public List s3ResourceList = Lists.newArrayList(); @SerializedName("storage_policy_list") - public List storagePolicyList = Lists.newArrayList(); + public List storagePolicyList = Lists.newArrayList(); public static BriefBackupJobInfo fromBackupJobInfo(BackupJobInfo backupJobInfo) { BriefBackupJobInfo briefBackupJobInfo = new BriefBackupJobInfo(); @@ -376,7 +376,7 @@ public static class NewBackupObjects { @SerializedName("s3_resources") public List s3Resources = Lists.newArrayList(); @SerializedName("storage_policy") - public List storagePolicies = Lists.newArrayList(); + public List storagePolicies = Lists.newArrayList(); } public static class BackupOlapTableInfo { @@ -500,11 +500,6 @@ public static class BackupS3ResourceInfo { public String name; } - public static class BackupStoragePolicyInfo { - @SerializedName("name") - public String name; - } - // eg: __db_10001/__tbl_10002/__part_10003/__idx_10002/__10004 public String getFilePath(String db, String tbl, String part, String idx, long tabletId) { if (!db.equalsIgnoreCase(dbName)) { @@ -708,9 +703,7 @@ public static BackupJobInfo fromCatalog(long backupTime, String label, String db // storage policies Collection 
storagePolicies = backupMeta.getStoragePolicyNameMap().values(); for (StoragePolicy storagePolicy : storagePolicies) { - BackupStoragePolicyInfo backupStoragePolicyInfo = new BackupStoragePolicyInfo(); - backupStoragePolicyInfo.name = storagePolicy.getName(); - jobInfo.newBackupObjects.storagePolicies.add(backupStoragePolicyInfo); + jobInfo.newBackupObjects.storagePolicies.add(storagePolicy.clone()); } return jobInfo; diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java index 03f31d4a56f335..411ad7ddd580cd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java @@ -20,7 +20,6 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Resource; import org.apache.doris.catalog.Table; -import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.io.Writable; import org.apache.doris.meta.MetaContext; import org.apache.doris.persist.gson.GsonUtils; @@ -54,7 +53,6 @@ public class BackupMeta implements Writable { @SerializedName(value = "resourceNameMap") private Map resourceNameMap = Maps.newHashMap(); // storagePolicy name -> resource - @SerializedName(value = "storagePolicyNameMap") private Map storagePolicyNameMap = Maps.newHashMap(); private BackupMeta() { @@ -108,10 +106,6 @@ public Resource getResource(String resourceName) { return resourceNameMap.get(resourceName); } - public StoragePolicy getStoragePolicy(String policyName) { - return storagePolicyNameMap.get(policyName); - } - public Table getTable(Long tblId) { return tblIdMap.get(tblId); } @@ -162,10 +156,6 @@ public void write(DataOutput out) throws IOException { for (Resource resource : resourceNameMap.values()) { resource.write(out); } - out.writeInt(storagePolicyNameMap.size()); - for (StoragePolicy storagePolicy : storagePolicyNameMap.values()) { - storagePolicy.write(out); - } } public void 
readFields(DataInput in) throws IOException { @@ -180,13 +170,6 @@ public void readFields(DataInput in) throws IOException { Resource resource = Resource.read(in); resourceNameMap.put(resource.getName(), resource); } - if (Env.getCurrentEnvJournalVersion() >= FeMetaVersion.VERSION_124) { - size = in.readInt(); - for (int i = 0; i < size; i++) { - StoragePolicy policy = StoragePolicy.read(in); - storagePolicyNameMap.put(policy.getName(), policy); - } - } } public String toJson() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 28445d148ad1f6..7e8ec6a1a1e7e0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -659,8 +659,8 @@ private void checkAndPrepareMeta() { } } - for (BackupJobInfo.BackupStoragePolicyInfo backupStoragePolicyInfo : jobInfo.newBackupObjects.storagePolicies) { - String backupStoragePoliceName = backupStoragePolicyInfo.name; + for (StoragePolicy backupStoargePolicy : jobInfo.newBackupObjects.storagePolicies) { + String backupStoragePoliceName = backupStoargePolicy.getName(); Optional localPolicy = Env.getCurrentEnv().getPolicyMgr().findPolicy(backupStoragePoliceName, PolicyTypeEnum.STORAGE); if (localPolicy.isPresent() && localPolicy.get().getType() != PolicyTypeEnum.STORAGE) { @@ -1343,11 +1343,10 @@ private void checkAndRestoreStoragePolicies() { return; } PolicyMgr policyMgr = Env.getCurrentEnv().getPolicyMgr(); - for (BackupJobInfo.BackupStoragePolicyInfo backupStoragePolicyInfo : jobInfo.newBackupObjects.storagePolicies) { - String backupStoragePoliceName = backupStoragePolicyInfo.name; + for (StoragePolicy backupStoargePolicy : jobInfo.newBackupObjects.storagePolicies) { + String backupStoragePoliceName = backupStoargePolicy.getName(); Optional localPolicy = policyMgr.findPolicy(backupStoragePoliceName, PolicyTypeEnum.STORAGE); - StoragePolicy 
backupStoargePolicy = backupMeta.getStoragePolicy(backupStoragePoliceName); // use specified storageResource if (StringUtils.isNotEmpty(storageResource)) { @@ -2730,11 +2729,6 @@ private void writeOthers(DataOutput out) throws IOException { resource.write(out); } - out.writeInt(storagePolicies.size()); - for (StoragePolicy policy : storagePolicies) { - policy.write(out); - } - // write properties out.writeInt(properties.size()); for (Map.Entry entry : properties.entrySet()) { @@ -2833,12 +2827,6 @@ private void readOthers(DataInput in) throws IOException { restoredResources.add(Resource.read(in)); } - // restored storage policy - size = in.readInt(); - for (int i = 0; i < size; i++) { - storagePolicies.add(StoragePolicy.read(in)); - } - // read properties size = in.readInt(); for (int i = 0; i < size; i++) {