Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

branch-3.0: [improve](cloud-mow)Add delete bitmap metrics #47028 #47533

Merged
merged 1 commit into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions be/src/cloud/cloud_storage_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,12 @@ Status CloudStorageEngine::start_bg_threads() {

LOG(INFO) << "lease compaction thread started";

RETURN_IF_ERROR(Thread::create(
"StorageEngine", "check_tablet_delete_bitmap_score_thread",
[this]() { this->_check_tablet_delete_bitmap_score_callback(); },
&_bg_threads.emplace_back()));
LOG(INFO) << "check tablet delete bitmap score thread started";

return Status::OK();
}

Expand Down Expand Up @@ -774,6 +780,28 @@ void CloudStorageEngine::_lease_compaction_thread_callback() {
}
}

// Body of the "check_tablet_delete_bitmap_score_thread" background thread.
//
// Loops for as long as `_stop_background_threads_latch.wait_for(...)` keeps returning false,
// waiting `config::check_tablet_delete_bitmap_interval_seconds` seconds per round. Each
// enabled round asks the tablet manager for the current maximum delete bitmap scores and
// publishes the non-zero ones through the two bvar status metrics.
void CloudStorageEngine::_check_tablet_delete_bitmap_score_callback() {
    LOG(INFO) << "try to start check tablet delete bitmap score!";
    while (!_stop_background_threads_latch.wait_for(
            std::chrono::seconds(config::check_tablet_delete_bitmap_interval_seconds))) {
        // Skip the round instead of leaving the loop: returning here would end the thread
        // for good, so switching the config back on would have no effect until restart.
        if (!config::enable_check_tablet_delete_bitmap_score) {
            continue;
        }
        uint64_t max_delete_bitmap_score = 0;
        uint64_t max_base_rowset_delete_bitmap_score = 0;
        tablet_mgr().get_topn_tablet_delete_bitmap_score(&max_delete_bitmap_score,
                                                         &max_base_rowset_delete_bitmap_score);
        // A zero result leaves the previously published value untouched.
        if (max_delete_bitmap_score > 0) {
            _tablet_max_delete_bitmap_score_metrics->set_value(max_delete_bitmap_score);
        }
        if (max_base_rowset_delete_bitmap_score > 0) {
            _tablet_max_base_rowset_delete_bitmap_score_metrics->set_value(
                    max_base_rowset_delete_bitmap_score);
        }
    }
}

Status CloudStorageEngine::get_compaction_status_json(std::string* result) {
rapidjson::Document root;
root.SetObject();
Expand Down
1 change: 1 addition & 0 deletions be/src/cloud/cloud_storage_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ class CloudStorageEngine final : public BaseStorageEngine {
Status _submit_cumulative_compaction_task(const CloudTabletSPtr& tablet);
Status _submit_full_compaction_task(const CloudTabletSPtr& tablet);
void _lease_compaction_thread_callback();
// Body of the "check_tablet_delete_bitmap_score_thread" background thread: periodically
// queries the tablet manager for the max delete bitmap scores and publishes them as metrics.
void _check_tablet_delete_bitmap_score_callback();

std::atomic_bool _stopped {false};

Expand Down
46 changes: 46 additions & 0 deletions be/src/cloud/cloud_tablet_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -421,4 +421,50 @@ void CloudTabletMgr::get_tablet_info(int64_t num_tablets, std::vector<TabletInfo
}
}

void CloudTabletMgr::get_topn_tablet_delete_bitmap_score(
uint64_t* max_delete_bitmap_score, uint64_t* max_base_rowset_delete_bitmap_score) {
int64_t max_delete_bitmap_score_tablet_id = 0;
OlapStopWatch watch;
uint64_t total_delete_map_count = 0;
int64_t max_base_rowset_delete_bitmap_score_tablet_id = 0;
int n = config::check_tablet_delete_bitmap_score_top_n;
std::vector<std::pair<std::shared_ptr<CloudTablet>, int64_t>> buf;
buf.reserve(n + 1);
auto handler = [&](const std::weak_ptr<CloudTablet>& tablet_wk) {
auto t = tablet_wk.lock();
if (!t) return;
uint64_t delete_bitmap_count =
t.get()->tablet_meta()->delete_bitmap().get_delete_bitmap_count();
total_delete_map_count += delete_bitmap_count;
if (delete_bitmap_count > *max_delete_bitmap_score) {
max_delete_bitmap_score_tablet_id = t->tablet_id();
*max_delete_bitmap_score = delete_bitmap_count;
}
buf.emplace_back(std::move(t), delete_bitmap_count);
std::sort(buf.begin(), buf.end(), [](auto& a, auto& b) { return a.second > b.second; });
if (buf.size() > n) {
buf.pop_back();
}
};
auto weak_tablets = get_weak_tablets();
std::for_each(weak_tablets.begin(), weak_tablets.end(), handler);
for (auto& [t, _] : buf) {
t->get_base_rowset_delete_bitmap_count(max_base_rowset_delete_bitmap_score,
&max_base_rowset_delete_bitmap_score_tablet_id);
}
std::stringstream ss;
for (auto& i : buf) {
ss << i.first->tablet_id() << ":" << i.second << ",";
}
LOG(INFO) << "get_topn_tablet_delete_bitmap_score, n=" << n
<< ",tablet size=" << weak_tablets.size()
<< ",total_delete_map_count=" << total_delete_map_count
<< ",cost(us)=" << watch.get_elapse_time_us()
<< ",max_delete_bitmap_score=" << *max_delete_bitmap_score
<< ",max_delete_bitmap_score_tablet_id=" << max_delete_bitmap_score_tablet_id
<< ",max_base_rowset_delete_bitmap_score=" << *max_base_rowset_delete_bitmap_score
<< ",max_base_rowset_delete_bitmap_score_tablet_id="
<< max_base_rowset_delete_bitmap_score_tablet_id << ",tablets=[" << ss.str() << "]";
}

} // namespace doris
3 changes: 3 additions & 0 deletions be/src/cloud/cloud_tablet_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ class CloudTabletMgr {

void get_tablet_info(int64_t num_tablets, std::vector<TabletInfo>* tablets_info);

// Scans the tablets held by this manager, logs the top-n tablets by delete bitmap count
// (n = config::check_tablet_delete_bitmap_score_top_n) and updates the two out-params.
// Both out-params are in/out: each is only overwritten by a value greater than the one
// passed in, so callers are expected to initialise them (the visible caller passes 0).
void get_topn_tablet_delete_bitmap_score(uint64_t* max_delete_bitmap_score,
uint64_t* max_base_rowset_delete_bitmap_score);

private:
CloudStorageEngine& _engine;

Expand Down
4 changes: 4 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1425,6 +1425,10 @@ DEFINE_mBool(enable_sleep_between_delete_cumu_compaction, "false");

DEFINE_mInt32(compaction_num_per_round, "1");

// Wait time, in seconds, between two rounds of the background tablet delete bitmap score check.
DEFINE_mInt32(check_tablet_delete_bitmap_interval_seconds, "300");
// Number of tablets with the highest delete bitmap count that are kept per round; only these
// are inspected for their base rowset delete bitmap count and listed in the log.
DEFINE_mInt32(check_tablet_delete_bitmap_score_top_n, "10");
// Switch for the background tablet delete bitmap score check.
DEFINE_mBool(enable_check_tablet_delete_bitmap_score, "true");

// clang-format off
#ifdef BE_TEST
// test s3
Expand Down
4 changes: 4 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1507,6 +1507,10 @@ DECLARE_mBool(enable_sleep_between_delete_cumu_compaction);

DECLARE_mInt32(compaction_num_per_round);

// Wait time, in seconds, between two rounds of the background tablet delete bitmap score check.
DECLARE_mInt32(check_tablet_delete_bitmap_interval_seconds);
// Number of tablets with the highest delete bitmap count that are kept per round.
DECLARE_mInt32(check_tablet_delete_bitmap_score_top_n);
// Switch for the background tablet delete bitmap score check.
DECLARE_mBool(enable_check_tablet_delete_bitmap_score);

#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
Expand Down
34 changes: 34 additions & 0 deletions be/src/olap/base_tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1651,4 +1651,38 @@ Status BaseTablet::show_nested_index_file(std::string* json_meta) {
return Status::OK();
}

void BaseTablet::get_base_rowset_delete_bitmap_count(
uint64_t* max_base_rowset_delete_bitmap_score,
int64_t* max_base_rowset_delete_bitmap_score_tablet_id) {
std::vector<RowsetSharedPtr> rowsets_;
std::string base_rowset_id_str;
{
std::shared_lock rowset_ldlock(this->get_header_lock());
for (const auto& it : _rs_version_map) {
rowsets_.emplace_back(it.second);
}
}
std::sort(rowsets_.begin(), rowsets_.end(), Rowset::comparator);
if (!rowsets_.empty()) {
bool base_found = false;
for (auto& rowset : rowsets_) {
if (rowset->start_version() > 2) {
break;
}
base_found = true;
uint64_t base_rowset_delete_bitmap_count =
this->tablet_meta()->delete_bitmap().get_count_with_range(
{rowset->rowset_id(), 0, 0},
{rowset->rowset_id(), UINT32_MAX, UINT64_MAX});
if (base_rowset_delete_bitmap_count > *max_base_rowset_delete_bitmap_score) {
*max_base_rowset_delete_bitmap_score = base_rowset_delete_bitmap_count;
*max_base_rowset_delete_bitmap_score_tablet_id = this->tablet_id();
}
}
if (!base_found) {
LOG(WARNING) << "can not found base rowset for tablet " << tablet_id();
}
}
}

} // namespace doris
4 changes: 4 additions & 0 deletions be/src/olap/base_tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,10 @@ class BaseTablet {
TabletUid tablet_uid() const { return _tablet_meta->tablet_uid(); }
TabletInfo get_tablet_info() const { return TabletInfo(tablet_id(), tablet_uid()); }

// Counts the delete bitmap entries of this tablet's base rowsets (rowsets whose start
// version is <= 2). When a count exceeds `*max_base_rowset_delete_bitmap_score`, that value
// is raised and `*max_base_rowset_delete_bitmap_score_tablet_id` is set to this tablet's id.
void get_base_rowset_delete_bitmap_count(
uint64_t* max_base_rowset_delete_bitmap_score,
int64_t* max_base_rowset_delete_bitmap_score_tablet_id);

protected:
// Find the missed versions until the spec_version.
//
Expand Down
28 changes: 28 additions & 0 deletions be/src/olap/olap_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,12 @@ Status StorageEngine::start_bg_threads() {
[this]() { this->_async_publish_callback(); }, &_async_publish_thread));
LOG(INFO) << "async publish thread started";

RETURN_IF_ERROR(Thread::create(
"StorageEngine", "check_tablet_delete_bitmap_score_thread",
[this]() { this->_check_tablet_delete_bitmap_score_callback(); },
&_check_delete_bitmap_score_thread));
LOG(INFO) << "check tablet delete bitmap score thread started";

LOG(INFO) << "all storage engine's background threads are started.";
return Status::OK();
}
Expand Down Expand Up @@ -1611,4 +1617,26 @@ void StorageEngine::_async_publish_callback() {
}
}

// Body of the "check_tablet_delete_bitmap_score_thread" background thread.
//
// Loops for as long as `_stop_background_threads_latch.wait_for(...)` keeps returning false,
// waiting `config::check_tablet_delete_bitmap_interval_seconds` seconds per round. Each
// enabled round asks the tablet manager for the current maximum delete bitmap scores and
// publishes the non-zero ones through the two bvar status metrics.
void StorageEngine::_check_tablet_delete_bitmap_score_callback() {
    LOG(INFO) << "try to start check tablet delete bitmap score!";
    while (!_stop_background_threads_latch.wait_for(
            std::chrono::seconds(config::check_tablet_delete_bitmap_interval_seconds))) {
        // Skip the round instead of leaving the loop: returning here would end the thread
        // for good, so switching the config back on would have no effect until restart.
        if (!config::enable_check_tablet_delete_bitmap_score) {
            continue;
        }
        uint64_t max_delete_bitmap_score = 0;
        uint64_t max_base_rowset_delete_bitmap_score = 0;
        _tablet_manager->get_topn_tablet_delete_bitmap_score(
                &max_delete_bitmap_score, &max_base_rowset_delete_bitmap_score);
        // A zero result leaves the previously published value untouched.
        if (max_delete_bitmap_score > 0) {
            _tablet_max_delete_bitmap_score_metrics->set_value(max_delete_bitmap_score);
        }
        if (max_base_rowset_delete_bitmap_score > 0) {
            _tablet_max_base_rowset_delete_bitmap_score_metrics->set_value(
                    max_base_rowset_delete_bitmap_score);
        }
    }
}

} // namespace doris
5 changes: 5 additions & 0 deletions be/src/olap/storage_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ BaseStorageEngine::BaseStorageEngine(Type type, const UniqueId& backend_uid)
_stop_background_threads_latch(1) {
_memory_limitation_bytes_for_schema_change =
static_cast<int64_t>(MemInfo::soft_mem_limit() * config::schema_change_mem_limit_frac);
_tablet_max_delete_bitmap_score_metrics =
std::make_shared<bvar::Status<size_t>>("tablet_max", "delete_bitmap_score", 0);
_tablet_max_base_rowset_delete_bitmap_score_metrics = std::make_shared<bvar::Status<size_t>>(
"tablet_max_base_rowset", "delete_bitmap_score", 0);
}

BaseStorageEngine::~BaseStorageEngine() = default;
Expand Down Expand Up @@ -706,6 +710,7 @@ void StorageEngine::stop() {
THREAD_JOIN(_async_publish_thread);
THREAD_JOIN(_cold_data_compaction_producer_thread);
THREAD_JOIN(_cooldown_tasks_producer_thread);
THREAD_JOIN(_check_delete_bitmap_score_thread);
#undef THREAD_JOIN

#define THREADS_JOIN(threads) \
Expand Down
9 changes: 9 additions & 0 deletions be/src/olap/storage_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#pragma once

#include <butil/macros.h>
#include <bvar/bvar.h>
#include <gen_cpp/Types_types.h>
#include <gen_cpp/internal_service.pb.h>
#include <gen_cpp/olap_file.pb.h>
Expand Down Expand Up @@ -167,6 +168,9 @@ class BaseStorageEngine {
int _disk_num {-1};

std::shared_ptr<StreamLoadRecorder> _stream_load_recorder;

// bvar status metrics created in the BaseStorageEngine constructor and updated by the
// engines' check-tablet-delete-bitmap-score background callbacks.
// Largest per-tablet delete bitmap count found by the last round that produced a non-zero value.
std::shared_ptr<bvar::Status<size_t>> _tablet_max_delete_bitmap_score_metrics;
// Largest base rowset delete bitmap count found by the last round that produced a non-zero value.
std::shared_ptr<bvar::Status<size_t>> _tablet_max_base_rowset_delete_bitmap_score_metrics;
};

class CompactionSubmitRegistry {
Expand Down Expand Up @@ -429,6 +433,8 @@ class StorageEngine final : public BaseStorageEngine {

int32_t _auto_get_interval_by_disk_capacity(DataDir* data_dir);

// Body of the "check_tablet_delete_bitmap_score_thread" background thread: periodically
// queries the tablet manager for the max delete bitmap scores and publishes them as metrics.
void _check_tablet_delete_bitmap_score_callback();

private:
EngineOptions _options;
std::mutex _store_lock;
Expand Down Expand Up @@ -535,6 +541,9 @@ class StorageEngine final : public BaseStorageEngine {
std::unique_ptr<CreateTabletRRIdxCache> _create_tablet_idx_lru_cache;

std::unique_ptr<SnapshotManager> _snapshot_mgr;

// thread to check tablet delete bitmap count tasks
scoped_refptr<Thread> _check_delete_bitmap_score_thread;
};

// lru cache for create tabelt round robin in disks
Expand Down
43 changes: 43 additions & 0 deletions be/src/olap/tablet_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1753,4 +1753,47 @@ bool TabletManager::update_tablet_partition_id(::doris::TPartitionId partition_i
return true;
}

// Scans the tablets visited by for_each_tablet(..., filter_all_tablets) and reports delete
// bitmap statistics.
//
// For every visited tablet, the delete bitmap count
// (DeleteBitmap::get_delete_bitmap_count()) is read and accumulated. The
// `config::check_tablet_delete_bitmap_score_top_n` tablets with the highest counts are kept,
// and only those are asked for their base rowset delete bitmap count. A summary line is
// logged at INFO level.
//
// @param max_delete_bitmap_score in/out; raised to the largest per-tablet count seen when
//        that count is greater than the value passed in.
// @param max_base_rowset_delete_bitmap_score in/out; forwarded to
//        get_base_rowset_delete_bitmap_count() of each kept tablet.
void TabletManager::get_topn_tablet_delete_bitmap_score(
        uint64_t* max_delete_bitmap_score, uint64_t* max_base_rowset_delete_bitmap_score) {
    int64_t max_delete_bitmap_score_tablet_id = 0;
    int64_t max_base_rowset_delete_bitmap_score_tablet_id = 0;
    OlapStopWatch watch;
    uint64_t total_delete_map_count = 0;
    // Number of tablets actually visited; reported as "tablet size" in the log line.
    size_t tablet_count = 0;
    const int n = config::check_tablet_delete_bitmap_score_top_n;
    // The config value is signed. Used directly, a negative value would turn into a huge
    // unsigned number in reserve() and in the size comparison, so clamp it to zero first.
    const size_t top_n = n > 0 ? static_cast<size_t>(n) : 0;
    // Kept sorted by count, highest first, and never longer than top_n after an insertion.
    std::vector<std::pair<std::shared_ptr<Tablet>, uint64_t>> buf;
    buf.reserve(top_n + 1);
    auto handler = [&](const TabletSharedPtr& tablet) {
        ++tablet_count;
        const uint64_t delete_bitmap_count =
                tablet->tablet_meta()->delete_bitmap().get_delete_bitmap_count();
        total_delete_map_count += delete_bitmap_count;
        if (delete_bitmap_count > *max_delete_bitmap_score) {
            max_delete_bitmap_score_tablet_id = tablet->tablet_id();
            *max_delete_bitmap_score = delete_bitmap_count;
        }
        // Nothing to keep, or the buffer is full and this tablet does not beat its smallest entry.
        if (top_n == 0 || (buf.size() == top_n && delete_bitmap_count <= buf.back().second)) {
            return;
        }
        // Insert in front of the first entry with a strictly smaller count.
        auto pos = std::upper_bound(
                buf.begin(), buf.end(), delete_bitmap_count,
                [](uint64_t count, const auto& entry) { return count > entry.second; });
        // `tablet` is a const reference, so the shared pointer is copied, not moved.
        buf.emplace(pos, tablet, delete_bitmap_count);
        if (buf.size() > top_n) {
            buf.pop_back();
        }
    };
    for_each_tablet(handler, filter_all_tablets);
    for (auto& [t, _] : buf) {
        t->get_base_rowset_delete_bitmap_count(max_base_rowset_delete_bitmap_score,
                                               &max_base_rowset_delete_bitmap_score_tablet_id);
    }
    std::stringstream ss;
    for (const auto& i : buf) {
        ss << i.first->tablet_id() << ":" << i.second << ",";
    }
    LOG(INFO) << "get_topn_tablet_delete_bitmap_score, n=" << n
              << ",tablet size=" << tablet_count
              << ",total_delete_map_count=" << total_delete_map_count
              << ",cost(us)=" << watch.get_elapse_time_us()
              << ",max_delete_bitmap_score=" << *max_delete_bitmap_score
              << ",max_delete_bitmap_score_tablet_id=" << max_delete_bitmap_score_tablet_id
              << ",max_base_rowset_delete_bitmap_score=" << *max_base_rowset_delete_bitmap_score
              << ",max_base_rowset_delete_bitmap_score_tablet_id="
              << max_base_rowset_delete_bitmap_score_tablet_id << ",tablets=[" << ss.str() << "]";
}

} // end namespace doris
3 changes: 3 additions & 0 deletions be/src/olap/tablet_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ class TabletManager {
bool update_tablet_partition_id(::doris::TPartitionId partition_id,
::doris::TTabletId tablet_id);

// Scans the tablets held by this manager, logs the top-n tablets by delete bitmap count
// (n = config::check_tablet_delete_bitmap_score_top_n) and updates the two out-params.
// Both out-params are in/out: each is only overwritten by a value greater than the one
// passed in, so callers are expected to initialise them (the visible caller passes 0).
void get_topn_tablet_delete_bitmap_score(uint64_t* max_delete_bitmap_score,
uint64_t* max_base_rowset_delete_bitmap_score);

private:
// Add a tablet pointer to StorageEngine
// If force, drop the existing tablet add this new one
Expand Down
14 changes: 14 additions & 0 deletions be/src/olap/tablet_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1179,6 +1179,20 @@ void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end,
}
}

// Returns how many entries of `delete_bitmap` have a key in the half-open range [start, end).
// The map is read while holding `lock` in shared mode.
size_t DeleteBitmap::get_count_with_range(const BitmapKey& start, const BitmapKey& end) const {
    DCHECK(start < end);
    size_t matched = 0;
    std::shared_lock read_guard(lock);
    // Walk forward from the first key that is not less than `start` until the range ends.
    auto cursor = delete_bitmap.lower_bound(start);
    while (cursor != delete_bitmap.end() && !(cursor->first >= end)) {
        ++matched;
        ++cursor;
    }
    return matched;
}

void DeleteBitmap::merge(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) {
std::lock_guard l(lock);
auto [iter, succ] = delete_bitmap.emplace(bmk, segment_delete_bitmap);
Expand Down
8 changes: 8 additions & 0 deletions be/src/olap/tablet_meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,14 @@ class DeleteBitmap {
void subset(const BitmapKey& start, const BitmapKey& end,
DeleteBitmap* subset_delete_map) const;

/**
 * Gets the number of delete bitmap entries whose key is in the given range [start, end)
 *
 * @param start start key of the range (inclusive)
 * @param end end key of the range (exclusive)
 */
size_t get_count_with_range(const BitmapKey& start, const BitmapKey& end) const;

/**
* Merges the given segment delete bitmap into *this
*
Expand Down
12 changes: 12 additions & 0 deletions regression-test/data/metrics_p0/test_delete_bitmap_metrics.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
0 0 8
1 1 1
2 2 2
3 3 3
4 4 4
5 5 5
6 6 6
7 7 7
8 8 8

Loading
Loading