From 8373a2e1aa276083d3d6c0bf62ff98dfec1c2120 Mon Sep 17 00:00:00 2001 From: huanghaibin Date: Thu, 6 Feb 2025 10:25:54 +0800 Subject: [PATCH] [improve](cloud-mow)Add delete bitmap metrics (#47028) Count the max delete bitmap of tablet and base rowset, and expose them as metrics. --- be/src/cloud/cloud_storage_engine.cpp | 28 +++ be/src/cloud/cloud_storage_engine.h | 1 + be/src/cloud/cloud_tablet_mgr.cpp | 46 ++++ be/src/cloud/cloud_tablet_mgr.h | 3 + be/src/common/config.cpp | 4 + be/src/common/config.h | 4 + be/src/olap/base_tablet.cpp | 34 +++ be/src/olap/base_tablet.h | 4 + be/src/olap/olap_server.cpp | 28 +++ be/src/olap/storage_engine.cpp | 5 + be/src/olap/storage_engine.h | 9 + be/src/olap/tablet_manager.cpp | 43 ++++ be/src/olap/tablet_manager.h | 3 + be/src/olap/tablet_meta.cpp | 14 ++ be/src/olap/tablet_meta.h | 8 + .../metrics_p0/test_delete_bitmap_metrics.out | 12 + .../test_delete_bitmap_metrics.groovy | 230 ++++++++++++++++++ 17 files changed, 476 insertions(+) create mode 100644 regression-test/data/metrics_p0/test_delete_bitmap_metrics.out create mode 100644 regression-test/suites/metrics_p0/test_delete_bitmap_metrics.groovy diff --git a/be/src/cloud/cloud_storage_engine.cpp b/be/src/cloud/cloud_storage_engine.cpp index f71277d5983525..6c86c40e50351d 100644 --- a/be/src/cloud/cloud_storage_engine.cpp +++ b/be/src/cloud/cloud_storage_engine.cpp @@ -302,6 +302,12 @@ Status CloudStorageEngine::start_bg_threads(std::shared_ptr wg_sp LOG(INFO) << "lease compaction thread started"; + RETURN_IF_ERROR(Thread::create( + "StorageEngine", "check_tablet_delete_bitmap_score_thread", + [this]() { this->_check_tablet_delete_bitmap_score_callback(); }, + &_bg_threads.emplace_back())); + LOG(INFO) << "check tablet delete bitmap score thread started"; + return Status::OK(); } @@ -796,6 +802,28 @@ void CloudStorageEngine::_lease_compaction_thread_callback() { } } +void CloudStorageEngine::_check_tablet_delete_bitmap_score_callback() { + LOG(INFO) << "try to start
check tablet delete bitmap score!"; + while (!_stop_background_threads_latch.wait_for( + std::chrono::seconds(config::check_tablet_delete_bitmap_interval_seconds))) { + if (!config::enable_check_tablet_delete_bitmap_score) { + return; + } + uint64_t max_delete_bitmap_score = 0; + uint64_t max_base_rowset_delete_bitmap_score = 0; + std::vector tablets; + tablet_mgr().get_topn_tablet_delete_bitmap_score(&max_delete_bitmap_score, + &max_base_rowset_delete_bitmap_score); + if (max_delete_bitmap_score > 0) { + _tablet_max_delete_bitmap_score_metrics->set_value(max_delete_bitmap_score); + } + if (max_base_rowset_delete_bitmap_score > 0) { + _tablet_max_base_rowset_delete_bitmap_score_metrics->set_value( + max_base_rowset_delete_bitmap_score); + } + } +} + Status CloudStorageEngine::get_compaction_status_json(std::string* result) { rapidjson::Document root; root.SetObject(); diff --git a/be/src/cloud/cloud_storage_engine.h b/be/src/cloud/cloud_storage_engine.h index 34bde2e75f77b1..7d8e68c5f79906 100644 --- a/be/src/cloud/cloud_storage_engine.h +++ b/be/src/cloud/cloud_storage_engine.h @@ -155,6 +155,7 @@ class CloudStorageEngine final : public BaseStorageEngine { Status _submit_cumulative_compaction_task(const CloudTabletSPtr& tablet); Status _submit_full_compaction_task(const CloudTabletSPtr& tablet); void _lease_compaction_thread_callback(); + void _check_tablet_delete_bitmap_score_callback(); std::atomic_bool _stopped {false}; diff --git a/be/src/cloud/cloud_tablet_mgr.cpp b/be/src/cloud/cloud_tablet_mgr.cpp index d597ccc42a115f..04a1c33d5c385f 100644 --- a/be/src/cloud/cloud_tablet_mgr.cpp +++ b/be/src/cloud/cloud_tablet_mgr.cpp @@ -421,4 +421,50 @@ void CloudTabletMgr::get_tablet_info(int64_t num_tablets, std::vector, int64_t>> buf; + buf.reserve(n + 1); + auto handler = [&](const std::weak_ptr& tablet_wk) { + auto t = tablet_wk.lock(); + if (!t) return; + uint64_t delete_bitmap_count = + t.get()->tablet_meta()->delete_bitmap().get_delete_bitmap_count(); + 
total_delete_map_count += delete_bitmap_count; + if (delete_bitmap_count > *max_delete_bitmap_score) { + max_delete_bitmap_score_tablet_id = t->tablet_id(); + *max_delete_bitmap_score = delete_bitmap_count; + } + buf.emplace_back(std::move(t), delete_bitmap_count); + std::sort(buf.begin(), buf.end(), [](auto& a, auto& b) { return a.second > b.second; }); + if (buf.size() > n) { + buf.pop_back(); + } + }; + auto weak_tablets = get_weak_tablets(); + std::for_each(weak_tablets.begin(), weak_tablets.end(), handler); + for (auto& [t, _] : buf) { + t->get_base_rowset_delete_bitmap_count(max_base_rowset_delete_bitmap_score, + &max_base_rowset_delete_bitmap_score_tablet_id); + } + std::stringstream ss; + for (auto& i : buf) { + ss << i.first->tablet_id() << ":" << i.second << ","; + } + LOG(INFO) << "get_topn_tablet_delete_bitmap_score, n=" << n + << ",tablet size=" << weak_tablets.size() + << ",total_delete_map_count=" << total_delete_map_count + << ",cost(us)=" << watch.get_elapse_time_us() + << ",max_delete_bitmap_score=" << *max_delete_bitmap_score + << ",max_delete_bitmap_score_tablet_id=" << max_delete_bitmap_score_tablet_id + << ",max_base_rowset_delete_bitmap_score=" << *max_base_rowset_delete_bitmap_score + << ",max_base_rowset_delete_bitmap_score_tablet_id=" + << max_base_rowset_delete_bitmap_score_tablet_id << ",tablets=[" << ss.str() << "]"; +} + } // namespace doris diff --git a/be/src/cloud/cloud_tablet_mgr.h b/be/src/cloud/cloud_tablet_mgr.h index cbbd119a36b532..409c2eb35a0854 100644 --- a/be/src/cloud/cloud_tablet_mgr.h +++ b/be/src/cloud/cloud_tablet_mgr.h @@ -82,6 +82,9 @@ class CloudTabletMgr { void get_tablet_info(int64_t num_tablets, std::vector* tablets_info); + void get_topn_tablet_delete_bitmap_score(uint64_t* max_delete_bitmap_score, + uint64_t* max_base_rowset_delete_bitmap_score); + private: CloudStorageEngine& _engine; diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 9ffcef4fdab080..2d133dac402d89 100644 --- 
a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1412,6 +1412,10 @@ DEFINE_mBool(enable_sleep_between_delete_cumu_compaction, "false"); DEFINE_mInt32(compaction_num_per_round, "1"); +DEFINE_mInt32(check_tablet_delete_bitmap_interval_seconds, "300"); +DEFINE_mInt32(check_tablet_delete_bitmap_score_top_n, "10"); +DEFINE_mBool(enable_check_tablet_delete_bitmap_score, "true"); + // clang-format off #ifdef BE_TEST // test s3 diff --git a/be/src/common/config.h b/be/src/common/config.h index 8e19975823782c..35475e12ff67b9 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1498,6 +1498,10 @@ DECLARE_mBool(enable_sleep_between_delete_cumu_compaction); DECLARE_mInt32(compaction_num_per_round); +DECLARE_mInt32(check_tablet_delete_bitmap_interval_seconds); +DECLARE_mInt32(check_tablet_delete_bitmap_score_top_n); +DECLARE_mBool(enable_check_tablet_delete_bitmap_score); + #ifdef BE_TEST // test s3 DECLARE_String(test_s3_resource); diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 0fb7154c3cdda6..4b11c43007d11d 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -1876,4 +1876,38 @@ Status BaseTablet::show_nested_index_file(std::string* json_meta) { return Status::OK(); } +void BaseTablet::get_base_rowset_delete_bitmap_count( + uint64_t* max_base_rowset_delete_bitmap_score, + int64_t* max_base_rowset_delete_bitmap_score_tablet_id) { + std::vector rowsets_; + std::string base_rowset_id_str; + { + std::shared_lock rowset_ldlock(this->get_header_lock()); + for (const auto& it : _rs_version_map) { + rowsets_.emplace_back(it.second); + } + } + std::sort(rowsets_.begin(), rowsets_.end(), Rowset::comparator); + if (!rowsets_.empty()) { + bool base_found = false; + for (auto& rowset : rowsets_) { + if (rowset->start_version() > 2) { + break; + } + base_found = true; + uint64_t base_rowset_delete_bitmap_count = + this->tablet_meta()->delete_bitmap().get_count_with_range( + {rowset->rowset_id(), 0, 0}, + 
{rowset->rowset_id(), UINT32_MAX, UINT64_MAX}); + if (base_rowset_delete_bitmap_count > *max_base_rowset_delete_bitmap_score) { + *max_base_rowset_delete_bitmap_score = base_rowset_delete_bitmap_count; + *max_base_rowset_delete_bitmap_score_tablet_id = this->tablet_id(); + } + } + if (!base_found) { + LOG(WARNING) << "can not found base rowset for tablet " << tablet_id(); + } + } +} + } // namespace doris diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index 9c6b63a00fe5bb..dbe5d400022352 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -304,6 +304,10 @@ class BaseTablet { TabletUid tablet_uid() const { return _tablet_meta->tablet_uid(); } TabletInfo get_tablet_info() const { return TabletInfo(tablet_id(), tablet_uid()); } + void get_base_rowset_delete_bitmap_count( + uint64_t* max_base_rowset_delete_bitmap_score, + int64_t* max_base_rowset_delete_bitmap_score_tablet_id); + protected: // Find the missed versions until the spec_version. // diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 2b77dc7e7a6f8c..9a99ee3e246ad6 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -381,6 +381,12 @@ Status StorageEngine::start_bg_threads(std::shared_ptr wg_sptr) { [this]() { this->_async_publish_callback(); }, &_async_publish_thread)); LOG(INFO) << "async publish thread started"; + RETURN_IF_ERROR(Thread::create( + "StorageEngine", "check_tablet_delete_bitmap_score_thread", + [this]() { this->_check_tablet_delete_bitmap_score_callback(); }, + &_check_delete_bitmap_score_thread)); + LOG(INFO) << "check tablet delete bitmap score thread started"; + LOG(INFO) << "all storage engine's background threads are started."; return Status::OK(); } @@ -1642,4 +1648,26 @@ void StorageEngine::_async_publish_callback() { } } +void StorageEngine::_check_tablet_delete_bitmap_score_callback() { + LOG(INFO) << "try to start check tablet delete bitmap score!"; + while 
(!_stop_background_threads_latch.wait_for( + std::chrono::seconds(config::check_tablet_delete_bitmap_interval_seconds))) { + if (!config::enable_check_tablet_delete_bitmap_score) { + return; + } + uint64_t max_delete_bitmap_score = 0; + uint64_t max_base_rowset_delete_bitmap_score = 0; + std::vector tablets; + _tablet_manager.get()->get_topn_tablet_delete_bitmap_score( + &max_delete_bitmap_score, &max_base_rowset_delete_bitmap_score); + if (max_delete_bitmap_score > 0) { + _tablet_max_delete_bitmap_score_metrics->set_value(max_delete_bitmap_score); + } + if (max_base_rowset_delete_bitmap_score > 0) { + _tablet_max_base_rowset_delete_bitmap_score_metrics->set_value( + max_base_rowset_delete_bitmap_score); + } + } +} + } // namespace doris diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 24cda8232f115c..3ba65813492802 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -103,6 +103,10 @@ BaseStorageEngine::BaseStorageEngine(Type type, const UniqueId& backend_uid) _stop_background_threads_latch(1) { _memory_limitation_bytes_for_schema_change = static_cast(MemInfo::soft_mem_limit() * config::schema_change_mem_limit_frac); + _tablet_max_delete_bitmap_score_metrics = + std::make_shared>("tablet_max", "delete_bitmap_score", 0); + _tablet_max_base_rowset_delete_bitmap_score_metrics = std::make_shared>( + "tablet_max_base_rowset", "delete_bitmap_score", 0); } BaseStorageEngine::~BaseStorageEngine() = default; @@ -706,6 +710,7 @@ void StorageEngine::stop() { THREAD_JOIN(_async_publish_thread); THREAD_JOIN(_cold_data_compaction_producer_thread); THREAD_JOIN(_cooldown_tasks_producer_thread); + THREAD_JOIN(_check_delete_bitmap_score_thread); #undef THREAD_JOIN #define THREADS_JOIN(threads) \ diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index a22015898988b3..e180f181a15730 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -18,6 +18,7 @@ #pragma once #include 
+#include #include #include #include @@ -168,6 +169,9 @@ class BaseStorageEngine { int _disk_num {-1}; std::shared_ptr _stream_load_recorder; + + std::shared_ptr> _tablet_max_delete_bitmap_score_metrics; + std::shared_ptr> _tablet_max_base_rowset_delete_bitmap_score_metrics; }; class CompactionSubmitRegistry { @@ -430,6 +434,8 @@ class StorageEngine final : public BaseStorageEngine { int32_t _auto_get_interval_by_disk_capacity(DataDir* data_dir); + void _check_tablet_delete_bitmap_score_callback(); + private: EngineOptions _options; std::mutex _store_lock; @@ -536,6 +542,9 @@ class StorageEngine final : public BaseStorageEngine { std::unique_ptr _create_tablet_idx_lru_cache; std::unique_ptr _snapshot_mgr; + + // thread to check tablet delete bitmap count tasks + scoped_refptr _check_delete_bitmap_score_thread; }; // lru cache for create tabelt round robin in disks diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 18e317cb12d1e0..f3283ae0d93c8a 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -1764,4 +1764,47 @@ bool TabletManager::update_tablet_partition_id(::doris::TPartitionId partition_i return true; } +void TabletManager::get_topn_tablet_delete_bitmap_score( + uint64_t* max_delete_bitmap_score, uint64_t* max_base_rowset_delete_bitmap_score) { + int64_t max_delete_bitmap_score_tablet_id = 0; + int64_t max_base_rowset_delete_bitmap_score_tablet_id = 0; + OlapStopWatch watch; + uint64_t total_delete_map_count = 0; + int n = config::check_tablet_delete_bitmap_score_top_n; + std::vector, int64_t>> buf; + buf.reserve(n + 1); + auto handler = [&](const TabletSharedPtr& tablet) { + uint64_t delete_bitmap_count = + tablet->tablet_meta()->delete_bitmap().get_delete_bitmap_count(); + total_delete_map_count += delete_bitmap_count; + if (delete_bitmap_count > *max_delete_bitmap_score) { + max_delete_bitmap_score_tablet_id = tablet->tablet_id(); + *max_delete_bitmap_score = delete_bitmap_count; + } + 
buf.emplace_back(std::move(tablet), delete_bitmap_count); + std::sort(buf.begin(), buf.end(), [](auto& a, auto& b) { return a.second > b.second; }); + if (buf.size() > n) { + buf.pop_back(); + } + }; + for_each_tablet(handler, filter_all_tablets); + for (auto& [t, _] : buf) { + t->get_base_rowset_delete_bitmap_count(max_base_rowset_delete_bitmap_score, + &max_base_rowset_delete_bitmap_score_tablet_id); + } + std::stringstream ss; + for (auto& i : buf) { + ss << i.first->tablet_id() << ":" << i.second << ","; + } + LOG(INFO) << "get_topn_tablet_delete_bitmap_score, n=" << n + << ",tablet size=" << _tablets_shards.size() + << ",total_delete_map_count=" << total_delete_map_count + << ",cost(us)=" << watch.get_elapse_time_us() + << ",max_delete_bitmap_score=" << *max_delete_bitmap_score + << ",max_delete_bitmap_score_tablet_id=" << max_delete_bitmap_score_tablet_id + << ",max_base_rowset_delete_bitmap_score=" << *max_base_rowset_delete_bitmap_score + << ",max_base_rowset_delete_bitmap_score_tablet_id=" + << max_base_rowset_delete_bitmap_score_tablet_id << ",tablets=[" << ss.str() << "]"; +} + } // end namespace doris diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index 6b6e7998f9cee1..b7262fe56b20ff 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -178,6 +178,9 @@ class TabletManager { bool update_tablet_partition_id(::doris::TPartitionId partition_id, ::doris::TTabletId tablet_id); + void get_topn_tablet_delete_bitmap_score(uint64_t* max_delete_bitmap_score, + uint64_t* max_base_rowset_delete_bitmap_score); + private: // Add a tablet pointer to StorageEngine // If force, drop the existing tablet add this new one diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index e2f17730973b8d..6889d7c8dc6dd5 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -1186,6 +1186,20 @@ void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end, } } +size_t 
DeleteBitmap::get_count_with_range(const BitmapKey& start, const BitmapKey& end) const { + DCHECK(start < end); + size_t count = 0; + std::shared_lock l(lock); + for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { + auto& [k, bm] = *it; + if (k >= end) { + break; + } + count++; + } + return count; +} + void DeleteBitmap::merge(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) { std::lock_guard l(lock); auto [iter, succ] = delete_bitmap.emplace(bmk, segment_delete_bitmap); diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 0cbc106c135610..e65cfb5ca86142 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -501,6 +501,14 @@ class DeleteBitmap { void subset(const BitmapKey& start, const BitmapKey& end, DeleteBitmap* subset_delete_map) const; + /** + * Gets the number of delete bitmap entries whose key lies in [start, end) + * + * @param start start key of the range (inclusive) + * @param end end key of the range (exclusive) + */ + size_t get_count_with_range(const BitmapKey& start, const BitmapKey& end) const; + /** * Merges the given segment delete bitmap into *this * diff --git a/regression-test/data/metrics_p0/test_delete_bitmap_metrics.out b/regression-test/data/metrics_p0/test_delete_bitmap_metrics.out new file mode 100644 index 00000000000000..98d6fa9fdb8b76 --- /dev/null +++ b/regression-test/data/metrics_p0/test_delete_bitmap_metrics.out @@ -0,0 +1,12 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +0 0 8 +1 1 1 +2 2 2 +3 3 3 +4 4 4 +5 5 5 +6 6 6 +7 7 7 +8 8 8 + diff --git a/regression-test/suites/metrics_p0/test_delete_bitmap_metrics.groovy b/regression-test/suites/metrics_p0/test_delete_bitmap_metrics.groovy new file mode 100644 index 00000000000000..9d0e9b2956b861 --- /dev/null +++ b/regression-test/suites/metrics_p0/test_delete_bitmap_metrics.groovy @@ -0,0 +1,230 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_delete_bitmap_metrics", "p0") { + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + def backendId_to_params = [string: [:]] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + def set_be_param = { paramName, paramValue -> + // for each BE node, set paramName=paramValue + for (String id in backendId_to_backendIP.keySet()) { + def beIp = backendId_to_backendIP.get(id) + def bePort = backendId_to_backendHttpPort.get(id) + def (code, out, err) = curl("POST", String.format("http://%s:%s/api/update_config?%s=%s", beIp, bePort, paramName, paramValue)) + assertTrue(out.contains("OK")) + } + } + + def reset_be_param = { paramName -> + // for each BE node, restore paramName to the value saved by get_be_param + for (String id in backendId_to_backendIP.keySet()) { + def beIp = backendId_to_backendIP.get(id) + def bePort = backendId_to_backendHttpPort.get(id) + def original_value = backendId_to_params.get(id).get(paramName) + def (code, out, err) = curl("POST", String.format("http://%s:%s/api/update_config?%s=%s", beIp, bePort, paramName, original_value)) + assertTrue(out.contains("OK")) + } + } + + def get_be_param = { paramName -> + // for each BE node, fetch the current value of paramName and save it + def
paramValue = "" + for (String id in backendId_to_backendIP.keySet()) { + def beIp = backendId_to_backendIP.get(id) + def bePort = backendId_to_backendHttpPort.get(id) + // get the config value from be + def (code, out, err) = curl("GET", String.format("http://%s:%s/api/show_config?conf_item=%s", beIp, bePort, paramName)) + assertTrue(code == 0) + assertTrue(out.contains(paramName)) + // parsing + def resultList = parseJson(out)[0] + assertTrue(resultList.size() == 4) + // get original value + paramValue = resultList[2] + backendId_to_params.get(id, [:]).put(paramName, paramValue) + } + } + + def getLocalDeleteBitmapStatus = { be_host, be_http_port, tablet_id -> + boolean running = true + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET http://${be_host}:${be_http_port}") + sb.append("/api/delete_bitmap/count_local?tablet_id=") + sb.append(tablet_id) + + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + out = process.getText() + logger.info("Get local delete bitmap count status: =" + code + ", out=" + out) + assertEquals(code, 0) + def deleteBitmapStatus = parseJson(out.trim()) + return deleteBitmapStatus + } + + def getMSDeleteBitmapStatus = { be_host, be_http_port, tablet_id -> + boolean running = true + StringBuilder sb = new StringBuilder(); + sb.append("curl -X GET http://${be_host}:${be_http_port}") + sb.append("/api/delete_bitmap/count_ms?tablet_id=") + sb.append(tablet_id) + + String command = sb.toString() + logger.info(command) + process = command.execute() + code = process.waitFor() + out = process.getText() + logger.info("Get ms delete bitmap count status: =" + code + ", out=" + out) + assertEquals(code, 0) + def deleteBitmapStatus = parseJson(out.trim()) + return deleteBitmapStatus + } + + String[][] backends = sql """ show backends """ + assertTrue(backends.size() > 0) + String backendId; + def backendIdToBackendIP = [:] + def backendIdToBackendBrpcPort = [:] + for (String[] 
backend in backends) { + if (backend[9].equals("true")) { + backendIdToBackendIP.put(backend[0], backend[1]) + backendIdToBackendBrpcPort.put(backend[0], backend[5]) + } + } + backendId = backendIdToBackendIP.keySet()[0] + def getMetricsMethod = { check_func -> + httpTest { + endpoint backendIdToBackendIP.get(backendId) + ":" + backendIdToBackendBrpcPort.get(backendId) + uri "/brpc_metrics" + op "get" + check check_func + } + } + + def testTable = "test_delete_bitmap_metrics" + def timeout = 10000 + sql """ DROP TABLE IF EXISTS ${testTable}""" + def testTableDDL = """ + create table ${testTable} + ( + `plan_id` bigint(20) NOT NULL, + `target_id` int(20) NOT NULL, + `target_name` varchar(255) NOT NULL + ) + ENGINE=OLAP + UNIQUE KEY(`plan_id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`plan_id`) BUCKETS 1 + PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + sql testTableDDL + get_be_param("check_tablet_delete_bitmap_interval_seconds") + try { + set_be_param("check_tablet_delete_bitmap_interval_seconds", "10") + sql "sync" + + sql "sync" + sql """ INSERT INTO ${testTable} VALUES (0,0,'1'),(1,1,'1'); """ + sql """ INSERT INTO ${testTable} VALUES (0,0,'2'),(2,2,'2'); """ + sql """ INSERT INTO ${testTable} VALUES (0,0,'3'),(3,3,'3'); """ + sql """ INSERT INTO ${testTable} VALUES (0,0,'4'),(4,4,'4'); """ + sql """ INSERT INTO ${testTable} VALUES (0,0,'5'),(5,5,'5'); """ + sql """ INSERT INTO ${testTable} VALUES (0,0,'6'),(6,6,'6'); """ + sql """ INSERT INTO ${testTable} VALUES (0,0,'7'),(7,7,'7'); """ + sql """ INSERT INTO ${testTable} VALUES (0,0,'8'),(8,8,'8'); """ + + qt_sql "select * from ${testTable} order by plan_id" + + + def tablets = sql_return_maparray """ show tablets from ${testTable}; """ + logger.info("tablets: " + tablets) + def local_delete_bitmap_count = 0 + def ms_delete_bitmap_count = 0 + def local_delete_bitmap_cardinality = 0; + def 
ms_delete_bitmap_cardinality = 0; + for (def tablet in tablets) { + String tablet_id = tablet.TabletId + def tablet_info = sql_return_maparray """ show tablet ${tablet_id}; """ + logger.info("tablet: " + tablet_info) + String trigger_backend_id = tablet.BackendId + + // before compaction, delete_bitmap_count is (rowsets num - 1) + local_delete_bitmap_count = getLocalDeleteBitmapStatus(backendId_to_backendIP[trigger_backend_id], backendId_to_backendHttpPort[trigger_backend_id], tablet_id).delete_bitmap_count + local_delete_bitmap_cardinality = getLocalDeleteBitmapStatus(backendId_to_backendIP[trigger_backend_id], backendId_to_backendHttpPort[trigger_backend_id], tablet_id).cardinality + logger.info("local_delete_bitmap_count:" + local_delete_bitmap_count) + logger.info("local_delete_bitmap_cardinality:" + local_delete_bitmap_cardinality) + assertTrue(local_delete_bitmap_count == 7) + assertTrue(local_delete_bitmap_cardinality == 7) + + if (isCloudMode()) { + ms_delete_bitmap_count = getMSDeleteBitmapStatus(backendId_to_backendIP[trigger_backend_id], backendId_to_backendHttpPort[trigger_backend_id], tablet_id).delete_bitmap_count + ms_delete_bitmap_cardinality = getMSDeleteBitmapStatus(backendId_to_backendIP[trigger_backend_id], backendId_to_backendHttpPort[trigger_backend_id], tablet_id).cardinality + logger.info("ms_delete_bitmap_count:" + ms_delete_bitmap_count) + logger.info("ms_delete_bitmap_cardinality:" + ms_delete_bitmap_cardinality) + assertTrue(ms_delete_bitmap_count == 7) + assertTrue(ms_delete_bitmap_cardinality == 7) + } + def tablet_delete_bitmap_count = 0; + def base_rowset_delete_bitmap_count = 0; + int retry_time = 0; + while (retry_time < 10) { + log.info("retry_time: ${retry_time}") + getMetricsMethod.call() { + respCode, body -> + logger.info("test get delete bitmap count resp Code {}", "${respCode}".toString()) + assertEquals("${respCode}".toString(), "200") + String out = "${body}".toString() + def strs = out.split('\n') + for (String line in 
strs) { + if (line.startsWith("tablet_max_delete_bitmap_score")) { + logger.info("find: {}", line) + tablet_delete_bitmap_count = line.replaceAll("tablet_max_delete_bitmap_score ", "").toInteger() + break + } + } + for (String line in strs) { + if (line.startsWith("tablet_max_base_rowset_delete_bitmap_score")) { + logger.info("find: {}", line) + base_rowset_delete_bitmap_count = line.replaceAll("tablet_max_base_rowset_delete_bitmap_score ", "").toInteger() + break + } + } + } + if (tablet_delete_bitmap_count > 0 && base_rowset_delete_bitmap_count > 0) { + break; + } else { + Thread.sleep(10000) + retry_time++; + } + } + assertTrue(tablet_delete_bitmap_count > 0) + assertTrue(base_rowset_delete_bitmap_count > 0) + } + } finally { + reset_be_param("check_tablet_delete_bitmap_interval_seconds") + } + +}