Skip to content

Commit 6509c3b

Browse files
authored
[test](index compaction) Add index compaction full flow UT test (#45746)
1. Add index compaction full-flow UT tests. 2. Add an index compaction performance test, disabled by default.
1 parent 30ebe42 commit 6509c3b

File tree

5 files changed

+1442
-110
lines changed

5 files changed

+1442
-110
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,265 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <gmock/gmock.h>
19+
20+
#include <filesystem>
21+
#include <map>
22+
#include <string>
23+
24+
#include "olap/utils.h"
25+
#include "util/index_compaction_utils.cpp"
26+
27+
namespace doris {
28+
29+
using namespace doris::vectorized;
30+
31+
// Size, in bytes, of the stack buffer the fixture hands to getcwd().
static constexpr uint32_t MAX_PATH_LEN = 1024;
// Location of the test data directory; the fixture combines it with the current working directory.
static constexpr std::string_view dest_dir = "./ut_dir/inverted_index_test";
// Location of the temporary-file directory the fixture registers with ExecEnv.
static constexpr std::string_view tmp_dir = "./ut_dir/tmp";
34+
35+
class DISABLED_IndexCompactionPerformanceTest : public ::testing::Test {
36+
protected:
37+
void SetUp() override {
38+
// absolute dir
39+
char buffer[MAX_PATH_LEN];
40+
EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
41+
_current_dir = std::string(buffer);
42+
_absolute_dir = _current_dir + std::string(dest_dir);
43+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
44+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
45+
46+
// tmp dir
47+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
48+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
49+
std::vector<StorePath> paths;
50+
paths.emplace_back(std::string(tmp_dir), 1024000000);
51+
auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
52+
Status st = tmp_file_dirs->init();
53+
EXPECT_TRUE(st.ok()) << st.to_json();
54+
ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
55+
56+
// storage engine
57+
doris::EngineOptions options;
58+
auto engine = std::make_unique<StorageEngine>(options);
59+
_engine_ref = engine.get();
60+
_data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir);
61+
static_cast<void>(_data_dir->update_capacity());
62+
ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
63+
config::enable_segcompaction = false;
64+
config::string_type_length_soft_limit_bytes = 2147483643;
65+
config::inverted_index_dict_path =
66+
_current_dir + "/be/src/clucene/src/contribs-lib/CLucene/analysis/jieba/dict";
67+
}
68+
void TearDown() override {
69+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
70+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
71+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
72+
_engine_ref = nullptr;
73+
ExecEnv::GetInstance()->set_storage_engine(nullptr);
74+
}
75+
76+
DISABLED_IndexCompactionPerformanceTest() = default;
77+
~DISABLED_IndexCompactionPerformanceTest() override = default;
78+
79+
void _build_wiki_tablet(const KeysType& keys_type,
80+
const InvertedIndexStorageFormatPB& storage_format,
81+
const std::map<std::string, std::string>& properties) {
82+
// tablet_schema
83+
TabletSchemaPB schema_pb;
84+
schema_pb.set_keys_type(keys_type);
85+
schema_pb.set_inverted_index_storage_format(storage_format);
86+
87+
IndexCompactionUtils::construct_column(schema_pb.add_column(), 0, "STRING", "title");
88+
IndexCompactionUtils::construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001,
89+
"idx_content", 1, "STRING", "content", properties);
90+
IndexCompactionUtils::construct_column(schema_pb.add_column(), 2, "STRING", "redirect");
91+
IndexCompactionUtils::construct_column(schema_pb.add_column(), 3, "STRING", "namespace");
92+
if (keys_type == KeysType::UNIQUE_KEYS) {
93+
// unique table must contain the DELETE_SIGN column
94+
auto* column_pb = schema_pb.add_column();
95+
IndexCompactionUtils::construct_column(column_pb, 4, "TINYINT", DELETE_SIGN);
96+
column_pb->set_length(1);
97+
column_pb->set_index_length(1);
98+
column_pb->set_is_nullable(false);
99+
}
100+
_tablet_schema = std::make_shared<TabletSchema>();
101+
_tablet_schema->init_from_pb(schema_pb);
102+
103+
// tablet
104+
TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
105+
if (keys_type == KeysType::UNIQUE_KEYS) {
106+
tablet_meta->_enable_unique_key_merge_on_write = true;
107+
}
108+
109+
_tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get());
110+
EXPECT_TRUE(_tablet->init().ok());
111+
}
112+
113+
void _run_normal_wiki_test() {
114+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
115+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
116+
std::string data_dir =
117+
_current_dir + "/be/test/olap/rowset/segment_v2/inverted_index/data/performance";
118+
std::vector<std::string> data_files;
119+
for (const auto& entry : std::filesystem::directory_iterator(data_dir)) {
120+
if (entry.is_regular_file()) {
121+
std::string filename = entry.path().filename().string();
122+
if (filename.starts_with("wikipedia") && filename.ends_with(".json")) {
123+
std::cout << "Found file: " << filename << std::endl;
124+
data_files.push_back(entry.path().string());
125+
}
126+
}
127+
}
128+
129+
std::vector<RowsetSharedPtr> rowsets(data_files.size());
130+
auto custom_check_build_rowsets = [](const int32_t& size) { EXPECT_EQ(size, 1); };
131+
IndexCompactionUtils::build_rowsets<IndexCompactionUtils::WikiDataRow>(
132+
_data_dir, _tablet_schema, _tablet, _engine_ref, rowsets, data_files, _inc_id,
133+
custom_check_build_rowsets, true);
134+
135+
auto custom_check_index = [](const BaseCompaction& compaction,
136+
const RowsetWriterContext& ctx) {
137+
EXPECT_EQ(compaction._cur_tablet_schema->inverted_indexes().size(), 1);
138+
EXPECT_TRUE(ctx.columns_to_do_index_compaction.size() == 1);
139+
EXPECT_TRUE(ctx.columns_to_do_index_compaction.contains(1));
140+
EXPECT_TRUE(compaction._output_rowset->num_segments() == 1)
141+
<< compaction._output_rowset->num_segments();
142+
};
143+
144+
RowsetSharedPtr output_rowset_index;
145+
Status st;
146+
{
147+
OlapStopWatch watch;
148+
st = IndexCompactionUtils::do_compaction(rowsets, _engine_ref, _tablet, true,
149+
output_rowset_index, custom_check_index,
150+
10000000);
151+
std::cout << "index compaction time: " << watch.get_elapse_second() << "s" << std::endl;
152+
}
153+
EXPECT_TRUE(st.ok()) << st.to_string();
154+
155+
const auto& seg_path = output_rowset_index->segment_path(0);
156+
EXPECT_TRUE(seg_path.has_value()) << seg_path.error();
157+
auto inverted_index_file_reader_index = IndexCompactionUtils::init_index_file_reader(
158+
output_rowset_index, seg_path.value(),
159+
_tablet_schema->get_inverted_index_storage_format());
160+
161+
auto custom_check_normal = [](const BaseCompaction& compaction,
162+
const RowsetWriterContext& ctx) {
163+
EXPECT_EQ(compaction._cur_tablet_schema->inverted_indexes().size(), 1);
164+
EXPECT_TRUE(ctx.columns_to_do_index_compaction.size() == 0);
165+
EXPECT_TRUE(compaction._output_rowset->num_segments() == 1);
166+
};
167+
168+
RowsetSharedPtr output_rowset_normal;
169+
{
170+
OlapStopWatch watch;
171+
st = IndexCompactionUtils::do_compaction(rowsets, _engine_ref, _tablet, false,
172+
output_rowset_normal, custom_check_normal,
173+
10000000);
174+
std::cout << "normal compaction time: " << watch.get_elapse_second() << "s"
175+
<< std::endl;
176+
}
177+
EXPECT_TRUE(st.ok()) << st.to_string();
178+
const auto& seg_path_normal = output_rowset_normal->segment_path(0);
179+
EXPECT_TRUE(seg_path_normal.has_value()) << seg_path_normal.error();
180+
auto inverted_index_file_reader_normal = IndexCompactionUtils::init_index_file_reader(
181+
output_rowset_normal, seg_path_normal.value(),
182+
_tablet_schema->get_inverted_index_storage_format());
183+
184+
// check index file terms
185+
for (int idx = 10001; idx < 10002; idx++) {
186+
auto dir_idx = inverted_index_file_reader_index->_open(idx, "");
187+
EXPECT_TRUE(dir_idx.has_value()) << dir_idx.error();
188+
auto dir_normal = inverted_index_file_reader_normal->_open(idx, "");
189+
EXPECT_TRUE(dir_normal.has_value()) << dir_normal.error();
190+
st = IndexCompactionUtils::check_idx_file_correctness(dir_idx->get(),
191+
dir_normal->get());
192+
EXPECT_TRUE(st.ok()) << st.to_string();
193+
}
194+
}
195+
196+
private:
197+
TabletSchemaSPtr _tablet_schema = nullptr;
198+
StorageEngine* _engine_ref = nullptr;
199+
std::unique_ptr<DataDir> _data_dir = nullptr;
200+
TabletSharedPtr _tablet = nullptr;
201+
std::string _absolute_dir;
202+
std::string _current_dir;
203+
int64_t _inc_id = 1000;
204+
};
205+
206+
// DUP_KEYS tablet, V2 index storage format, english parser with phrase support and lowercasing.
TEST_F(DISABLED_IndexCompactionPerformanceTest, tes_wikipedia_dup_v2_english) {
    const std::map<std::string, std::string> index_properties {
            {INVERTED_INDEX_PARSER_KEY, INVERTED_INDEX_PARSER_ENGLISH},
            {INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY, INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES},
            {INVERTED_INDEX_PARSER_LOWERCASE_KEY, INVERTED_INDEX_PARSER_TRUE},
    };
    _build_wiki_tablet(KeysType::DUP_KEYS, InvertedIndexStorageFormatPB::V2, index_properties);
    _run_normal_wiki_test();
}
215+
216+
// DUP_KEYS tablet, V2 index storage format, unicode parser with phrase support and lowercasing.
TEST_F(DISABLED_IndexCompactionPerformanceTest, tes_wikipedia_dup_v2_unicode) {
    const std::map<std::string, std::string> index_properties {
            {INVERTED_INDEX_PARSER_KEY, INVERTED_INDEX_PARSER_UNICODE},
            {INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY, INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES},
            {INVERTED_INDEX_PARSER_LOWERCASE_KEY, INVERTED_INDEX_PARSER_TRUE},
    };
    _build_wiki_tablet(KeysType::DUP_KEYS, InvertedIndexStorageFormatPB::V2, index_properties);
    _run_normal_wiki_test();
}
225+
226+
// DUP_KEYS tablet, V2 index storage format, chinese parser with phrase support and lowercasing.
TEST_F(DISABLED_IndexCompactionPerformanceTest, tes_wikipedia_dup_v2_chinese) {
    const std::map<std::string, std::string> index_properties {
            {INVERTED_INDEX_PARSER_KEY, INVERTED_INDEX_PARSER_CHINESE},
            {INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY, INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES},
            {INVERTED_INDEX_PARSER_LOWERCASE_KEY, INVERTED_INDEX_PARSER_TRUE},
    };
    _build_wiki_tablet(KeysType::DUP_KEYS, InvertedIndexStorageFormatPB::V2, index_properties);
    _run_normal_wiki_test();
}
235+
236+
// UNIQUE_KEYS tablet, V2 index storage format, english parser with phrase support and lowercasing.
TEST_F(DISABLED_IndexCompactionPerformanceTest, tes_wikipedia_mow_v2_english) {
    const std::map<std::string, std::string> index_properties {
            {INVERTED_INDEX_PARSER_KEY, INVERTED_INDEX_PARSER_ENGLISH},
            {INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY, INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES},
            {INVERTED_INDEX_PARSER_LOWERCASE_KEY, INVERTED_INDEX_PARSER_TRUE},
    };
    _build_wiki_tablet(KeysType::UNIQUE_KEYS, InvertedIndexStorageFormatPB::V2, index_properties);
    _run_normal_wiki_test();
}
245+
246+
// UNIQUE_KEYS tablet, V2 index storage format, unicode parser with phrase support and lowercasing.
TEST_F(DISABLED_IndexCompactionPerformanceTest, tes_wikipedia_mow_v2_unicode) {
    const std::map<std::string, std::string> index_properties {
            {INVERTED_INDEX_PARSER_KEY, INVERTED_INDEX_PARSER_UNICODE},
            {INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY, INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES},
            {INVERTED_INDEX_PARSER_LOWERCASE_KEY, INVERTED_INDEX_PARSER_TRUE},
    };
    _build_wiki_tablet(KeysType::UNIQUE_KEYS, InvertedIndexStorageFormatPB::V2, index_properties);
    _run_normal_wiki_test();
}
255+
256+
// UNIQUE_KEYS tablet, V2 index storage format, chinese parser with phrase support and lowercasing.
TEST_F(DISABLED_IndexCompactionPerformanceTest, tes_wikipedia_mow_v2_chinese) {
    const std::map<std::string, std::string> index_properties {
            {INVERTED_INDEX_PARSER_KEY, INVERTED_INDEX_PARSER_CHINESE},
            {INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY, INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES},
            {INVERTED_INDEX_PARSER_LOWERCASE_KEY, INVERTED_INDEX_PARSER_TRUE},
    };
    _build_wiki_tablet(KeysType::UNIQUE_KEYS, InvertedIndexStorageFormatPB::V2, index_properties);
    _run_normal_wiki_test();
}
265+
} // namespace doris

0 commit comments

Comments
 (0)