From ed874b92038972003e25615d22ae6b905e874a7a Mon Sep 17 00:00:00 2001 From: Alexander Ponomarenko Date: Mon, 16 Dec 2024 14:13:10 +0000 Subject: [PATCH] [#24069] DocDB: Merge function update to take vector of indexes as input Summary: This diff continues the work started in D39478. It makes the Merge function able to take a vector of indexes as input. Jira:DB-12962 Test Plan: Jenkins Reviewers: sergei, arybochkin Reviewed By: arybochkin Subscribers: ybase Tags: #jenkins-ready Differential Revision: https://phorge.dev.yugabyte.com/D40718 --- src/yb/vector_index/index_merge-test.cc | 6 +++--- src/yb/vector_index/vectorann_util.h | 22 ++++++++++------------ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/yb/vector_index/index_merge-test.cc b/src/yb/vector_index/index_merge-test.cc index 7dcc3084a9e2..8a01a63e1019 100644 --- a/src/yb/vector_index/index_merge-test.cc +++ b/src/yb/vector_index/index_merge-test.cc @@ -84,7 +84,7 @@ class IndexMergeTest : public YBTest { auto data_b = CreateAndFillIndex(index_factory, half_size, half_size); VectorIndexIfPtr merged_index = - ASSERT_RESULT(Merge(index_factory, data_a.index, data_b.index)); + ASSERT_RESULT(Merge(index_factory, {data_a.index, data_b.index})); // Check that the merged index contains all entries. auto result_a = ASSERT_RESULT(merged_index->Search(input_vectors_[0], 1)); @@ -112,8 +112,8 @@ class IndexMergeTest : public YBTest { // Generate indexes for the input set. auto data_a = CreateAndFillIndex(index_factory, 0, input_vectors_.size() / 2); - // Merge empty_index into data_a. - auto merged_index = ASSERT_RESULT(Merge(index_factory, data_a.index, empty_index)); + // Merge empty_index with data_a. + auto merged_index = ASSERT_RESULT(Merge(index_factory, {data_a.index, empty_index})); // Check that the merged index contains only the entries from data_a. 
auto all_results = ASSERT_RESULT(merged_index->Search( diff --git a/src/yb/vector_index/vectorann_util.h b/src/yb/vector_index/vectorann_util.h index d676df8c34fb..fc6fd1a3077d 100644 --- a/src/yb/vector_index/vectorann_util.h +++ b/src/yb/vector_index/vectorann_util.h @@ -120,23 +120,21 @@ std::vector> BruteForcePreciseNearestNeighbor template Result> Merge( VectorIndexFactory index_factory, - VectorIndexIfPtr index_a, - VectorIndexIfPtr index_b) { + const std::vector>& indexes) { VectorIndexIfPtr merged_index = index_factory(); - size_t max_vectors_a = index_a->MaxVectors(); - size_t max_vectors_b = index_b->MaxVectors(); + size_t total_max_vectors = 0; + for (const auto& index : indexes) { + total_max_vectors += index->MaxVectors(); + } RETURN_NOT_OK(merged_index->Reserve( - max_vectors_a + max_vectors_b, std::thread::hardware_concurrency(), - std::thread::hardware_concurrency())); - - for (const auto& [vertex_id, vector] : *index_a) { - RETURN_NOT_OK(merged_index->Insert(vertex_id, vector)); - } + total_max_vectors, std::thread::hardware_concurrency(), std::thread::hardware_concurrency())); - for (const auto& [vertex_id, vector] : *index_b) { - RETURN_NOT_OK(merged_index->Insert(vertex_id, vector)); + for (const auto& index : indexes) { + for (const auto& [vertex_id, vector] : *index) { + RETURN_NOT_OK(merged_index->Insert(vertex_id, vector)); + } } return merged_index;