Skip to content

Commit

Permalink
[#24069] DocDB: Merge function update to take vector of indexes as input
Browse files Browse the repository at this point in the history
Summary:
This diff continues the work started in D39478.
It makes the Merge function accept a vector of indexes as input, instead of exactly two indexes.

Jira:DB-12962

Test Plan: Jenkins

Reviewers: sergei, arybochkin

Reviewed By: arybochkin

Subscribers: ybase

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D40718
  • Loading branch information
aponom84 committed Dec 16, 2024
1 parent 4c3b9e2 commit ed874b9
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 15 deletions.
6 changes: 3 additions & 3 deletions src/yb/vector_index/index_merge-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class IndexMergeTest : public YBTest {
auto data_b = CreateAndFillIndex(index_factory, half_size, half_size);

VectorIndexIfPtr<FloatVector, float> merged_index =
ASSERT_RESULT(Merge(index_factory, data_a.index, data_b.index));
ASSERT_RESULT(Merge(index_factory, {data_a.index, data_b.index}));

// Check that the merged index contains all entries.
auto result_a = ASSERT_RESULT(merged_index->Search(input_vectors_[0], 1));
Expand Down Expand Up @@ -112,8 +112,8 @@ class IndexMergeTest : public YBTest {
// Generate indexes for the input set.
auto data_a = CreateAndFillIndex(index_factory, 0, input_vectors_.size() / 2);

// Merge empty_index into data_a.
auto merged_index = ASSERT_RESULT(Merge(index_factory, data_a.index, empty_index));
// Merge empty_index with data_a.
auto merged_index = ASSERT_RESULT(Merge(index_factory, {data_a.index, empty_index}));

// Check that the merged index contains only the entries from data_a.
auto all_results = ASSERT_RESULT(merged_index->Search(
Expand Down
22 changes: 10 additions & 12 deletions src/yb/vector_index/vectorann_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,23 +120,21 @@ std::vector<VertexWithDistance<DistanceResult>> BruteForcePreciseNearestNeighbor
template <IndexableVectorType Vector, ValidDistanceResultType DistanceResult>
Result<VectorIndexIfPtr<Vector, DistanceResult>> Merge(
VectorIndexFactory<Vector, DistanceResult> index_factory,
VectorIndexIfPtr<Vector, DistanceResult> index_a,
VectorIndexIfPtr<Vector, DistanceResult> index_b) {
const std::vector<VectorIndexIfPtr<Vector, DistanceResult>>& indexes) {
VectorIndexIfPtr<Vector, DistanceResult> merged_index = index_factory();

size_t max_vectors_a = index_a->MaxVectors();
size_t max_vectors_b = index_b->MaxVectors();
size_t total_max_vectors = 0;
for (const auto& index : indexes) {
total_max_vectors += index->MaxVectors();
}

RETURN_NOT_OK(merged_index->Reserve(
max_vectors_a + max_vectors_b, std::thread::hardware_concurrency(),
std::thread::hardware_concurrency()));

for (const auto& [vertex_id, vector] : *index_a) {
RETURN_NOT_OK(merged_index->Insert(vertex_id, vector));
}
total_max_vectors, std::thread::hardware_concurrency(), std::thread::hardware_concurrency()));

for (const auto& [vertex_id, vector] : *index_b) {
RETURN_NOT_OK(merged_index->Insert(vertex_id, vector));
for (const auto& index : indexes) {
for (const auto& [vertex_id, vector] : *index) {
RETURN_NOT_OK(merged_index->Insert(vertex_id, vector));
}
}

return merged_index;
Expand Down

0 comments on commit ed874b9

Please sign in to comment.