diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 61ca1fa2d..173f1b086 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -271,8 +271,10 @@ class HNSWIndex : public VecSimIndexAbstract, bool isMarkedDeleted(idType internalId) const; bool isInProcess(idType internalId) const; void unmarkInProcess(idType internalId); - HNSWAddVectorState storeNewElement(labelType label, const void *vector_data); + HNSWAddVectorState storeNewElement(labelType label, const void *vector_data, + idType newElementId); void removeAndSwapMarkDeletedElement(idType internalId); + void removeIncomingEdgesAndDelete(idType deletedId); void repairNodeConnections(idType node_id, size_t level); // For prefetching only. const ElementMetaData *getMetaDataAddress(idType internal_id) const { @@ -283,6 +285,7 @@ class HNSWIndex : public VecSimIndexAbstract, void insertElementToGraph(idType element_id, size_t element_max_level, idType entry_point, size_t global_max_level, const void *vector_data); void removeVectorInPlace(idType id); + idType getNewElementId(); /*************************** Labels lookup API ***************************/ /* Virtual functions that access the label lookup which is implemented in the derived classes */ @@ -356,6 +359,11 @@ size_t HNSWIndex::indexSize() const { return this->curElementCount; } +template +idType HNSWIndex::getNewElementId() { + return this->curElementCount++; +} + template size_t HNSWIndex::indexCapacity() const { return this->maxElements; @@ -1628,11 +1636,11 @@ HNSWIndex::~HNSWIndex() { */ template -void HNSWIndex::removeAndSwap(idType internalId) { +void HNSWIndex::removeIncomingEdgesAndDelete(idType deletedId) { // Sanity check - the id to remove cannot be the entry point, as it should have been replaced // upon marking it as deleted. - assert(entrypointNode != internalId); - auto element = getGraphDataByInternalId(internalId); + assert(entrypointNode != deletedId); + auto element = getGraphDataByInternalId(deletedId); // Remove the deleted id form the relevant incoming edges sets in which it appears. for (size_t level = 0; level <= element->toplevel; level++) { @@ -1644,17 +1652,27 @@ void HNSWIndex::removeAndSwap(idType internalId) { // (we know we will get here and remove this deleted id permanently). // However, upon asynchronous delete, this should always succeed since we do update // the incoming edges in the mutual update even for deleted elements. - bool res = neighbour.removeIncomingUnidirectionalEdgeIfExists(internalId); + + lockNodeLinks(cur_level.getLinkAtPos(i)); + bool res = neighbour.removeIncomingUnidirectionalEdgeIfExists(deletedId); + unlockNodeLinks(cur_level.getLinkAtPos(i)); + // Assert the logical condition of: is_marked_deleted(id) => res==True. (void)res; - assert((!isMarkedDeleted(internalId) || res) && "The edge should be in the incoming " - "unidirectional edges"); + assert((!isMarkedDeleted(deletedId) || res) && "The edge should be in the incoming " + "unidirectional edges"); } } // Free the element's resources element->destroy(this->levelDataSize, this->allocator); + --numMarkedDeleted; +} + +template +void HNSWIndex::removeAndSwap(idType internalId) { + removeIncomingEdgesAndDelete(internalId); // We can say now that the element has removed completely from index. --curElementCount; @@ -1749,15 +1767,13 @@ void HNSWIndex::removeVectorInPlace(const idType element_int // scenario, the index data guard should be held by the caller (exclusive lock). template HNSWAddVectorState HNSWIndex::storeNewElement(labelType label, - const void *vector_data) { - HNSWAddVectorState state{}; + const void *vector_data, + idType newElementId) { + HNSWAddVectorState state(newElementId = newElementId); // Choose randomly the maximum level in which the new element will be in the index. state.elementMaxLevel = getRandomLevel(mult); - // Access and update the index global data structures with the new element meta-data. - state.newElementId = curElementCount++; - // Create the new element's graph metadata. // We must assign manually enough memory on the stack and not just declare an `ElementGraphData` // variable, since it has a flexible array member. @@ -1814,7 +1830,7 @@ HNSWAddVectorState HNSWIndex::storeVector(const void *vector HNSWAddVectorState state{}; this->lockIndexDataGuard(); - state = storeNewElement(label, vector_data); + state = storeNewElement(label, vector_data, this->getNewElementId()); if (state.currMaxLevel >= state.elementMaxLevel) { this->unlockIndexDataGuard(); } diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered.h b/src/VecSim/algorithms/hnsw/hnsw_tiered.h index f868cd16d..c6a76663b 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered.h @@ -129,6 +129,8 @@ class TieredHNSWIndex : public VecSimTieredIndex { // Handle deletion of vector inplace considering that async deletion might occurred beforehand. int deleteLabelFromHNSWInplace(labelType label); + idType getNewElementId(); + #ifdef BUILD_TESTS #include "VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h" #endif @@ -443,7 +445,8 @@ void TieredHNSWIndex::insertVectorToHNSW( // graph scans will not occur, as they will try access the entry point's neighbors. // If an index resize is still needed, `storeNewElement` will perform it. This is OK since // we hold the main index lock for exclusive access. - auto state = hnsw_index->storeNewElement(label, processed_storage_blob); + auto state = + hnsw_index->storeNewElement(label, processed_storage_blob, this->getNewElementId()); if constexpr (releaseFlatGuard) { this->flatIndexGuard.unlock_shared(); } @@ -468,7 +471,8 @@ void TieredHNSWIndex::insertVectorToHNSW( // graph scans will not occur, as they will try access the entry point's neighbors. // At this point we are certain that the index has enough capacity for the new element, and // this call will not resize the index. - auto state = hnsw_index->storeNewElement(label, processed_storage_blob); + auto state = + hnsw_index->storeNewElement(label, processed_storage_blob, this->getNewElementId()); if constexpr (releaseFlatGuard) { this->flatIndexGuard.unlock_shared(); } @@ -518,6 +522,21 @@ int TieredHNSWIndex::deleteLabelFromHNSWInplace(labelType la return ids.size(); } +template +idType TieredHNSWIndex::getNewElementId() { + auto *hnsw_index = this->getHNSWIndex(); + if (idToSwapJob.size() == 0) { + // No pending swap jobs, so we can use a new id. + return hnsw_index->getNewElementId(); + } + idType deletedId = idToSwapJob.begin()->first; + hnsw_index->removeIncomingEdgesAndDelete(deletedId); + idToSwapJob.erase(deletedId); + + return deletedId; + // Note that this function is not thread-safe, and should be called while the main index lock +} + /******************** Job's callbacks **********************************/ template void TieredHNSWIndex::executeInsertJob(HNSWInsertJob *job) {