diff --git a/rust/index/src/spann/types.rs b/rust/index/src/spann/types.rs index 59269864c34..3cbfad9b0b1 100644 --- a/rust/index/src/spann/types.rs +++ b/rust/index/src/spann/types.rs @@ -766,6 +766,61 @@ impl SpannIndexWriter { Ok(false) } + async fn try_delete_posting_list(&self, head_id: u32) -> Result<(), SpannIndexWriterError> { + let _write_guard = self.posting_list_partitioned_mutex.lock(&head_id).await; + if self.is_head_deleted(head_id as usize).await? { + return Ok(()); + } + let result = self + .posting_list_writer + .get_owned::>("", head_id) + .await; + // If the error is posting list not found, then return ok. + match result { + Ok(Some((doc_offset_ids, doc_versions, _))) => { + let mut outdated_count = 0; + for (doc_offset_id, doc_version) in doc_offset_ids.iter().zip(doc_versions.iter()) { + if self.is_outdated(*doc_offset_id, *doc_version).await? { + outdated_count += 1; + } + } + if outdated_count == doc_offset_ids.len() { + { + let hnsw_write_guard = self.hnsw_index.inner.write(); + hnsw_write_guard + .hnsw_index + .delete(head_id as usize) + .map_err(|e| { + tracing::error!( + "Error deleting head {} from hnsw index: {}", + head_id, + e + ); + SpannIndexWriterError::HnswIndexMutateError(e) + })?; + } + self.posting_list_writer + .delete::>("", head_id) + .await + .map_err(|e| { + tracing::error!( + "Error deleting posting list for head {}: {}", + head_id, + e + ); + SpannIndexWriterError::PostingListSetError(e) + })?; + } + } + Ok(None) => {} + Err(e) => { + tracing::error!("Error getting posting list for head {}: {}", head_id, e); + return Err(SpannIndexWriterError::PostingListGetError(e)); + } + } + Ok(()) + } + #[allow(clippy::too_many_arguments)] async fn collect_and_reassign_split_points( &self, @@ -814,6 +869,8 @@ impl SpannIndexWriter { .await?; } } + // Delete head if all points were moved out. + self.try_delete_posting_list(new_head_ids[k] as u32).await?; } Ok(assigned_ids) } @@ -946,17 +1003,20 @@ impl SpannIndexWriter { let doc_versions; let doc_embeddings; { - // TODO(Sanket): Check if head is deleted, can happen if another concurrent thread - // deletes it. - (doc_offset_ids, doc_versions, doc_embeddings) = self + let result = self .posting_list_writer .get_owned::>("", head_id as u32) - .await - .map_err(|e| { - tracing::error!("Error getting posting list for head {}: {}", head_id, e); - SpannIndexWriterError::PostingListGetError(e) - })? - .ok_or(SpannIndexWriterError::PostingListNotFound)?; + .await; + match result { + Ok(Some((offset_ids, versions, embeddings))) => { + doc_offset_ids = offset_ids; + doc_versions = versions; + doc_embeddings = embeddings; + } + // Posting list can be concurrent deleted so bail out early if not found. + Ok(None) => return Ok(()), + Err(e) => return Err(SpannIndexWriterError::PostingListGetError(e)), + } } for (index, doc_offset_id) in doc_offset_ids.iter().enumerate() { if assigned_ids.contains(doc_offset_id) @@ -1004,6 +1064,8 @@ impl SpannIndexWriter { ) .await?; } + // Delete head if all points were moved out. + self.try_delete_posting_list(head_id as u32).await?; Ok(()) } @@ -1264,6 +1326,7 @@ impl SpannIndexWriter { if !same_head && distance_function .distance(&clustering_output.cluster_centers[k], &head_embedding) + .abs() < 1e-6 { same_head = true; @@ -1350,17 +1413,32 @@ impl SpannIndexWriter { } if !same_head { // Delete the old head - let hnsw_write_guard = self.hnsw_index.inner.write(); - hnsw_write_guard - .hnsw_index - .delete(head_id as usize) + // First delete from hnsw then from postings list. This order + // ensures that the head is never dangling. + { + let hnsw_write_guard = self.hnsw_index.inner.write(); + hnsw_write_guard + .hnsw_index + .delete(head_id as usize) + .map_err(|e| { + tracing::error!( + "Error deleting head {} from hnsw index: {}", + head_id, + e + ); + SpannIndexWriterError::HnswIndexMutateError(e) + })?; + } + self.posting_list_writer + .delete::>("", head_id) + .await .map_err(|e| { tracing::error!( - "Error deleting head {} from hnsw index: {}", + "Error deleting posting list for head {}: {}", head_id, e ); - SpannIndexWriterError::HnswIndexMutateError(e) + SpannIndexWriterError::PostingListSetError(e) })?; self.stats .num_heads_deleted @@ -1755,12 +1833,29 @@ impl SpannIndexWriter { self.stats .num_pl_modified .fetch_add(1, std::sync::atomic::Ordering::Relaxed); - // Delete from hnsw. - let hnsw_write_guard = self.hnsw_index.inner.write(); - hnsw_write_guard.hnsw_index.delete(head_id).map_err(|e| { - tracing::error!("Error deleting head {} from hnsw index: {}", head_id, e); - SpannIndexWriterError::HnswIndexMutateError(e) - })?; + { + // Delete from hnsw. + let hnsw_write_guard = self.hnsw_index.inner.write(); + hnsw_write_guard.hnsw_index.delete(head_id).map_err(|e| { + tracing::error!( + "Error deleting head {} from hnsw index: {}", + head_id, + e + ); + SpannIndexWriterError::HnswIndexMutateError(e) + })?; + } + self.posting_list_writer + .delete::>("", head_id as u32) + .await + .map_err(|e| { + tracing::error!( + "Error deleting posting list for head {}: {}", + head_id, + e + ); + SpannIndexWriterError::PostingListSetError(e) + })?; self.stats .num_heads_deleted .fetch_add(1, std::sync::atomic::Ordering::Relaxed); @@ -1779,18 +1874,31 @@ impl SpannIndexWriter { self.stats .num_pl_modified .fetch_add(1, std::sync::atomic::Ordering::Relaxed); - // Delete from hnsw. - let hnsw_write_guard = self.hnsw_index.inner.write(); - hnsw_write_guard - .hnsw_index - .delete(nearest_head_id) + { + // Delete from hnsw. + let hnsw_write_guard = self.hnsw_index.inner.write(); + hnsw_write_guard + .hnsw_index + .delete(nearest_head_id) + .map_err(|e| { + tracing::error!( + "Error deleting head {} from hnsw index: {}", + nearest_head_id, + e + ); + SpannIndexWriterError::HnswIndexMutateError(e) + })?; + } + self.posting_list_writer + .delete::>("", nearest_head_id as u32) + .await .map_err(|e| { tracing::error!( - "Error deleting head {} from hnsw index: {}", + "Error deleting posting list for head {}: {}", nearest_head_id, e ); - SpannIndexWriterError::HnswIndexMutateError(e) + SpannIndexWriterError::PostingListSetError(e) })?; self.stats .num_heads_deleted