From 57c6d65cb73ea7456140e0197796bcf4e11a75d1 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Fri, 8 Aug 2025 16:13:24 -0600 Subject: [PATCH 1/2] MB-66396: New IndexIVF API: ObtainTopKCentroidCardinalitiesFromIVFIndex --- index.go | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/index.go b/index.go index 18177fc..9a8a303 100644 --- a/index.go +++ b/index.go @@ -14,6 +14,7 @@ import "C" import ( "encoding/json" "fmt" + "sort" "unsafe" ) @@ -64,6 +65,9 @@ type Index interface { ObtainClustersWithDistancesFromIVFIndex(x []float32, centroidIDs []int64) ( []int64, []float32, error) + // Applicable only to IVF indexes: Returns the top k centroid cardinalities and their vectors + ObtainTopKCentroidCardinalitiesFromIVFIndex(limit int) ([]uint64, [][]float32, error) + // Search queries the index with the vectors in x. // Returns the IDs of the k nearest neighbors for each query vector and the // corresponding distances. @@ -214,6 +218,72 @@ func (idx *faissIndex) ObtainClustersWithDistancesFromIVFIndex(x []float32, cent return centroids, centroidDistances, nil } +func (idx *faissIndex) ObtainTopKCentroidCardinalitiesFromIVFIndex(limit int) ([]uint64, [][]float32, error) { + nlist := int(C.faiss_IndexIVF_nlist(idx.idx)) + if nlist == 0 { + return nil, nil, nil + } + + centroidCardinalities := make([]C.size_t, nlist) + + // Allocate a flat buffer for all centroids, then slice it per centroid + d := idx.D() + flatCentroids := make([]float32, nlist*d) + + // Call the C function to fill centroid vectors and cardinalities + c := C.faiss_IndexIVF_get_centroids_and_cardinality( + idx.idx, + (*C.float)(&flatCentroids[0]), + (*C.size_t)(&centroidCardinalities[0]), + nil, + ) + if c != 0 { + return nil, nil, getLastError() + } + + topIndices := getTopIndicesOfTopKCardinalities(centroidCardinalities, limit) + + rvCardinalities := make([]uint64, len(topIndices)) + rvCentroids := make([][]float32, len(topIndices)) + + for i, idx 
:= range topIndices { + rvCardinalities[i] = uint64(centroidCardinalities[idx]) + rvCentroids[i] = flatCentroids[idx*d : (idx+1)*d] + } + + return rvCardinalities, rvCentroids, nil + +} + +func getTopIndicesOfTopKCardinalities(cardinalities []C.size_t, k int) []int { + if k <= 0 || k > len(cardinalities) { + return nil + } + + // Store value and original index + type pair struct { + val C.size_t + idx int + } + + pairs := make([]pair, len(cardinalities)) + for i, v := range cardinalities { + pairs[i] = pair{v, i} + } + + // Sort pairs by value descending + sort.Slice(pairs, func(i, j int) bool { + return pairs[i].val > pairs[j].val + }) + + // Collect top k indexes + result := make([]int, k) + for i := 0; i < k; i++ { + result[i] = pairs[i].idx + } + return result +} + func (idx *faissIndex) SearchClustersFromIVFIndex(selector Selector, eligibleCentroidIDs []int64, minEligibleCentroids int, k int64, x, centroidDis []float32, params json.RawMessage) ([]float32, []int64, error) { From 79c510379ccc9bb8cde3e7c503adbc071f39e8fb Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Fri, 22 Aug 2025 11:51:06 -0600 Subject: [PATCH 2/2] Update API to return based on ascending/descending order --- index.go | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/index.go b/index.go index 9a8a303..91b6681 100644 --- a/index.go +++ b/index.go @@ -65,8 +65,9 @@ type Index interface { ObtainClustersWithDistancesFromIVFIndex(x []float32, centroidIDs []int64) ( []int64, []float32, error) - // Applicable only to IVF indexes: Returns the top k centroid cardinalities and their vectors - ObtainTopKCentroidCardinalitiesFromIVFIndex(limit int) ([]uint64, [][]float32, error) + // Applicable only to IVF indexes: Returns the top k centroid cardinalities and + // their vectors in chosen order (descending or ascending) + ObtainKCentroidCardinalitiesFromIVFIndex(limit int, descending bool) ([]uint64, [][]float32, error) // Search queries the index with the 
vectors in x. // Returns the IDs of the k nearest neighbors for each query vector and the @@ -218,7 +219,8 @@ func (idx *faissIndex) ObtainClustersWithDistancesFromIVFIndex(x []float32, cent return centroids, centroidDistances, nil } -func (idx *faissIndex) ObtainTopKCentroidCardinalitiesFromIVFIndex(limit int) ([]uint64, [][]float32, error) { +func (idx *faissIndex) ObtainKCentroidCardinalitiesFromIVFIndex(limit int, descending bool) ( + []uint64, [][]float32, error) { nlist := int(C.faiss_IndexIVF_nlist(idx.idx)) if nlist == 0 { return nil, nil, nil @@ -241,7 +243,7 @@ func (idx *faissIndex) ObtainTopKCentroidCardinalitiesFromIVFIndex(limit int) ([ return nil, nil, getLastError() } - topIndices := getTopIndicesOfTopKCardinalities(centroidCardinalities, limit) + topIndices := getIndicesOfKCentroidCardinalities(centroidCardinalities, limit, descending) rvCardinalities := make([]uint64, len(topIndices)) rvCentroids := make([][]float32, len(topIndices)) @@ -255,7 +257,7 @@ func (idx *faissIndex) ObtainTopKCentroidCardinalitiesFromIVFIndex(limit int) ([ } -func getTopIndicesOfTopKCardinalities(cardinalities []C.size_t, k int) []int { +func getIndicesOfKCentroidCardinalities(cardinalities []C.size_t, k int, descending bool) []int { if k <= 0 || k > len(cardinalities) { return nil } @@ -271,10 +273,16 @@ func getTopIndicesOfTopKCardinalities(cardinalities []C.size_t, k int) []int { pairs[i] = pair{v, i} } - // Sort pairs by value descending - sort.Slice(pairs, func(i, j int) bool { - return pairs[i].val > pairs[j].val - }) + // Sort pairs by value descending if descending is true, otherwise ascending + if descending { + sort.Slice(pairs, func(i, j int) bool { + return pairs[i].val > pairs[j].val + }) + } else { + sort.Slice(pairs, func(i, j int) bool { + return pairs[i].val < pairs[j].val + }) + } // Collect top k indexes result := make([]int, k)