From 4bbf6f0a2a852a4988d7555db6104e2173921a03 Mon Sep 17 00:00:00 2001 From: Pedram Razavi Date: Mon, 23 Jun 2025 15:20:37 -0700 Subject: [PATCH] Add vector_transform/pca binding Running example: ``` === PCA Dimensionality Reduction Demo === Reducing from 64D to 3D Training PCA... Example transformation: Original (first 5 of 64 values): [0.000 0.000 0.770 0.061 0.048 ...] Transformed (3 values): [-4.993 -0.287 -0.445] === Similarity Search with PCA === Searching for neighbors of vector #500: Nearest neighbors (ID: distance): #500: 0.0000 #510: 0.0144 #512: 0.0838 === PCA with Whitening === Whitening normalizes variance (eigen_power=0.5) ``` --- _example/pca/pca.go | 101 ++++++++++++++++++++++++++++++++++++++++ vector_transform.go | 110 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 _example/pca/pca.go create mode 100644 vector_transform.go diff --git a/_example/pca/pca.go b/_example/pca/pca.go new file mode 100644 index 0000000..a83a792 --- /dev/null +++ b/_example/pca/pca.go @@ -0,0 +1,101 @@ +package main + +import ( + "fmt" + "log" + "math/rand" + + "github.com/blevesearch/go-faiss" +) + +func main() { + rng := rand.New(rand.NewSource(123456)) + + d := 64 // Original high dimension + dPCA := 3 // Target low dimension + n := 1000 // Number of vectors for demo + + fmt.Printf("=== PCA Dimensionality Reduction Demo ===\n") + fmt.Printf("Reducing from %dD to %dD\n\n", d, dPCA) + + pca, err := faiss.NewPCAMatrix(d, dPCA, 0, false) + if err != nil { + log.Fatal(err) + } + defer pca.Close() + + trainingData := make([]float32, d*n) + for i := 0; i < n; i++ { + trainingData[i*d+0] = float32(i) / 100.0 // Linear trend + trainingData[i*d+1] = float32(i%10) / 10.0 // Periodic pattern + trainingData[i*d+2] = rng.Float32() * 2.0 // Scaled random + + // Rest is small noise + for j := 3; j < d; j++ { + trainingData[i*d+j] = rng.Float32() * 0.1 + } + } + + fmt.Println("Training PCA...") + if err := pca.Train(trainingData); err != nil { + log.Fatal(err) + } + + fmt.Println("\nExample transformation:") + sampleVector := trainingData[:d] + transformed, err := pca.Apply(sampleVector) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("Original (first 5 of %d values): [%.3f %.3f %.3f %.3f %.3f ...]\n", + d, sampleVector[0], sampleVector[1], sampleVector[2], sampleVector[3], sampleVector[4]) + fmt.Printf("Transformed (%d values): [%.3f %.3f %.3f]\n", + dPCA, transformed[0], transformed[1], transformed[2]) + + fmt.Println("\n=== Similarity Search with PCA ===") + + index, err := faiss.NewIndexFlatL2(dPCA) + if err != nil { + log.Fatal(err) + } + defer index.Close() + + transformedData, err := pca.Apply(trainingData) + if err != nil { + log.Fatal(err) + } + index.Add(transformedData) + + k := int64(3) + queryIdx := 500 // Query with vector at index 500 + query := trainingData[queryIdx*d : (queryIdx+1)*d] + queryPCA, err := pca.Apply(query) + if err != nil { + log.Fatal(err) + } + + distances, ids, err := index.Search(queryPCA, k) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("\nSearching for neighbors of vector #%d:\n", queryIdx) + fmt.Println("Nearest neighbors (ID: distance):") + for i := int64(0); i < k; i++ { + fmt.Printf(" #%d: %.4f\n", ids[i], distances[i]) + } + + fmt.Println("\n=== PCA with Whitening ===") + pcaWhite, err := faiss.NewPCAMatrix(d, dPCA, 0.5, false) + if err != nil { + log.Fatal(err) + } + defer pcaWhite.Close() + + if err := pcaWhite.Train(trainingData); err != nil { + log.Fatal(err) + } + + fmt.Printf("Whitening normalizes variance (eigen_power=%.1f)\n", pcaWhite.EigenPower()) +} diff --git a/vector_transform.go b/vector_transform.go new file mode 100644 index 0000000..d1e68f9 --- /dev/null +++ b/vector_transform.go @@ -0,0 +1,110 @@ +package faiss + +/* +#include +#include +*/ +import "C" +import ( + "unsafe" +) + +type VectorTransform struct { + vt *C.FaissVectorTransform +} + +func (vt *VectorTransform) cPtr() *C.FaissVectorTransform { + return vt.vt +} + +// Free the memory associated with the vector transform. +func (vt *VectorTransform) Close() { + if vt != nil && vt.vt != nil { + C.faiss_VectorTransform_free(vt.vt) + vt.vt = nil + } +} + +func (vt *VectorTransform) IsTrained() bool { + return C.faiss_VectorTransform_is_trained(vt.vt) != 0 +} + +// The input dimension. +func (vt *VectorTransform) DIn() int { + return int(C.faiss_VectorTransform_d_in(vt.vt)) +} + +// The output dimension. +func (vt *VectorTransform) DOut() int { + return int(C.faiss_VectorTransform_d_out(vt.vt)) +} + +func (vt *VectorTransform) Train(x []float32) error { + n := len(x) / vt.DIn() + if c := C.faiss_VectorTransform_train( + vt.vt, + C.idx_t(n), + (*C.float)(&x[0]), + ); c != 0 { + return getLastError() + } + return nil +} + +// Apply runs the transform on x and returns the result. +func (vt *VectorTransform) Apply(x []float32) ([]float32, error) { + n := len(x) / vt.DIn() + ptr := C.faiss_VectorTransform_apply( + vt.vt, + C.idx_t(n), + (*C.float)(&x[0]), + ) + if ptr == nil { + return nil, getLastError() + } + defer C.free(unsafe.Pointer(ptr)) + size := n * vt.DOut() + out := make([]float32, size) + src := (*[1 << 30]float32)(unsafe.Pointer(ptr))[:size:size] + copy(out, src) + return out, nil +} + +// PCAMatrix is a linear transformation obtained by PCA, +// including a rotation back to the original dimension. +type PCAMatrix struct { + VectorTransform +} + +// NewPCAMatrix creates a new PCA matrix. +// d_in: input dimension +// d_out: output dimension +// eigen_power: power applied to eigenvalues (default 0 = no whitening) +// random_rotation: whether to apply a random rotation after PCA +func NewPCAMatrix(dIn, dOut int, eigenPower float32, randomRotation bool) (*PCAMatrix, error) { + var vt *C.FaissPCAMatrix + rot := C.int(0) + if randomRotation { + rot = 1 + } + if c := C.faiss_PCAMatrix_new_with( + &vt, + C.int(dIn), + C.int(dOut), + C.float(eigenPower), + rot, + ); c != 0 { + return nil, getLastError() + } + return &PCAMatrix{VectorTransform{(*C.FaissVectorTransform)(vt)}}, nil +} + +// Eigen power parameter. +func (pca *PCAMatrix) EigenPower() float32 { + return float32(C.faiss_PCAMatrix_eigen_power((*C.FaissPCAMatrix)(pca.vt))) +} + +// Whether random rotation is enabled. +func (pca *PCAMatrix) RandomRotation() bool { + return C.faiss_PCAMatrix_random_rotation((*C.FaissPCAMatrix)(pca.vt)) != 0 +}