Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions _example/pca/pca.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package main

import (
"fmt"
"log"
"math/rand"

"github.com/blevesearch/go-faiss"
)

// main demonstrates PCA dimensionality reduction with go-faiss:
// it trains a PCA transform on synthetic data, applies it to a sample
// vector, runs a nearest-neighbor search in the reduced space, and finally
// trains a second PCA transform with a non-zero eigen power.
func main() {
	// Fixed seed so the synthetic data is reproducible between runs.
	rng := rand.New(rand.NewSource(123456))

	d := 64   // Original high dimension
	dPCA := 3 // Target low dimension
	n := 1000 // Number of vectors for demo

	fmt.Printf("=== PCA Dimensionality Reduction Demo ===\n")
	fmt.Printf("Reducing from %dD to %dD\n\n", d, dPCA)

	pca, err := faiss.NewPCAMatrix(d, dPCA, 0, false)
	if err != nil {
		log.Fatal(err)
	}
	defer pca.Close()

	// Vectors are stored back to back: vector i occupies
	// trainingData[i*d : (i+1)*d].
	trainingData := make([]float32, d*n)
	for i := 0; i < n; i++ {
		trainingData[i*d+0] = float32(i) / 100.0   // Linear trend
		trainingData[i*d+1] = float32(i%10) / 10.0 // Periodic pattern
		trainingData[i*d+2] = rng.Float32() * 2.0  // Scaled random

		// Rest is small noise
		for j := 3; j < d; j++ {
			trainingData[i*d+j] = rng.Float32() * 0.1
		}
	}

	fmt.Println("Training PCA...")
	if err := pca.Train(trainingData); err != nil {
		log.Fatal(err)
	}

	fmt.Println("\nExample transformation:")
	sampleVector := trainingData[:d]
	transformed, err := pca.Apply(sampleVector)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Original (first 5 of %d values): [%.3f %.3f %.3f %.3f %.3f ...]\n",
		d, sampleVector[0], sampleVector[1], sampleVector[2], sampleVector[3], sampleVector[4])
	fmt.Printf("Transformed (%d values): [%.3f %.3f %.3f]\n",
		dPCA, transformed[0], transformed[1], transformed[2])

	fmt.Println("\n=== Similarity Search with PCA ===")

	index, err := faiss.NewIndexFlatL2(dPCA)
	if err != nil {
		log.Fatal(err)
	}
	defer index.Close()

	transformedData, err := pca.Apply(trainingData)
	if err != nil {
		log.Fatal(err)
	}
	// A failed Add would otherwise go unnoticed and the search below would
	// run against an empty index, so check it like every other call.
	if err := index.Add(transformedData); err != nil {
		log.Fatal(err)
	}

	k := int64(3)
	queryIdx := 500 // Query with vector at index 500
	query := trainingData[queryIdx*d : (queryIdx+1)*d]
	queryPCA, err := pca.Apply(query)
	if err != nil {
		log.Fatal(err)
	}

	distances, ids, err := index.Search(queryPCA, k)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("\nSearching for neighbors of vector #%d:\n", queryIdx)
	fmt.Println("Nearest neighbors (ID: distance):")
	for i := int64(0); i < k; i++ {
		fmt.Printf(" #%d: %.4f\n", ids[i], distances[i])
	}

	fmt.Println("\n=== PCA with Whitening ===")
	pcaWhite, err := faiss.NewPCAMatrix(d, dPCA, 0.5, false)
	if err != nil {
		log.Fatal(err)
	}
	defer pcaWhite.Close()

	if err := pcaWhite.Train(trainingData); err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Whitening normalizes variance (eigen_power=%.1f)\n", pcaWhite.EigenPower())
}
110 changes: 110 additions & 0 deletions vector_transform.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package faiss

/*
#include <faiss/c_api/VectorTransform_c.h>
#include <stdlib.h>
*/
import "C"
import (
	"fmt"
	"unsafe"
)

// VectorTransform wraps a pointer to a C FaissVectorTransform object.
// Call Close to free the C object once the transform is no longer needed.
type VectorTransform struct {
// vt is the handle to the C-side transform; Close resets it to nil.
vt *C.FaissVectorTransform
}

// cPtr exposes the raw C handle held by the transform.
func (vt *VectorTransform) cPtr() *C.FaissVectorTransform {
	handle := vt.vt
	return handle
}

// Close frees the C-side vector transform and clears the stored handle.
// It does nothing when the receiver is nil or the handle was already cleared,
// so calling it more than once is harmless.
func (vt *VectorTransform) Close() {
	if vt == nil || vt.vt == nil {
		return
	}
	C.faiss_VectorTransform_free(vt.vt)
	vt.vt = nil
}

// IsTrained reports whether the C library's is_trained value for this
// transform is non-zero.
func (vt *VectorTransform) IsTrained() bool {
	trained := C.faiss_VectorTransform_is_trained(vt.vt)
	return trained != 0
}

// DIn returns the input dimension of the transform, as reported by the
// C library.
func (vt *VectorTransform) DIn() int {
	dim := C.faiss_VectorTransform_d_in(vt.vt)
	return int(dim)
}

// DOut returns the output dimension of the transform, as reported by the
// C library.
func (vt *VectorTransform) DOut() int {
	dim := C.faiss_VectorTransform_d_out(vt.vt)
	return int(dim)
}

// Train trains the transform on the vectors in x.
//
// x holds the training vectors back to back, DIn() values per vector, so
// len(x) must be a positive multiple of DIn(). An error is returned when
// that does not hold or when the C library reports a failure.
func (vt *VectorTransform) Train(x []float32) error {
	d := vt.DIn()
	if d <= 0 {
		// Guards the division below against a zero dimension.
		return fmt.Errorf("faiss: invalid input dimension %d", d)
	}
	// Reject empty input up front (taking &x[0] on an empty slice panics)
	// and refuse a trailing partial vector instead of silently dropping it.
	if len(x) == 0 || len(x)%d != 0 {
		return fmt.Errorf(
			"faiss: training data length %d is not a positive multiple of input dimension %d",
			len(x), d)
	}

	n := len(x) / d
	if c := C.faiss_VectorTransform_train(
		vt.vt,
		C.idx_t(n),
		(*C.float)(&x[0]),
	); c != 0 {
		return getLastError()
	}
	return nil
}

// Apply runs the transform on x and returns the result.
//
// x holds the input vectors back to back, DIn() values per vector, so len(x)
// must be a multiple of DIn(). The returned slice is a Go-owned copy holding
// DOut() values per input vector. An empty x yields an empty result.
// An error is returned when the input length is invalid, when the result
// would be too large to copy, or when the C call returns a nil buffer.
func (vt *VectorTransform) Apply(x []float32) ([]float32, error) {
	// Upper bound on the element count of the array type used to view the
	// C buffer as a Go slice below.
	const maxElems = 1 << 30

	d := vt.DIn()
	if d <= 0 {
		// Guards the division below against a zero dimension.
		return nil, fmt.Errorf("faiss: invalid input dimension %d", d)
	}
	if len(x)%d != 0 {
		return nil, fmt.Errorf(
			"faiss: input length %d is not a multiple of input dimension %d",
			len(x), d)
	}
	n := len(x) / d
	if n == 0 {
		// Nothing to transform; also avoids &x[0] on an empty slice,
		// which would panic.
		return []float32{}, nil
	}

	// Validate the result size before doing any work in C: slicing the
	// array view beyond maxElems would panic.
	size := n * vt.DOut()
	if size < 0 || size > maxElems {
		return nil, fmt.Errorf("faiss: transformed output of %d values is too large", size)
	}

	ptr := C.faiss_VectorTransform_apply(
		vt.vt,
		C.idx_t(n),
		(*C.float)(&x[0]),
	)
	if ptr == nil {
		return nil, getLastError()
	}
	// NOTE(review): the buffer is released with C.free; confirm this matches
	// the allocator the C API uses for the returned matrix.
	defer C.free(unsafe.Pointer(ptr))

	// Copy out of C memory so the result stays valid after the free above.
	out := make([]float32, size)
	src := (*[maxElems]float32)(unsafe.Pointer(ptr))[:size:size]
	copy(out, src)
	return out, nil
}

// PCAMatrix is a linear transformation obtained by PCA,
// including a rotation back to the original dimension.
//
// It embeds VectorTransform, so Close, IsTrained, DIn, DOut, Train and Apply
// can be called on a *PCAMatrix. Create one with NewPCAMatrix.
type PCAMatrix struct {
VectorTransform
}

// NewPCAMatrix creates a new PCA matrix.
//
//   - dIn: input dimension
//   - dOut: output dimension
//   - eigenPower: power applied to eigenvalues (0 = no whitening)
//   - randomRotation: whether to apply a random rotation after PCA
//
// It returns the library's last error when the C constructor reports a
// non-zero status. The caller should Close the returned matrix when done.
func NewPCAMatrix(dIn, dOut int, eigenPower float32, randomRotation bool) (*PCAMatrix, error) {
	// The C API takes the boolean as an int flag.
	var rotFlag C.int
	if randomRotation {
		rotFlag = 1
	}

	var handle *C.FaissPCAMatrix
	status := C.faiss_PCAMatrix_new_with(
		&handle,
		C.int(dIn),
		C.int(dOut),
		C.float(eigenPower),
		rotFlag,
	)
	if status != 0 {
		return nil, getLastError()
	}

	// Store the handle as the generic transform pointer used by the
	// embedded VectorTransform methods.
	m := &PCAMatrix{}
	m.vt = (*C.FaissVectorTransform)(handle)
	return m, nil
}

// EigenPower returns the eigen power parameter the C library reports for
// this PCA matrix.
func (pca *PCAMatrix) EigenPower() float32 {
	handle := (*C.FaissPCAMatrix)(pca.vt)
	return float32(C.faiss_PCAMatrix_eigen_power(handle))
}

// RandomRotation reports whether random rotation is enabled for this PCA
// matrix, i.e. whether the C library's random_rotation value is non-zero.
func (pca *PCAMatrix) RandomRotation() bool {
	handle := (*C.FaissPCAMatrix)(pca.vt)
	enabled := C.faiss_PCAMatrix_random_rotation(handle)
	return enabled != 0
}