generated from habedi/template-rust-project
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
892 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,3 +29,6 @@ indent_size = 2 | |
[*.{yaml,yml}] | ||
indent_size = 2 | ||
# Python files | ||
[*.py] | ||
max_line_length = 120 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,15 @@ | ||
[package] | ||
name = "vq" | ||
version = "0.1.1" | ||
version = "0.1.2" | ||
description = "A vector quantization library for Rust" | ||
repository = "https://github.com/habedi/vq" | ||
license = "MIT OR Apache-2.0" | ||
readme = "README.md" | ||
keywords = ["vq", "vector-quantization", "clustering", "nearest-neighbor", "data-compression"] | ||
keywords = ["vector-quantization", "quantization", "nearest-neighbor", "data-compression", "embeddings"] | ||
authors = ["Hassan Abedi <[email protected]>"] | ||
homepage = "https://github.com/habedi/vq" | ||
documentation = "https://docs.rs/vq" | ||
#categories = ["development-tools"] | ||
categories = ["algorithms", "compression", "data-structures"] | ||
edition = "2021" | ||
|
||
[lib] | ||
|
@@ -33,5 +33,5 @@ rayon = "1.10" | |
criterion = { version = "0.5", features = ["html_reports"] } | ||
|
||
[[bench]] | ||
name = "my_benchmarks" | ||
name = "bq_bench" | ||
harness = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
#[path = "utils.rs"] | ||
mod utils; | ||
|
||
use crate::utils::{BENCH_TIMEOUT, NUM_VECTORS}; | ||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use rayon::prelude::*; | ||
use vq::bq::BinaryQuantizer; | ||
use vq::vector::{Vector, PARALLEL_THRESHOLD}; | ||
|
||
/// Benchmark quantization on a single vector that is small enough to trigger sequential processing. | ||
fn bench_quantize_sequential(_c: &mut Criterion) { | ||
// Create a vector with length less than PARALLEL_THRESHOLD. | ||
let n = PARALLEL_THRESHOLD / 2; | ||
let data: Vec<f32> = (0..n).map(|i| (i as f32) / (n as f32)).collect(); | ||
let vector = Vector::new(data); | ||
let quantizer = BinaryQuantizer::new(0.5, 0, 1); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("quantize_sequential", |b| { | ||
b.iter(|| { | ||
let result = quantizer.quantize(black_box(&vector)); | ||
black_box(result) | ||
}) | ||
}); | ||
} | ||
|
||
/// Benchmark quantization on a single vector that is large enough to trigger parallel processing. | ||
fn bench_quantize_parallel(_c: &mut Criterion) { | ||
// Create a vector with length greater than PARALLEL_THRESHOLD. | ||
let n = PARALLEL_THRESHOLD + 1000; | ||
let data: Vec<f32> = (0..n).map(|i| (i as f32) / (n as f32)).collect(); | ||
let vector = Vector::new(data); | ||
let quantizer = BinaryQuantizer::new(0.5, 0, 1); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("quantize_parallel", |b| { | ||
b.iter(|| { | ||
let result = quantizer.quantize(black_box(&vector)); | ||
black_box(result) | ||
}) | ||
}); | ||
} | ||
|
||
/// Benchmark quantization of many small vectors (each processed sequentially) using a sequential outer loop. | ||
fn bench_quantize_multiple_vectors_sequential(_c: &mut Criterion) { | ||
// Each vector is small enough to use sequential quantization internally. | ||
let vector_size = PARALLEL_THRESHOLD / 2; | ||
let vectors: Vec<Vector<f32>> = (0..NUM_VECTORS) | ||
.map(|_| { | ||
let data: Vec<f32> = (0..vector_size) | ||
.map(|i| (i as f32) / (vector_size as f32)) | ||
.collect(); | ||
Vector::new(data) | ||
}) | ||
.collect(); | ||
|
||
let quantizer = BinaryQuantizer::new(0.5, 0, 1); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("quantize_multiple_vectors_sequential", |b| { | ||
b.iter(|| { | ||
let results: Vec<Vector<u8>> = vectors | ||
.iter() | ||
.map(|v| quantizer.quantize(black_box(v))) | ||
.collect(); | ||
black_box(results); | ||
}) | ||
}); | ||
} | ||
|
||
/// Benchmark quantization of many large vectors (each using parallel quantization) | ||
/// and process them concurrently using a parallel outer loop. | ||
fn bench_quantize_multiple_vectors_parallel_outer(_c: &mut Criterion) { | ||
// Each vector is large enough to use parallel quantization internally. | ||
let vector_size = PARALLEL_THRESHOLD + 100; | ||
let vectors: Vec<Vector<f32>> = (0..NUM_VECTORS) | ||
.map(|_| { | ||
let data: Vec<f32> = (0..vector_size) | ||
.map(|i| (i as f32) / (vector_size as f32)) | ||
.collect(); | ||
Vector::new(data) | ||
}) | ||
.collect(); | ||
|
||
let quantizer = BinaryQuantizer::new(0.5, 0, 1); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("quantize_multiple_vectors_parallel_outer", |b| { | ||
b.iter(|| { | ||
let results: Vec<Vector<u8>> = vectors | ||
.par_iter() | ||
.map(|v| quantizer.quantize(black_box(v))) | ||
.collect(); | ||
black_box(results); | ||
}) | ||
}); | ||
} | ||
|
||
criterion_group!( | ||
benches, | ||
bench_quantize_sequential, | ||
bench_quantize_parallel, | ||
bench_quantize_multiple_vectors_sequential, | ||
bench_quantize_multiple_vectors_parallel_outer | ||
); | ||
criterion_main!(benches); |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
#[path = "utils.rs"] | ||
mod utils; | ||
|
||
use crate::utils::{ | ||
generate_training_data, BENCH_TIMEOUT, DIM, K, M, MAX_ITERS, NUM_VECTORS, SEED, | ||
}; | ||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use rayon::prelude::*; | ||
use vq::distances::Distance; | ||
use vq::pq::ProductQuantizer; | ||
use vq::vector::Vector; | ||
|
||
/// Benchmark the construction of a ProductQuantizer using LBG quantization over training data. | ||
fn bench_pq_construction(_c: &mut Criterion) { | ||
// Generate synthetic training data. | ||
let training_data = generate_training_data(NUM_VECTORS, DIM); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("pq_construction", |b| { | ||
b.iter(|| { | ||
// Measure the time to construct the quantizer. | ||
let pq = ProductQuantizer::new( | ||
black_box(&training_data), | ||
M, | ||
K, | ||
MAX_ITERS, | ||
Distance::Euclidean, | ||
SEED, | ||
); | ||
black_box(pq) | ||
}) | ||
}); | ||
} | ||
|
||
/// Benchmark quantizing a single vector using an already constructed ProductQuantizer. | ||
fn bench_pq_quantize_single(_c: &mut Criterion) { | ||
let training_data = generate_training_data(NUM_VECTORS, DIM); | ||
|
||
let pq = ProductQuantizer::new(&training_data, M, K, MAX_ITERS, Distance::Euclidean, SEED); | ||
|
||
// Create a test vector (must have dimension m * (dim/m) = 64). | ||
let test_vector = Vector::new((0..DIM).map(|i| (i as f32) / (DIM as f32)).collect()); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("pq_quantize_single_vector", |b| { | ||
b.iter(|| { | ||
let result = pq.quantize(black_box(&test_vector)); | ||
black_box(result) | ||
}) | ||
}); | ||
} | ||
|
||
/// Benchmark quantizing a batch of vectors. | ||
fn bench_pq_quantize_multiple_vectors(_c: &mut Criterion) { | ||
let training_data = generate_training_data(NUM_VECTORS, DIM); | ||
|
||
let pq = ProductQuantizer::new(&training_data, M, K, MAX_ITERS, Distance::Euclidean, SEED); | ||
|
||
// Generate a batch of test vectors. | ||
let test_vectors: Vec<Vector<f32>> = (0..NUM_VECTORS) | ||
.map(|_| Vector::new((0..DIM).map(|i| (i as f32) / (DIM as f32)).collect())) | ||
.collect(); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("pq_quantize_multiple_vectors", |b| { | ||
b.iter(|| { | ||
// Quantize each vector in the batch. | ||
let results: Vec<_> = test_vectors | ||
.iter() | ||
.map(|v| pq.quantize(black_box(v))) | ||
.collect(); | ||
black_box(results); | ||
}) | ||
}); | ||
} | ||
|
||
criterion_group!( | ||
benches, | ||
bench_pq_construction, | ||
bench_pq_quantize_single, | ||
bench_pq_quantize_multiple_vectors | ||
); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#[path = "utils.rs"] | ||
mod utils; | ||
|
||
use crate::utils::{BENCH_TIMEOUT, NUM_VECTORS}; | ||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use rayon::prelude::*; | ||
use vq::sq::ScalarQuantizer; | ||
use vq::vector::{Vector, PARALLEL_THRESHOLD}; | ||
|
||
/// Benchmark quantization on a single vector that is small enough to trigger sequential processing. | ||
fn bench_sq_quantize_sequential(_c: &mut Criterion) { | ||
// Create a vector with length less than SQ_PARALLEL_THRESHOLD. | ||
let n = PARALLEL_THRESHOLD / 2; | ||
let data: Vec<f32> = (0..n).map(|i| (i as f32) / (n as f32)).collect(); | ||
let vector = Vector::new(data); | ||
// Configure the quantizer with a range from 0.0 to 1.0 and 256 levels. | ||
let quantizer = ScalarQuantizer::new(0.0, 1.0, 256); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("sq_quantize_sequential", |b| { | ||
b.iter(|| { | ||
let result = quantizer.quantize(black_box(&vector)); | ||
black_box(result) | ||
}) | ||
}); | ||
} | ||
|
||
/// Benchmark quantization on a single vector that is large enough to trigger parallel processing. | ||
fn bench_sq_quantize_parallel(_c: &mut Criterion) { | ||
// Create a vector with length greater than SQ_PARALLEL_THRESHOLD. | ||
let n = PARALLEL_THRESHOLD + 1000; | ||
let data: Vec<f32> = (0..n).map(|i| (i as f32) / (n as f32)).collect(); | ||
let vector = Vector::new(data); | ||
let quantizer = ScalarQuantizer::new(0.0, 1.0, 256); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("sq_quantize_parallel", |b| { | ||
b.iter(|| { | ||
let result = quantizer.quantize(black_box(&vector)); | ||
black_box(result) | ||
}) | ||
}); | ||
} | ||
|
||
/// Benchmark quantization of many small vectors (each processed sequentially) using a sequential outer loop. | ||
fn bench_sq_quantize_multiple_vectors_sequential(_c: &mut Criterion) { | ||
// Each vector is small enough to be processed sequentially. | ||
let vector_size = PARALLEL_THRESHOLD / 2; | ||
let vectors: Vec<Vector<f32>> = (0..NUM_VECTORS) | ||
.map(|_| { | ||
let data: Vec<f32> = (0..vector_size) | ||
.map(|i| (i as f32) / (vector_size as f32)) | ||
.collect(); | ||
Vector::new(data) | ||
}) | ||
.collect(); | ||
|
||
let quantizer = ScalarQuantizer::new(0.0, 1.0, 256); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("sq_quantize_multiple_vectors_sequential", |b| { | ||
b.iter(|| { | ||
let results: Vec<Vector<u8>> = vectors | ||
.iter() | ||
.map(|v| quantizer.quantize(black_box(v))) | ||
.collect(); | ||
black_box(results); | ||
}) | ||
}); | ||
} | ||
|
||
/// Benchmark quantization of many large vectors (each using parallel quantization) | ||
/// and process them concurrently using a parallel outer loop. | ||
fn bench_sq_quantize_multiple_vectors_parallel_outer(_c: &mut Criterion) { | ||
// Each vector is large enough to trigger parallel quantization internally. | ||
let vector_size = PARALLEL_THRESHOLD + 100; | ||
let vectors: Vec<Vector<f32>> = (0..NUM_VECTORS) | ||
.map(|_| { | ||
let data: Vec<f32> = (0..vector_size) | ||
.map(|i| (i as f32) / (vector_size as f32)) | ||
.collect(); | ||
Vector::new(data) | ||
}) | ||
.collect(); | ||
|
||
let quantizer = ScalarQuantizer::new(0.0, 1.0, 256); | ||
|
||
let mut cc = Criterion::default().measurement_time(BENCH_TIMEOUT); | ||
cc.bench_function("sq_quantize_multiple_vectors_parallel_outer", |b| { | ||
b.iter(|| { | ||
let results: Vec<Vector<u8>> = vectors | ||
.par_iter() | ||
.map(|v| quantizer.quantize(black_box(v))) | ||
.collect(); | ||
black_box(results); | ||
}) | ||
}); | ||
} | ||
|
||
criterion_group!( | ||
benches, | ||
bench_sq_quantize_sequential, | ||
bench_sq_quantize_parallel, | ||
bench_sq_quantize_multiple_vectors_sequential, | ||
bench_sq_quantize_multiple_vectors_parallel_outer | ||
); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#![allow(dead_code)] | ||
|
||
use vq::vector::Vector; | ||
|
||
/// Number of vectors built for the multi-vector benchmarks and for the training set.
pub const NUM_VECTORS: usize = 10; | ||
/// Measurement time handed to Criterion for every benchmark (10 seconds).
pub const BENCH_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); | ||
/// Dimension of the training and test vectors used by the product-quantizer benchmarks.
pub const DIM: usize = 64; | ||
/// Second argument to `ProductQuantizer::new` — presumably the number of subspaces; TODO confirm.
pub const M: usize = 4; | ||
/// Third argument to `ProductQuantizer::new` — presumably the codebook size per subspace; TODO confirm.
pub const K: usize = 16; | ||
/// Fourth argument to `ProductQuantizer::new` — presumably the training iteration cap; TODO confirm.
pub const MAX_ITERS: usize = 10; | ||
/// Last argument to `ProductQuantizer::new` — presumably the RNG seed; TODO confirm.
pub const SEED: u64 = 42; | ||
|
||
/// Generates a synthetic training dataset of `num` vectors, each of dimension `dim`. | ||
pub fn generate_training_data(num: usize, dim: usize) -> Vec<Vector<f32>> { | ||
(0..num) | ||
.map(|_| { | ||
let data: Vec<f32> = (0..dim).map(|i| (i as f32) / (dim as f32)).collect(); | ||
Vector::new(data) | ||
}) | ||
.collect() | ||
} |
Oops, something went wrong.