Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use builder pattern for the configuration #96

Merged
merged 8 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
[![dependency status](https://deps.rs/repo/github/meilisearch/arroy/status.svg)](https://deps.rs/repo/github/meilisearch/arroy)
[![Build](https://github.com/meilisearch/arroy/actions/workflows/rust.yml/badge.svg)](https://github.com/meilisearch/arroy/actions/workflows/rust.yml)

Arroy ([Approximate Rearest Reighbors][1] Oh Yeah) is a Rust library with the interface of the [Annoy Python library][2] to search for vectors in space that are close to a given query vector. It is based on LMDB, a memory-mapped key-value store, so many processes may share the same data and atomically modify the vectors.
Arroy ([Approximate Rearest Reighbors][1] Oh Yeah) is a Rust library with an interface close of the [Annoy Python library][2] to search for vectors in space that are near a targeted vector. It is based on LMDB, a memory-mapped key-value store, so many processes may share the same data and atomically modify the vectors.

## Background

Expand Down
6 changes: 5 additions & 1 deletion examples/build-tree-no-commit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ fn main() -> Result<(), heed::BoxedError> {
let now = Instant::now();
println!("Building the arroy internal trees...");
let mut rng = StdRng::seed_from_u64(seed);
writer.build(&mut wtxn, &mut rng, n_trees).unwrap();
let mut builder = writer.builder(&mut rng);
if let Some(n_trees) = n_trees {
builder.n_trees(n_trees);
}
builder.build(&mut wtxn).unwrap();
println!("Took {:.2?} to build", now.elapsed());

let reader = Reader::open(&wtxn, 0, database)?;
Expand Down
12 changes: 7 additions & 5 deletions examples/compare_with_hnsw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,16 @@ fn main() -> Result<()> {
let rtxn = env.read_txn()?;
let reader = Reader::<Euclidean>::open(&rtxn, 0, database)?;

let mut query = reader.nns(NUMBER_FETCHED);

// By making it precise we are near the HNSW but
// we take a lot more time to search than the HNSW.
let is_precise = true;
let search_k =
if is_precise { NonZeroUsize::new(NUMBER_FETCHED * reader.n_trees() * 20) } else { None };
if is_precise {
query.search_k(NonZeroUsize::new(NUMBER_FETCHED * reader.n_trees() * 20).unwrap());
}

let arroy_results =
reader.nns_by_item(&rtxn, 0, NUMBER_FETCHED, search_k, None, None)?.unwrap();
let arroy_results = query.by_item(&rtxn, 0)?.unwrap();
eprintln!("took {:.02?} to find into arroy", before.elapsed());

let first = Point(reader.item_vector(&rtxn, 0)?.unwrap());
Expand Down Expand Up @@ -79,7 +81,7 @@ fn load_into_arroy(
for (i, Point(vector)) in points.iter().enumerate() {
writer.add_item(&mut wtxn, i.try_into().unwrap(), &vector[..])?;
}
writer.build(&mut wtxn, rng, None)?;
writer.builder(rng).build(&mut wtxn)?;
wtxn.commit()?;

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ fn main() -> Result<()> {
Operation::Delete(doc) => drop(writer.del_item(&mut wtxn, doc.id)?),
}
}
writer.build(&mut wtxn, &mut rng_arroy, None)?;
writer.builder(&mut rng_arroy).build(&mut wtxn)?;
wtxn.commit()?;
let rtxn = env.read_txn()?;
let reader = Reader::<Euclidean>::open(&rtxn, 0, database)?;
Expand Down
6 changes: 5 additions & 1 deletion examples/import-vectors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,11 @@ fn main() -> Result<(), heed::BoxedError> {

println!("Building the arroy internal trees...");
let now = Instant::now();
writer.build(&mut wtxn, &mut rng, n_trees).unwrap();
let mut builder = writer.builder(&mut rng);
if let Some(n_trees) = n_trees {
builder.n_trees(n_trees);
}
builder.build(&mut wtxn)?;
wtxn.commit().unwrap();
println!("Took {:.2?} to build", now.elapsed());

Expand Down
4 changes: 2 additions & 2 deletions examples/search_movies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn main() {
#[rustfmt::skip]
let query: Vec<f32> = vec![-0.016822422, -0.021621706, 0.00019239318, 0.054372, 0.034900583, -0.011089119, 0.042128928, 0.02026509, 0.0019038923, -0.0014809829, -0.033832666, -0.029640282, -0.054234847, 0.018665258, -0.055895746, 0.0753006, 0.0061977296, 0.032228395, 0.022795584, -0.058140032, 0.026109613, -0.029421866, 0.04866331, 0.020661665, 0.017602501, 0.020653117, 0.046483666, 0.042843442, -0.045545023, -0.0017392042, 0.012052985, -0.0058927303, 0.032480225, 0.009872672, 0.024758337, -0.013354463, -0.044432696, -0.03226193, -0.059227727, 0.0078192735, 0.013650394, 0.0031477972, 0.005877705, 0.0068786396, 0.002517114, -0.011458909, 0.008640344, 0.044904687, -0.0047290456, -0.012748338, -0.048921518, 0.07827129, 0.005205742, -0.021857478, -0.02370976, 0.041743826, -0.016076453, -0.011403813, -0.025544455, -0.0046601044, -0.021723151, 0.007303265, -0.0136509575, 0.0073000537, -0.005085544, 0.04384241, -0.018964743, 0.03818674, -0.09198379, -0.032043297, -0.0067259674, 0.019887544, 0.005341308, 0.0050093965, 0.054900724, -0.020799952, 0.020495495, 0.01472667, 0.019677797, 0.037550557, -0.010920308, 0.03371257, 0.0020930816, 0.03709999, -0.036922902, -0.049608115, 0.0154750785, 0.007696657, -0.058294553, 0.045302838, -0.023393214, -0.060448237, -0.005798211, 0.053323198, 0.04070376, -0.0028753958, 0.051668108, -0.0069777397, 0.033418525, 0.016234992, -0.033323497, -0.0074829464, -0.008664235, -0.05547656, -0.11400871, -0.03518515, -0.0056998464, 0.01812429, -0.031799175, -0.0073341345, -0.06147767, -0.003742939, -0.004249079, -0.013904026, -0.00065635156, 0.09179383, 0.004267396, 0.00015509031, -0.049766053, 0.029881846, 0.10485467, -0.03120661, 0.014043553, 0.08671136, 0.059179407, 0.029454986, -0.0122302845, 0.06451508, 0.021481989, -0.06484224, 0.018707344, 0.022344032, -0.004790084, -0.04705671, 0.016396629, -0.03346155, 0.0064264126, -0.0053360737, 0.06672058, -0.0078784805, -0.016174054, 0.026566355, -0.046398066, 0.0025418145, 0.046905387, -0.020884424, -0.051193744, -0.031737294, -0.009927951, 0.023741305, -0.058117628, 0.051733956, -0.025581324, -0.030992776, 0.008804903, 0.04388304, 0.013213721, 0.004467152, -0.04988626, 0.0069321035, 0.039403677, 0.019677948, -0.066907056, 0.018572355, 0.013511877, -0.010518738, 0.010099771, -0.003633823, -0.0631501, -0.025649378, -0.043461364, 0.0016490245, 0.064196914, 0.033599235, -0.013222726, 0.015318823, 0.0771801, -0.0070276, -0.031138066, 0.0055310773, -0.09972089, 0.05066132, 0.047467627, -0.03498512, -0.03416252, -0.018362196, 0.040274452, -0.031371195, 0.030042851, 0.016328678, -0.05765591, -0.048823263, 0.054553114, -0.02033182, 0.046627544, 0.016558101, -0.0033715998, 0.0006232865, -0.0065704435, 0.008104579, 0.016307961, -0.041840676, 0.048135996, -0.018808063, -0.036892023, -0.0450471, 0.02718623, -0.036660295, -0.022694368, 0.005702901, -0.022678563, 0.0013453028, 0.07429447, -0.034700394, -0.032727163, 0.00596015, 0.034842487, -0.027818438, -0.00051779655, -0.014468772, 0.033954486, 0.04148899, -0.0829876, -0.015300944, 0.015376903, 0.09567573, 0.036652327, -0.049033575, -0.04484115, 0.041701544, -0.057027884, 0.0069984253, -0.0053272387, 0.025826871, 0.002177651, -0.030157669, 0.007895542, -0.014717798, 0.054724272, -0.05034077, -0.016694192, 0.038352106, -0.060709346, 0.08236629, -0.0096279215, 0.014632059, 0.025158316, -0.0009260515, -0.043707818, -0.01941624, -0.0118600605, -0.035666965, 0.037794825, 0.014687504, 0.038666032, -0.075831376, -0.038647566, -0.048394937, 0.031239703, 0.029136332, -0.00076040986, -0.015906896, 0.03718925, -0.0140040675, -0.037951406, -0.041062936, -0.06529122, 0.011906159, -0.011368897, 0.0060307034, 0.03684682, 0.031995844, -0.033985753, -0.018714348, -0.012443444, -0.007389346, 0.03257332, -0.04580996, -0.026579294, -0.024290696, -0.025647637, 0.022456668, -0.02420987, -0.065709755, -0.02623659, -0.028259972, 0.019707581, -0.022819564, -0.0409341, 0.026851093, 0.031858675, 0.048687093, -0.013439109, 0.011736404, 0.016420575, 0.03451187, -0.0059358296, 0.015338021, 0.04402986, 0.033739056, 0.033959225, 0.0068245684, -0.0143376645, -0.0007635987, -0.01949658, 0.016379116, 0.018640755, -0.06126936, -0.22691156, 0.015514225, -0.0010716971, 0.0044359663, 0.03258783, -0.0018310734, 0.010761778, -0.033404265, 0.005418415, 0.028870588, -0.0366465, 0.025508897, -0.003327967, -0.025249101, 0.041501254, -0.06906739, -0.03184493, -0.041302733, -0.03037772, 0.015740091, 0.008446552, -0.0459613, -0.022405358, -0.0036640046, 0.017842831, 0.003960712, -0.025942408, -0.038227286, -0.045894515, -0.01752483, 0.017444108, -0.051017836, 0.029609472, 0.008688325, 0.020816054, 0.008120903, 0.03892946, -0.033378396, 0.02176841, 0.027685048, -0.012064678, -0.079198286, -0.04271553, 0.005021753, 0.066962436, -0.03443632, -0.004004281, -0.050009515, -0.01630804, 0.06379373, 0.055116866, 0.027930314, 0.043325268, 0.02733439, -0.015951807, 0.059688378, -0.0075212875, 0.03786285, -0.04638327, -0.043671872, 0.043587692, 0.011264745, -0.059823193, 0.008415408, -0.040225852, -0.05263509, -0.0038932117, -0.047234535, 0.05749084, 0.029582193, -0.012869698, 0.027698075, -0.014221754, -0.05440618, 0.007839065, -0.028753158, -0.029088387, -0.00039888048, 0.012631819, 0.0038486738, -0.059913363, -0.0034661351, 0.011339918, 0.005836589, -0.018044928, -0.035229705, 0.0015524679, -0.035521194, -0.028409205, 0.0004174717, 0.060292065, -0.009710763, -0.04719587, 0.034226153, 0.04258676, 0.03754591, 0.056335006, -0.012293127, 0.03885916, -0.011872468, 0.026709288, -0.030494772, -0.0027441443, 0.01256448, 0.0070703924, 0.011282641, -0.03820788, -0.029001744, 0.0024300558, -0.0032799696, 0.037857816, 0.001686728, 0.056249045, -0.01862739, 0.04376537, -0.0019654054, 0.050269835, 0.035223164, 0.0059567657, 0.013870472, -0.001804614, 0.027300585, -0.03382222, -0.041098855, -0.060636565, 0.0047175046, 0.029142305, 0.06523361, 0.028681634, -0.023454288, -0.018000197, -0.030090509, -0.0046562785, -0.04519735, 0.047884777, -0.00059952086, -0.03280122, -0.08012555, 0.008639195, 0.01629006, 0.032155965, 0.034481294, 0.021274198, 0.010470909, 0.022913199, -0.035904404, 0.041294016, -0.00987633, -0.021613108, 0.012205929, 0.005322071, 0.025864823, 0.08942025, -0.08067831, -0.014871667, -0.034839284, 0.028048998, -0.0063091223, 0.037978478, -0.055790387, 0.0045954804, -0.042958327, 0.02137769, -0.008589233, -0.00062141696, 0.052822173, 0.034125473, -0.015106767, 0.0030919765, -0.0072712647, 0.0056287237, 0.019516133, -0.031278323, 0.025771588, 0.01701546, 0.019516064, 0.016180338, 0.01349268, 0.011978184, 0.011838524, -0.0050102035, -0.06970658, 0.022854539, -0.004192521, 0.0577575, -0.003954721, -0.054374386, -0.027609108, 0.0134023735, 0.010305641, -0.011130317, 0.052328475, 0.014928648, -0.013976018, -0.07100651, -0.06789901, -0.031873316, -0.011598853, 0.029284442, -0.04940027, 0.0100974385, -0.02187546, -0.062819175, 0.0069366414, 0.052176703, -0.06834835, 0.013463273, -0.0013379813, 0.005786334, 0.017775143, -0.01291353, -0.016923305, -0.049682386, -0.034103107, 0.010883184, -0.055132758, 0.025268175, -0.025599582, 0.015927013, -0.03237898, -0.027073668, -0.034358867, -0.027672807, 0.022677394, -0.03531693, 0.010573503, 0.00032215187, 0.0066956943, -0.051510572, -0.029456092, 0.05758612, -0.038166363, -0.00999853, 0.05758596, -0.006796505, 0.028503977, -0.024184246, 0.054051045, 0.0040905816, 0.099899694, 0.06076009, 0.011753628, -0.03253187, -0.0035343366, 0.02351163, 0.03206495, 0.004892613, -0.04530409, -0.0056237346, -0.006101407, 0.019704496, -0.010228795, 0.027814431, 0.020409154, 0.033115197, -0.07446951, -0.042142425, 0.03928483, -0.022784598, -0.003539396, -0.0074683367, 0.043651864, 0.007761874, 0.022063423, 0.05344986, 0.05065469, 0.029476669, -0.028968832, 0.023550583, -0.022291148, 0.055309687, -0.053843252, 0.020895477, -0.0148687605, 0.012166838, 0.0033556349, -0.07022937, -0.059401378, 0.013194393, -0.0419862, -0.0070434613, 0.030479655, -0.053955454, -0.031870224, 0.034511264, -0.047943473, 0.0069080396, 0.026099209, -0.012516935, 0.0003174421, -0.006716995, 0.07027558, 0.038463045, -0.016081728, 0.05018074, -0.062176052, 0.08961092, 0.03679902, 0.011107996, -0.0032339245, -0.0118898135, 0.013669906, 0.056221563, -0.049234938, 0.003090264, 0.01062722, -0.008937757, -0.08464787, -0.032616463, 0.055935893, 0.006192905, -0.014768529, 0.04930304, 0.053852808, -0.036349185, -0.037947245, 0.0076732435, -0.040889677, 0.022189876, 0.015142795, 0.005928425, -0.009679575, 0.039194115, 0.0041091475, 0.035120673, 0.016776932, -0.04100678, 0.041131947, 0.040904496, 0.047341976, 0.029321635, 0.030489001, -0.0135518275, 0.038717188, 0.0017859036, -0.044703316, -0.007397534, 0.029149175, -0.00021891313, 0.019795585, -0.054424375, 0.010228703, -0.0057461066, 0.05096695, 0.05683213, -0.018136851, -0.0030009004, -0.033427265, 0.010878728, 0.050759643, 0.040795874, 0.019920254, -0.026135486, -0.07642272, 0.035290312, 0.004655317, 0.0043676766, -0.010411962, -0.0076723946, 0.015248613, 0.008905208, -0.0002423048, 0.03892336, -0.025703456, -0.021123456, -0.00066909986, 0.04459856, 0.052217484, -0.017885901, -0.015303531, 0.0057848957, -0.036129624, -0.0736907, 0.035401847, -0.025658514, -0.0082354145, -0.0012491915, -0.040769547, -0.039205503, 0.05530217, -0.014954734, 0.0056790086, -0.04454665, -0.028425619, -0.034654, -0.057087515, -0.0224583, -0.005496095, -0.009889468, -0.05025576, -0.009459795, -0.00871503, 0.021968294, 0.0074964114, -0.032455806, -0.005696087, 0.005180231, 0.056079067, -0.03189999, 0.045113377, 0.061360348, 0.01839327, -0.053088665, 0.04942768, 0.014662789, 0.06641078, -0.008998172, -0.009717696, -0.079248, 0.047506567, 0.04778238, 0.025009798, -0.03899872, 0.009850679, -0.04399064, -0.053494785, 0.055456433, 0.026770461, -0.011158729, -0.073486604, -0.04088162, -0.023263954, -0.022185653, 0.03401001, -0.034742568, 0.043794204, 0.004035502, 0.011585448, -0.009235968, 0.031503983, 0.016500674, -0.012498497, -0.05733327, 0.0024852154, -0.02377962, -0.072548844, -0.008489325, 0.01825339, 0.032909963, -0.023669574, 0.0022601841, -0.008336443, 0.0041536367, 0.007989558, -0.035507284, -0.03951105, 0.0069870483, 0.04283141, -0.05102877, -0.025309727, 0.052937508, -0.014378752, -0.012047669, -0.024964543, -0.00071902486, 0.009493713, 0.024152702, 0.022622166, 0.06481285, 0.0022744837];
let now = Instant::now();
let _ret = reader.nns_by_vector(&rtxn, &query, 20, None, None, None).unwrap();
let _ret = reader.nns(20).by_vector(&rtxn, &query).unwrap();
println!("Louis's query took {:?}", now.elapsed());

let mut durations = Vec::new();
Expand All @@ -37,7 +37,7 @@ fn main() {
println!("Starts querying all documents ...");
for (id, _) in vectors.into_iter() {
let now = Instant::now();
reader.nns_by_item(&rtxn, id, 20, None, None, None).unwrap().unwrap();
let _ret = reader.nns(20).by_item(&rtxn, id).unwrap().unwrap();
// reader.nns_by_item(&rtxn, id, 20, None, Some(&filter)).unwrap().unwrap();
durations.push(now.elapsed());
}
Expand Down
18 changes: 9 additions & 9 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
//!
//! // You can specify the number of trees to use or specify None.
//! let mut rng = StdRng::seed_from_u64(42);
//! writer.build(&mut wtxn, &mut rng, None)?;
//! writer.builder(&mut rng).build(&mut wtxn)?;
//!
//! // By committing, other readers can query the database in parallel.
//! wtxn.commit()?;
Expand All @@ -49,18 +49,18 @@
//! let reader = Reader::<Euclidean>::open(&rtxn, index, db)?;
//! let n_results = 20;
//!
//! let mut query = reader.nns(n_results);
//!
//! // You can increase the quality of the results by forcing arroy to search into more nodes.
//! // This multiplier is arbitrary but basically the higher, the better the results, the slower the query.
//! let is_precise = true;
//! let search_k = if is_precise {
//! NonZeroUsize::new(n_results * reader.n_trees() * 15)
//! } else {
//! None
//! };
//! if is_precise {
//! query.search_k(NonZeroUsize::new(n_results * reader.n_trees() * 15).unwrap());
//! }
//!
//! // Similar searching can be achieved by requesting the nearest neighbors of a given item.
//! let item_id = 0;
//! let arroy_results = reader.nns_by_item(&rtxn, item_id, n_results, search_k, None, None)?.unwrap();
//! let arroy_results = query.by_item(&rtxn, item_id)?.unwrap();
//! # Ok(()) }
//! ```

Expand Down Expand Up @@ -96,9 +96,9 @@ use key::{Key, Prefix, PrefixCodec};
use metadata::{Metadata, MetadataCodec};
use node::{Node, NodeCodec};
use node_id::{NodeId, NodeMode};
pub use reader::Reader;
pub use reader::{QueryBuilder, Reader};
pub use stats::{Stats, TreeStats};
pub use writer::Writer;
pub use writer::{ArroyBuilder, Writer};

/// The set of types used by the [`Distance`] trait.
pub mod internals {
Expand Down
Loading
Loading