From 3659904278263ef6dda98230378303ef85c0b686 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Wed, 26 Jun 2024 11:57:58 +0200
Subject: [PATCH 01/61] First draft

---
 src/distance/binary_quantized_euclidean.rs | 78 ++++++++++++++++++++++
 src/distance/mod.rs                        |  1 +
 2 files changed, 79 insertions(+)
 create mode 100644 src/distance/binary_quantized_euclidean.rs

diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs
new file mode 100644
index 00000000..4ce103a0
--- /dev/null
+++ b/src/distance/binary_quantized_euclidean.rs
@@ -0,0 +1,78 @@
+use std::borrow::Cow;
+
+use bytemuck::{Pod, Zeroable};
+use rand::Rng;
+
+use super::two_means;
+use crate::distance::Distance;
+use crate::node::{Leaf, UnalignedF32Slice};
+use crate::parallel::ImmutableSubsetLeafs;
+use crate::spaces::simple::{dot_product, euclidean_distance};
+
+/// The Euclidean distance between two points in Euclidean space
+/// is the length of the line segment between them.
+///
+/// `d(p, q) = sqrt((p - q)²)`
+#[derive(Debug, Clone)]
+pub enum BinaryQuantizedEuclidean {}
+
+/// The header of BinaryQuantizedEuclidean leaf nodes.
+#[repr(C)]
+#[derive(Pod, Zeroable, Debug, Clone, Copy)]
+pub struct NodeHeaderBinaryQuantizedEuclidean {
+    /// An extra constant term to determine the offset of the plane
+    bias: f32,
+}
+
+impl Distance for BinaryQuantizedEuclidean {
+    type Header = NodeHeaderBinaryQuantizedEuclidean;
+
+    fn name() -> &'static str {
+        "binary quantized euclidean"
+    }
+
+    fn new_header(_vector: &UnalignedF32Slice) -> Self::Header {
+        NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }
+    }
+
+    fn built_distance(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
+        binary_quantized_euclidean_distance(&p.vector, &q.vector)
+    }
+
+    fn init(_node: &mut Leaf<Self>) {}
+
+    fn create_split<R: Rng>(
+        children: &ImmutableSubsetLeafs<Self>,
+        rng: &mut R,
+    ) -> heed::Result<Vec<f32>> {
+        let [node_p, node_q] = two_means(rng, children, false)?;
+        let vector = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
+        let mut normal = Leaf {
+            header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 },
+            vector: Cow::Owned(vector),
+        };
+        Self::normalize(&mut normal);
+
+        normal.header.bias = normal
+            .vector
+            .iter()
+            .zip(node_p.vector.iter())
+            .zip(node_q.vector.iter())
+            .map(|((n, p), q)| -n * (p + q) / 2.0)
+            .sum();
+
+        Ok(normal.vector.into_owned())
+    }
+
+    fn margin(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
+        p.header.bias + dot_product(&p.vector, &q.vector)
+    }
+
+    fn margin_no_header(p: &UnalignedF32Slice, q: &UnalignedF32Slice) -> f32 {
+        dot_product(p, q)
+    }
+}
+
+fn binary_quantized_euclidean_distance(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 {
+    u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::<u32>() as f32
+}
diff --git a/src/distance/mod.rs b/src/distance/mod.rs
index c047b61e..5462f543 100644
--- a/src/distance/mod.rs
+++ b/src/distance/mod.rs
@@ -15,6 +15,7 @@ use crate::spaces::simple::dot_product;
 use crate::NodeCodec;
 
 mod angular;
+mod binary_quantized_euclidean;
 mod dot_product;
 mod euclidean;
 mod manhattan;

From 692f1efc8e52b194f4019cc5421d9eee431e95bc Mon Sep 17 00:00:00 2001
From: Tamo
Date: Wed, 3 Jul 2024 11:47:27 +0200
Subject: [PATCH 02/61] prepare for the new distance trait

---
 examples/compare_with_hnsw.rs              | 11 +--
 src/distance/angular.rs                    | 21 +++--
 src/distance/binary_quantized_euclidean.rs | 29 +++---
 src/distance/dot_product.rs                | 22 +++--
 src/distance/euclidean.rs                  | 29 +++---
 src/distance/manhattan.rs                  | 31 +++---
 src/distance/mod.rs                        | 50
+++++++--- src/item_iter.rs | 2 +- src/lib.rs | 2 +- src/node.rs | 105 ++++++++++++++++----- src/reader.rs | 9 +- src/spaces/simple.rs | 18 ++-- src/spaces/simple_neon.rs | 14 +-- src/tests/mod.rs | 1 + src/tests/writer.rs | 4 +- src/writer.rs | 21 +++-- 16 files changed, 232 insertions(+), 137 deletions(-) diff --git a/examples/compare_with_hnsw.rs b/examples/compare_with_hnsw.rs index 841d9f39..82be3f33 100644 --- a/examples/compare_with_hnsw.rs +++ b/examples/compare_with_hnsw.rs @@ -1,9 +1,8 @@ -use std::borrow::Cow; use std::num::NonZeroUsize; use std::time::Instant; use arroy::distances::Euclidean; -use arroy::internals::{Leaf, UnalignedF32Slice}; +use arroy::internals::Leaf; use arroy::{Database, Distance, ItemId, Reader, Result, Writer}; use heed::{EnvOpenOptions, RwTxn}; use instant_distance::{Builder, HnswMap, MapItem}; @@ -94,10 +93,10 @@ struct Point(Vec); impl instant_distance::Point for Point { fn distance(&self, other: &Self) -> f32 { - let this = UnalignedF32Slice::from_slice(&self.0); - let other = UnalignedF32Slice::from_slice(&other.0); - let p = Leaf { header: Euclidean::new_header(this), vector: Cow::Borrowed(this) }; - let q = Leaf { header: Euclidean::new_header(other), vector: Cow::Borrowed(other) }; + let this = Euclidean::craft_unaligned_vector_from_f32(&self.0); + let other = Euclidean::craft_unaligned_vector_from_f32(&other.0); + let p = Leaf { header: Euclidean::new_header(&this), vector: this }; + let q = Leaf { header: Euclidean::new_header(&other), vector: other }; arroy::distances::Euclidean::built_distance(&p, &q).sqrt() } } diff --git a/src/distance/angular.rs b/src/distance/angular.rs index b0d93d8f..6b509fe2 100644 --- a/src/distance/angular.rs +++ b/src/distance/angular.rs @@ -5,7 +5,7 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; -use crate::node::{Leaf, UnalignedF32Slice}; +use crate::node::{Leaf, UnalignedVector}; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::dot_product; @@ -29,7 +29,7 @@ impl Distance for Angular { "angular" } - fn new_header(vector: &UnalignedF32Slice) -> Self::Header { + fn new_header(vector: &UnalignedVector) -> Self::Header { NodeHeaderAngular { norm: Self::norm_no_header(vector) } } @@ -58,20 +58,21 @@ impl Distance for Angular { node.header.norm = dot_product(&node.vector, &node.vector).sqrt(); } - fn create_split( - children: &ImmutableSubsetLeafs, + fn create_split<'a, R: Rng>( + children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result> { let [node_p, node_q] = two_means(rng, children, true)?; - let vector = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); - let mut normal = - Leaf { header: NodeHeaderAngular { norm: 0.0 }, vector: Cow::Owned(vector) }; + let vector: Vec = + node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); + let unaligned_vector = Self::craft_owned_unaligned_vector_from_f32(vector); + let mut normal = Leaf { header: NodeHeaderAngular { norm: 0.0 }, vector: unaligned_vector }; Self::normalize(&mut normal); - Ok(normal.vector.into_owned()) + Ok(normal.vector) } - fn margin_no_header(p: &UnalignedF32Slice, q: &UnalignedF32Slice) -> f32 { + fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32 { dot_product(p, q) } } diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index 4ce103a0..3c8f88e0 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ 
-5,9 +5,9 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; -use crate::node::{Leaf, UnalignedF32Slice}; +use crate::node::{Leaf, UnalignedVector}; use crate::parallel::ImmutableSubsetLeafs; -use crate::spaces::simple::{dot_product, euclidean_distance}; +use crate::spaces::simple::dot_product; /// The Euclidean distance between two points in Euclidean space /// is the length of the line segment between them. @@ -31,7 +31,7 @@ impl Distance for BinaryQuantizedEuclidean { "binary quantized euclidean" } - fn new_header(_vector: &UnalignedF32Slice) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 } } @@ -41,38 +41,39 @@ impl Distance for BinaryQuantizedEuclidean { fn init(_node: &mut Leaf) {} - fn create_split( - children: &ImmutableSubsetLeafs, + fn create_split<'a, R: Rng>( + children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result> { let [node_p, node_q] = two_means(rng, children, false)?; - let vector = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); + let vector: Vec = + node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); let mut normal = Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, - vector: Cow::Owned(vector), + vector: Self::craft_owned_unaligned_vector_from_f32(vector), }; Self::normalize(&mut normal); normal.header.bias = normal .vector - .iter() - .zip(node_p.vector.iter()) - .zip(node_q.vector.iter()) + .iter_f32() + .zip(node_p.vector.iter_f32()) + .zip(node_q.vector.iter_f32()) .map(|((n, p), q)| -n * (p + q) / 2.0) .sum(); - Ok(normal.vector.into_owned()) + Ok(normal.vector) } fn margin(p: &Leaf, q: &Leaf) -> f32 { p.header.bias + dot_product(&p.vector, &q.vector) } - fn margin_no_header(p: &UnalignedF32Slice, q: &UnalignedF32Slice) -> f32 { + fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32 { dot_product(p, q) } } -fn binary_quantized_euclidean_distance(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { +fn binary_quantized_euclidean_distance(u: &UnalignedVector, v: &UnalignedVector) -> f32 { u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 } diff --git a/src/distance/dot_product.rs b/src/distance/dot_product.rs index 4e5698d5..f35a7ef5 100644 --- a/src/distance/dot_product.rs +++ b/src/distance/dot_product.rs @@ -7,7 +7,7 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; use crate::internals::KeyCodec; -use crate::node::{Leaf, UnalignedF32Slice}; +use crate::node::{Leaf, UnalignedVector}; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::dot_product; use crate::{Node, NodeCodec}; @@ -34,7 +34,7 @@ impl Distance for DotProduct { "dot-product" } - fn new_header(_vector: &UnalignedF32Slice) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { // We compute the norm when we preprocess the vector, before generating the tree nodes. 
NodeHeaderDotProduct { extra_dim: 0.0, norm: 0.0 } } @@ -71,7 +71,8 @@ impl Distance for DotProduct { fn normalize(node: &mut Leaf) { let norm = Self::norm(node); if norm > 0.0 { - node.vector.to_mut().iter_mut().for_each(|x| *x /= norm); + let vec: Vec<_> = node.vector.iter_f32().map(|x| x / norm).collect(); + node.vector = UnalignedVector::owned_f32_vectors_from_f32_slice(vec); node.header.extra_dim /= norm; } } @@ -80,27 +81,28 @@ impl Distance for DotProduct { node.header.norm = dot_product(&node.vector, &node.vector); } - fn create_split( - children: &ImmutableSubsetLeafs, + fn create_split<'a, R: Rng>( + children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result> { let [node_p, node_q] = two_means(rng, children, true)?; - let vector = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); + let vector: Vec = + node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); let mut normal = Leaf:: { header: NodeHeaderDotProduct { norm: 0.0, extra_dim: 0.0 }, - vector: Cow::Owned(vector), + vector: Self::craft_owned_unaligned_vector_from_f32(vector), }; normal.header.extra_dim = node_p.header.extra_dim - node_q.header.extra_dim; Self::normalize(&mut normal); - Ok(normal.vector.into_owned()) + Ok(normal.vector) } fn margin(p: &Leaf, q: &Leaf) -> f32 { dot_product(&p.vector, &q.vector) + p.header.extra_dim * q.header.extra_dim } - fn margin_no_header(p: &UnalignedF32Slice, q: &UnalignedF32Slice) -> f32 { + fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32 { dot_product(p, q) } diff --git a/src/distance/euclidean.rs b/src/distance/euclidean.rs index ff882f31..e66160a8 100644 --- a/src/distance/euclidean.rs +++ b/src/distance/euclidean.rs @@ -5,7 +5,7 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; -use crate::node::{Leaf, UnalignedF32Slice}; +use crate::node::{Leaf, UnalignedVector}; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::{dot_product, euclidean_distance}; @@ -31,7 +31,7 @@ impl Distance for Euclidean { "euclidean" } - fn new_header(_vector: &UnalignedF32Slice) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderEuclidean { bias: 0.0 } } @@ -41,32 +41,35 @@ impl Distance for Euclidean { fn init(_node: &mut Leaf) {} - fn create_split( - children: &ImmutableSubsetLeafs, + fn create_split<'a, R: Rng>( + children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result> { let [node_p, node_q] = two_means(rng, children, false)?; - let vector = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); - let mut normal = - Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: Cow::Owned(vector) }; + let vector: Vec<_> = + node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); + let mut normal = Leaf { + header: NodeHeaderEuclidean { bias: 0.0 }, + vector: Self::craft_owned_unaligned_vector_from_f32(vector), + }; Self::normalize(&mut normal); normal.header.bias = normal .vector - .iter() - .zip(node_p.vector.iter()) - .zip(node_q.vector.iter()) + .iter_f32() + .zip(node_p.vector.iter_f32()) + .zip(node_q.vector.iter_f32()) .map(|((n, p), q)| -n * (p + q) / 2.0) .sum(); - Ok(normal.vector.into_owned()) + Ok(normal.vector) } fn margin(p: &Leaf, q: &Leaf) -> f32 { p.header.bias + dot_product(&p.vector, &q.vector) } - fn margin_no_header(p: &UnalignedF32Slice, q: &UnalignedF32Slice) -> f32 { + fn margin_no_header(p: &UnalignedVector, q: 
&UnalignedVector) -> f32 { dot_product(p, q) } } diff --git a/src/distance/manhattan.rs b/src/distance/manhattan.rs index 0f037c66..57ed3e95 100644 --- a/src/distance/manhattan.rs +++ b/src/distance/manhattan.rs @@ -5,7 +5,7 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; -use crate::node::{Leaf, UnalignedF32Slice}; +use crate::node::{Leaf, UnalignedVector}; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::dot_product; @@ -30,12 +30,12 @@ impl Distance for Manhattan { "manhattan" } - fn new_header(_vector: &UnalignedF32Slice) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderManhattan { bias: 0.0 } } fn built_distance(p: &Leaf, q: &Leaf) -> f32 { - p.vector.iter().zip(q.vector.iter()).map(|(p, q)| (p - q).abs()).sum() + p.vector.iter_f32().zip(q.vector.iter_f32()).map(|(p, q)| (p - q).abs()).sum() } fn normalized_distance(d: f32) -> f32 { @@ -44,32 +44,35 @@ impl Distance for Manhattan { fn init(_node: &mut Leaf) {} - fn create_split( - children: &ImmutableSubsetLeafs, + fn create_split<'a, R: Rng>( + children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result> { let [node_p, node_q] = two_means(rng, children, false)?; - let vector = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); - let mut normal = - Leaf { header: NodeHeaderManhattan { bias: 0.0 }, vector: Cow::Owned(vector) }; + let vector: Vec<_> = + node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); + let mut normal = Leaf { + header: NodeHeaderManhattan { bias: 0.0 }, + vector: Self::craft_owned_unaligned_vector_from_f32(vector), + }; Self::normalize(&mut normal); normal.header.bias = normal .vector - .iter() - .zip(node_p.vector.iter()) - .zip(node_q.vector.iter()) + .iter_f32() + .zip(node_p.vector.iter_f32()) + .zip(node_q.vector.iter_f32()) .map(|((n, p), q)| -n * (p + q) / 2.0) .sum(); - Ok(normal.vector.into_owned()) + Ok(normal.vector) } fn margin(p: &Leaf, q: &Leaf) -> f32 { p.header.bias + dot_product(&p.vector, &q.vector) } - fn margin_no_header(p: &UnalignedF32Slice, q: &UnalignedF32Slice) -> f32 { + fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32 { dot_product(p, q) } } diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 5462f543..9ddcb1f2 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::fmt; pub use angular::{Angular, NodeHeaderAngular}; @@ -9,7 +10,7 @@ pub use manhattan::{Manhattan, NodeHeaderManhattan}; use rand::Rng; use crate::internals::{KeyCodec, Side}; -use crate::node::{Leaf, UnalignedF32Slice}; +use crate::node::{Leaf, SizeMismatch, UnalignedVector}; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::dot_product; use crate::NodeCodec; @@ -29,7 +30,25 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { fn name() -> &'static str; - fn new_header(vector: &UnalignedF32Slice) -> Self::Header; + fn craft_owned_unaligned_vector_from_f32(vector: Vec) -> Cow<'static, UnalignedVector> { + UnalignedVector::owned_f32_vectors_from_f32_slice(vector) + } + + fn craft_unaligned_vector_from_f32(vector: &[f32]) -> Cow { + Cow::Borrowed(UnalignedVector::f32_vectors_from_f32_slice(vector)) + } + + fn craft_unaligned_vector_from_bytes( + vector: &[u8], + ) -> Result, SizeMismatch> { + UnalignedVector::f32_vectors_from_bytes(vector).map(Cow::Borrowed) + } + + fn read_unaligned_vector(vector: &UnalignedVector) -> Vec { + 
vector.iter_f32().collect() + } + + fn new_header(vector: &UnalignedVector) -> Self::Header; /// Returns a non-normalized distance. fn built_distance(p: &Leaf, q: &Leaf) -> f32; @@ -54,39 +73,42 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { Self::norm_no_header(&leaf.vector) } - fn norm_no_header(v: &UnalignedF32Slice) -> f32 { + fn norm_no_header(v: &UnalignedVector) -> f32 { dot_product(v, v).sqrt() } fn normalize(node: &mut Leaf) { let norm = Self::norm(node); if norm > 0.0 { - node.vector.to_mut().iter_mut().for_each(|x| *x /= norm); + let vec: Vec<_> = node.vector.iter_f32().map(|x| x / norm).collect(); + node.vector = Self::craft_owned_unaligned_vector_from_f32(vec); } } fn init(node: &mut Leaf); fn update_mean(mean: &mut Leaf, new_node: &Leaf, norm: f32, c: f32) { - mean.vector - .to_mut() - .iter_mut() - .zip(new_node.vector.iter()) - .for_each(|(x, n)| *x = (*x * c + n / norm) / (c + 1.0)); + let vec: Vec<_> = mean + .vector + .iter_f32() + .zip(new_node.vector.iter_f32()) + .map(|(x, n)| (x * c + n / norm) / (c + 1.0)) + .collect(); + mean.vector = Self::craft_owned_unaligned_vector_from_f32(vec); } - fn create_split( - children: &ImmutableSubsetLeafs, + fn create_split<'a, R: Rng>( + children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result>; + ) -> heed::Result>; fn margin(p: &Leaf, q: &Leaf) -> f32 { Self::margin_no_header(&p.vector, &q.vector) } - fn margin_no_header(p: &UnalignedF32Slice, q: &UnalignedF32Slice) -> f32; + fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32; - fn side(normal_plane: &UnalignedF32Slice, node: &Leaf, rng: &mut R) -> Side { + fn side(normal_plane: &UnalignedVector, node: &Leaf, rng: &mut R) -> Side { let dot = Self::margin_no_header(&node.vector, normal_plane); if dot > 0.0 { Side::Right diff --git a/src/item_iter.rs b/src/item_iter.rs index 1b4c67fc..6806a4e9 100644 --- a/src/item_iter.rs +++ b/src/item_iter.rs @@ -15,7 +15,7 @@ impl<'t, D: Distance> Iterator for ItemIter<'t, D> { match self.inner.next() { Some(Ok((key, node))) => match node { Node::Leaf(Leaf { header: _, vector }) => { - Some(Ok((key.node.item, vector.into_owned()))) + Some(Ok((key.node.item, D::read_unaligned_vector(&vector)))) } Node::Descendants(_) | Node::SplitPlaneNormal(_) => None, }, diff --git a/src/lib.rs b/src/lib.rs index 46a61e12..dfa2bc06 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -107,7 +107,7 @@ pub mod internals { NodeHeaderAngular, NodeHeaderDotProduct, NodeHeaderEuclidean, NodeHeaderManhattan, }; pub use crate::key::KeyCodec; - pub use crate::node::{Leaf, NodeCodec, UnalignedF32Slice}; + pub use crate::node::{Leaf, NodeCodec, UnalignedVector}; /// A type that is used to decide on /// which side of a plane we move an item. diff --git a/src/node.rs b/src/node.rs index bb115c90..7e390dc9 100644 --- a/src/node.rs +++ b/src/node.rs @@ -38,7 +38,7 @@ pub struct Leaf<'a, D: Distance> { /// The header of this leaf. pub header: D::Header, /// The vector of this leaf. - pub vector: Cow<'a, UnalignedF32Slice>, + pub vector: Cow<'a, UnalignedVector>, } impl Leaf<'_, D> { @@ -49,13 +49,32 @@ impl Leaf<'_, D> { } } -/// A wrapper struct that is used to read unaligned floats directly from memory. +/// A wrapper struct that is used to read unaligned vectors directly from memory. 
#[repr(transparent)] -pub struct UnalignedF32Slice([u8]); +pub struct UnalignedVector([u8]); + +/// The type of the words used to quantize a vector +type QuantizedWord = usize; +/// The size of the words used to quantize a vector +const QUANTIZED_WORD_SIZE: usize = QuantizedWord::BITS as usize; + +impl UnalignedVector { + /// Creates an unaligned slice of something. It's up to the caller to ensure + /// it will be used with the same type it was created initially. + pub(crate) fn reset(vector: &mut Cow<'_, UnalignedVector>) { + match vector { + Cow::Borrowed(slice) => *vector = Cow::Owned(vec![0; slice.as_bytes().len()]), + Cow::Owned(bytes) => bytes.fill(0), + } + } + /// Creates an unaligned slice of something. It's up to the caller to ensure + /// it will be used with the same type it was created initially. + pub(crate) fn from_bytes_unchecked(bytes: &[u8]) -> &Self { + unsafe { transmute(bytes) } + } -impl UnalignedF32Slice { /// Creates an unaligned slice of f32 wrapper from a slice of bytes. - pub fn from_bytes(bytes: &[u8]) -> Result<&Self, SizeMismatch> { + pub(crate) fn f32_vectors_from_bytes(bytes: &[u8]) -> Result<&Self, SizeMismatch> { if bytes.len() % size_of::() == 0 { // safety: `UnalignedF32Slice` is transparent Ok(unsafe { transmute(bytes) }) @@ -64,19 +83,51 @@ impl UnalignedF32Slice { } } + /// Creates an unaligned slice of `usize` wrapper from a slice of bytes. + pub(crate) fn quantized_vectors_from_bytes(bytes: &[u8]) -> Result<&Self, SizeMismatch> { + if bytes.len() % size_of::() == 0 { + // safety: `UnalignedF32Slice` is transparent + Ok(unsafe { transmute(bytes) }) + } else { + Err(SizeMismatch) + } + } + + /// Creates an unaligned slice of f32 wrapper from a slice of f32. + /// The slice is already known to be of the right length. + pub(crate) fn f32_vectors_from_f32_slice(slice: &[f32]) -> &Self { + Self::f32_vectors_from_bytes(cast_slice(slice)).unwrap() + } + /// Creates an unaligned slice of f32 wrapper from a slice of f32. /// The slice is already known to be of the right length. - pub fn from_slice(slice: &[f32]) -> &Self { - Self::from_bytes(cast_slice(slice)).unwrap() + pub(crate) fn owned_f32_vectors_from_f32_slice(vec: Vec) -> Cow<'static, Self> { + let bytes = vec.into_iter().flat_map(|f| f.to_ne_bytes()).collect(); + Cow::Owned(bytes) + } + + /// Creates an unaligned slice of f32 wrapper from a slice of f32. + /// The slice is already known to be of the right length. + pub(crate) fn binary_quantized_vectors_from_slice(slice: &[f32]) -> Cow { + let mut output: Vec = vec![0; slice.len() / QUANTIZED_WORD_SIZE]; + for chunk in slice.chunks_exact(QUANTIZED_WORD_SIZE) { + let mut word: QuantizedWord = 0; + for bit in chunk { + let bit = bit.is_sign_positive(); + todo!() + } + } + + Cow::Owned(output) } /// Returns the original raw slice of bytes. - pub fn as_bytes(&self) -> &[u8] { + pub(crate) fn as_bytes(&self) -> &[u8] { &self.0 } /// Return the number of f32 that fits into this slice. - pub fn len(&self) -> usize { + pub(crate) fn f32_len(&self) -> usize { self.0.len() / size_of::() } @@ -87,13 +138,22 @@ impl UnalignedF32Slice { /// Returns an iterator of f32 that are read from the slice. /// The f32 are copied in memory and are therefore, aligned. - #[allow(clippy::needless_lifetimes)] - pub fn iter<'a>(&'a self) -> impl Iterator + 'a { + pub(crate) fn iter_f32(&self) -> impl Iterator + '_ { self.0.chunks_exact(size_of::()).map(NativeEndian::read_f32) } + /// Returns an iterator of f32 that are read from the slice. 
+ /// The f32 are copied in memory and are therefore, aligned. + pub(crate) fn map_f32(&mut self, f: impl Fn(f32) -> f32) { + self.0.chunks_exact_mut(size_of::()).for_each(|chunk| { + let mut scalar = NativeEndian::read_f32(chunk); + scalar = f(scalar); + NativeEndian::write_f32(chunk, scalar); + }) + } + /// Returns the raw pointer to the start of this slice. - pub fn as_ptr(&self) -> *const u8 { + pub(crate) fn as_ptr(&self) -> *const u8 { self.0.as_ptr() } } @@ -102,21 +162,21 @@ impl UnalignedF32Slice { #[error("invalid slice of float dimension")] pub struct SizeMismatch; -impl ToOwned for UnalignedF32Slice { - type Owned = Vec; +impl ToOwned for UnalignedVector { + type Owned = Vec; fn to_owned(&self) -> Self::Owned { pod_collect_to_vec(&self.0) } } -impl Borrow for Vec { - fn borrow(&self) -> &UnalignedF32Slice { - UnalignedF32Slice::from_slice(&self[..]) +impl Borrow for Vec { + fn borrow(&self) -> &UnalignedVector { + UnalignedVector::from_bytes_unchecked(&self) } } -impl fmt::Debug for UnalignedF32Slice { +impl fmt::Debug for UnalignedVector { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { struct SmallF32(f32); impl fmt::Debug for SmallF32 { @@ -126,7 +186,7 @@ impl fmt::Debug for UnalignedF32Slice { } let mut list = f.debug_list(); - self.iter().for_each(|float| { + self.iter_f32().for_each(|float| { list.entry(&SmallF32(float)); }); list.finish() @@ -188,7 +248,7 @@ impl fmt::Debug for ItemIds<'_> { pub struct SplitPlaneNormal<'a> { pub left: NodeId, pub right: NodeId, - pub normal: Cow<'a, UnalignedF32Slice>, + pub normal: Cow<'a, UnalignedVector>, } /// The codec used internally to encode and decode nodes. @@ -228,14 +288,15 @@ impl<'a, D: Distance> BytesDecode<'a> for NodeCodec { [LEAF_TAG, bytes @ ..] => { let (header_bytes, remaining) = bytes.split_at(size_of::()); let header = pod_read_unaligned(header_bytes); - let vector = UnalignedF32Slice::from_bytes(remaining).map(Cow::Borrowed)?; + let vector = D::craft_unaligned_vector_from_bytes(remaining)?; + Ok(Node::Leaf(Leaf { header, vector })) } [SPLIT_PLANE_NORMAL_TAG, bytes @ ..] => { let (left, bytes) = NodeId::from_bytes(bytes); let (right, bytes) = NodeId::from_bytes(bytes); Ok(Node::SplitPlaneNormal(SplitPlaneNormal { - normal: UnalignedF32Slice::from_bytes(bytes).map(Cow::Borrowed)?, + normal: D::craft_unaligned_vector_from_bytes(bytes)?, left, right, })) diff --git a/src/reader.rs b/src/reader.rs index 651c1b28..ad9c7a09 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -13,7 +13,7 @@ use roaring::RoaringBitmap; use crate::distance::Distance; use crate::internals::{KeyCodec, Side}; use crate::item_iter::ItemIter; -use crate::node::{Descendants, ItemIds, Leaf, SplitPlaneNormal, UnalignedF32Slice}; +use crate::node::{Descendants, ItemIds, Leaf, SplitPlaneNormal, UnalignedVector}; use crate::{ Database, Error, ItemId, Key, MetadataCodec, Node, NodeId, Prefix, PrefixCodec, Result, Stats, TreeStats, @@ -102,7 +102,7 @@ impl<'t, D: Distance> Reader<'t, D> { Node::SplitPlaneNormal(SplitPlaneNormal { normal, left, right }) => { let left = recursive_depth(rtxn, database, index, left)?; let right = recursive_depth(rtxn, database, index, right)?; - let is_zero_normal = normal.iter().all(|f| f == 0.0) as usize; + let is_zero_normal = normal.iter_f32().all(|f| f == 0.0) as usize; Ok(TreeStats { depth: 1 + left.depth.max(right.depth), @@ -131,7 +131,8 @@ impl<'t, D: Distance> Reader<'t, D> { /// Returns the vector for item `i` that was previously added. 
pub fn item_vector(&self, rtxn: &'t RoTxn, item: ItemId) -> Result>> { - Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| leaf.vector.into_owned())) + Ok(item_leaf(self.database, self.index, rtxn, item)? + .map(|leaf| D::read_unaligned_vector(&leaf.vector))) } /// Returns `true` if the index is empty. @@ -198,7 +199,7 @@ impl<'t, D: Distance> Reader<'t, D> { }); } - let vector = UnalignedF32Slice::from_slice(vector); + let vector = UnalignedVector::f32_vectors_from_f32_slice(vector); let leaf = Leaf { header: D::new_header(vector), vector: Cow::Borrowed(vector) }; self.nns_by_leaf(rtxn, &leaf, count, search_k, candidates) } diff --git a/src/spaces/simple.rs b/src/spaces/simple.rs index 7641f0e7..2e419101 100644 --- a/src/spaces/simple.rs +++ b/src/spaces/simple.rs @@ -4,7 +4,7 @@ use super::simple_avx::*; use super::simple_neon::*; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use super::simple_sse::*; -use crate::node::UnalignedF32Slice; +use crate::node::UnalignedVector; #[cfg(target_arch = "x86_64")] const MIN_DIM_SIZE_AVX: usize = 32; @@ -16,7 +16,7 @@ const MIN_DIM_SIZE_AVX: usize = 32; ))] const MIN_DIM_SIZE_SIMD: usize = 16; -pub fn euclidean_distance(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { +pub fn euclidean_distance(u: &UnalignedVector, v: &UnalignedVector) -> f32 { #[cfg(target_arch = "x86_64")] { if is_x86_feature_detected!("avx") @@ -36,7 +36,7 @@ pub fn euclidean_distance(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - if std::arch::is_aarch64_feature_detected!("neon") && u.len() >= MIN_DIM_SIZE_SIMD { + if std::arch::is_aarch64_feature_detected!("neon") && u.f32_len() >= MIN_DIM_SIZE_SIMD { return unsafe { euclid_similarity_neon(u, v) }; } } @@ -46,11 +46,11 @@ pub fn euclidean_distance(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { // Don't use dot-product: avoid catastrophic cancellation in // https://github.com/spotify/annoy/issues/314. 
-pub fn euclidean_distance_non_optimized(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { - u.iter().zip(v.iter()).map(|(u, v)| (u - v) * (u - v)).sum() +pub fn euclidean_distance_non_optimized(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + u.iter_f32().zip(v.iter_f32()).map(|(u, v)| (u - v) * (u - v)).sum() } -pub fn dot_product(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { +pub fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { #[cfg(target_arch = "x86_64")] { if is_x86_feature_detected!("avx") @@ -70,7 +70,7 @@ pub fn dot_product(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - if std::arch::is_aarch64_feature_detected!("neon") && u.len() >= MIN_DIM_SIZE_SIMD { + if std::arch::is_aarch64_feature_detected!("neon") && u.f32_len() >= MIN_DIM_SIZE_SIMD { return unsafe { dot_similarity_neon(u, v) }; } } @@ -78,6 +78,6 @@ pub fn dot_product(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { dot_product_non_optimized(u, v) } -pub fn dot_product_non_optimized(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { - u.iter().zip(v.iter()).map(|(a, b)| a * b).sum() +pub fn dot_product_non_optimized(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + u.iter_f32().zip(v.iter_f32()).map(|(a, b)| a * b).sum() } diff --git a/src/spaces/simple_neon.rs b/src/spaces/simple_neon.rs index 88703fb1..051b36e5 100644 --- a/src/spaces/simple_neon.rs +++ b/src/spaces/simple_neon.rs @@ -2,15 +2,15 @@ use std::arch::aarch64::*; use std::ptr::read_unaligned; -use crate::node::UnalignedF32Slice; +use crate::node::UnalignedVector; #[cfg(target_feature = "neon")] -pub(crate) unsafe fn euclid_similarity_neon(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { +pub(crate) unsafe fn euclid_similarity_neon(v1: &UnalignedVector, v2: &UnalignedVector) -> f32 { // We use the unaligned_float32x4_t helper function to read f32x4 NEON SIMD types // from potentially unaligned memory locations safely. // https://github.com/meilisearch/arroy/pull/13 - let n = v1.len(); + let n = v1.f32_len(); let m = n - (n % 16); let mut ptr1 = v1.as_ptr() as *const f32; let mut ptr2 = v2.as_ptr() as *const f32; @@ -50,12 +50,12 @@ pub(crate) unsafe fn euclid_similarity_neon(v1: &UnalignedF32Slice, v2: &Unalign } #[cfg(target_feature = "neon")] -pub(crate) unsafe fn dot_similarity_neon(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { +pub(crate) unsafe fn dot_similarity_neon(v1: &UnalignedVector, v2: &UnalignedVector) -> f32 { // We use the unaligned_float32x4_t helper function to read f32x4 NEON SIMD types // from potentially unaligned memory locations safely. 
// https://github.com/meilisearch/arroy/pull/13 - let n = v1.len(); + let n = v1.f32_len(); let m = n - (n % 16); let mut ptr1 = v1.as_ptr() as *const f32; let mut ptr2 = v2.as_ptr() as *const f32; @@ -112,8 +112,8 @@ mod tests { 56., 57., 58., 59., 60., 61., ]; - let v1 = UnalignedF32Slice::from_slice(&v1[..]); - let v2 = UnalignedF32Slice::from_slice(&v2[..]); + let v1 = UnalignedVector::f32_vectors_from_f32_slice(&v1[..]); + let v2 = UnalignedVector::f32_vectors_from_f32_slice(&v2[..]); let euclid_simd = unsafe { euclid_similarity_neon(v1, v2) }; let euclid = euclidean_distance_non_optimized(v1, v2); diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 4655ddcb..5e516005 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -9,6 +9,7 @@ use tempfile::TempDir; use crate::roaring::RoaringBitmapCodec; use crate::{Database, Distance, MetadataCodec, NodeCodec, NodeMode, Reader}; +mod node; mod reader; mod writer; diff --git a/src/tests/writer.rs b/src/tests/writer.rs index d75e2e9e..683ca2b2 100644 --- a/src/tests/writer.rs +++ b/src/tests/writer.rs @@ -382,7 +382,7 @@ fn delete_document_in_an_empty_index_74() { wtxn.commit().unwrap(); - insta::assert_display_snapshot!(handle, @r###" + insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) @@ -407,7 +407,7 @@ fn delete_document_in_an_empty_index_74() { wtxn.commit().unwrap(); - insta::assert_display_snapshot!(handle, @r###" + insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "euclidean" } diff --git a/src/writer.rs b/src/writer.rs index b7df0400..80d2cc01 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -13,7 +13,7 @@ use roaring::RoaringBitmap; use crate::distance::Distance; use crate::internals::{KeyCodec, Side}; use crate::item_iter::ItemIter; -use crate::node::{Descendants, ItemIds, Leaf, SplitPlaneNormal, UnalignedF32Slice}; +use crate::node::{Descendants, ItemIds, Leaf, SplitPlaneNormal, UnalignedVector}; use crate::node_id::NodeMode; use crate::parallel::{ ConcurrentNodeIds, ImmutableLeafs, ImmutableSubsetLeafs, ImmutableTrees, TmpNodes, @@ -87,7 +87,8 @@ impl Writer { /// Returns an `Option`al vector previous stored in this database. pub fn item_vector(&self, rtxn: &RoTxn, item: ItemId) -> Result>> { - Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| leaf.vector.into_owned())) + Ok(item_leaf(self.database, self.index, rtxn, item)? + .map(|leaf| D::read_unaligned_vector(&leaf.vector))) } /// Returns `true` if the index is empty. 
@@ -138,8 +139,8 @@ impl Writer { }); } - let vector = UnalignedF32Slice::from_slice(vector); - let leaf = Leaf { header: D::new_header(vector), vector: Cow::Borrowed(vector) }; + let vector = D::craft_unaligned_vector_from_f32(vector); + let leaf = Leaf { header: D::new_header(&vector), vector }; self.database.put(wtxn, &Key::item(self.index, item), &Node::Leaf(leaf))?; let mut updated = self .database @@ -169,8 +170,8 @@ impl Writer { }); } - let vector = UnalignedF32Slice::from_slice(vector); - let leaf = Leaf { header: D::new_header(vector), vector: Cow::Borrowed(vector) }; + let vector = D::craft_unaligned_vector_from_f32(vector); + let leaf = Leaf { header: D::new_header(&vector), vector }; let key = Key::item(self.index, item); match self.database.put_with_flags(wtxn, PutFlags::APPEND, &key, &Node::Leaf(leaf)) { Ok(()) => (), @@ -554,7 +555,7 @@ impl Writer { let mut left_ids = RoaringBitmap::new(); let mut right_ids = RoaringBitmap::new(); - if normal.iter().all(|d| d == 0.0) { + if normal.iter_f32().all(|d| d == 0.0) { randomly_split_children(rng, to_insert, &mut left_ids, &mut right_ids); } else { for leaf in to_insert { @@ -699,7 +700,7 @@ impl Writer { let normal = D::create_split(&children, rng)?; for item_id in item_indices.iter() { let node = children.get(item_id)?.unwrap(); - match D::side(UnalignedF32Slice::from_slice(&normal), &node, rng) { + match D::side(&normal, &node, rng) { Side::Left => children_left.push(item_id), Side::Right => children_right.push(item_id), }; @@ -718,11 +719,11 @@ impl Writer { // and set the split plane to zero as a dummy plane. if split_imbalance(children_left.len(), children_right.len()) > 0.99 { randomly_split_children(rng, item_indices, &mut children_left, &mut children_right); - normal.fill(0.0); + UnalignedVector::reset(&mut normal); } let normal = SplitPlaneNormal { - normal: Cow::Owned(normal), + normal, left: self.make_tree_in_file(reader, rng, &children_left, tmp_nodes)?, right: self.make_tree_in_file(reader, rng, &children_right, tmp_nodes)?, }; From 27ed4d4aadee7af1a53417c892b198f1ffe5c60e Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Jul 2024 11:53:45 +0200 Subject: [PATCH 03/61] re-implements display for all kind of nodes taking the distance into account --- src/distance/binary_quantized_euclidean.rs | 21 +++- src/distance/mod.rs | 3 + src/node.rs | 132 ++++++++++++++------- src/reader.rs | 8 +- src/tests/mod.rs | 1 + src/writer.rs | 14 ++- 6 files changed, 124 insertions(+), 55 deletions(-) diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index 3c8f88e0..c6879f96 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ -5,7 +5,7 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; -use crate::node::{Leaf, UnalignedVector}; +use crate::node::{Leaf, SizeMismatch, UnalignedVector}; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::dot_product; @@ -31,6 +31,25 @@ impl Distance for BinaryQuantizedEuclidean { "binary quantized euclidean" } + fn craft_owned_unaligned_vector_from_f32(vector: Vec) -> Cow<'static, UnalignedVector> { + // We need to allocate anyway so we use the version that take a ref + UnalignedVector::binary_quantized_vectors_from_slice(&vector) + } + + fn craft_unaligned_vector_from_f32(vector: &[f32]) -> Cow { + UnalignedVector::binary_quantized_vectors_from_slice(vector) + } + + fn craft_unaligned_vector_from_bytes( + vector: &[u8], + ) -> Result, SizeMismatch> { + 
UnalignedVector::quantized_vectors_from_bytes(vector).map(Cow::Borrowed) + } + + fn read_unaligned_vector(vector: &UnalignedVector) -> Vec { + vector.iter_binary_quantized().collect() + } + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 } } diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 9ddcb1f2..3b8722db 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -2,6 +2,9 @@ use std::borrow::Cow; use std::fmt; pub use angular::{Angular, NodeHeaderAngular}; +pub use binary_quantized_euclidean::{ + BinaryQuantizedEuclidean, NodeHeaderBinaryQuantizedEuclidean, +}; use bytemuck::{Pod, Zeroable}; pub use dot_product::{DotProduct, NodeHeaderDotProduct}; pub use euclidean::{Euclidean, NodeHeaderEuclidean}; diff --git a/src/node.rs b/src/node.rs index 7e390dc9..6a3ff4b1 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,5 +1,6 @@ use std::borrow::{Borrow, Cow}; use std::fmt; +use std::marker::PhantomData; use std::mem::{size_of, transmute}; use bytemuck::{bytes_of, cast_slice, pod_collect_to_vec, pod_read_unaligned}; @@ -10,13 +11,26 @@ use roaring::RoaringBitmap; use crate::distance::Distance; use crate::{ItemId, NodeId}; -#[derive(Debug, Clone)] +#[derive(Clone)] pub enum Node<'a, D: Distance> { Leaf(Leaf<'a, D>), Descendants(Descendants<'a>), SplitPlaneNormal(SplitPlaneNormal<'a>), } +impl fmt::Debug for Node<'_, D> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Node::Leaf(leaf) => f.debug_tuple("Leaf").field(&leaf).finish(), + Node::Descendants(desc) => f.debug_tuple("Descendants").field(&desc).finish(), + Node::SplitPlaneNormal(split) => f + .debug_tuple("SplitPlaneNormal") + .field(&DisplaySplitPlaneNormal::(split, PhantomData)) + .finish(), + } + } +} + const LEAF_TAG: u8 = 0; const DESCENDANTS_TAG: u8 = 1; const SPLIT_PLANE_NORMAL_TAG: u8 = 2; @@ -31,9 +45,21 @@ impl<'a, D: Distance> Node<'a, D> { } } +/// Small structure used to implement `Debug` for the `Leaf` and the `SplitPlaneNormal`. +struct DisplayVec(Vec); +impl fmt::Debug for DisplayVec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut list = f.debug_list(); + self.0.iter().for_each(|float| { + list.entry(&format_args!("{:.4?}", float)); + }); + list.finish() + } +} + /// A leaf node which corresponds to the vector inputed /// by the user and the distance header. -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct Leaf<'a, D: Distance> { /// The header of this leaf. pub header: D::Header, @@ -41,6 +67,13 @@ pub struct Leaf<'a, D: Distance> { pub vector: Cow<'a, UnalignedVector>, } +impl fmt::Debug for Leaf<'_, D> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let vec = DisplayVec(D::read_unaligned_vector(&self.vector)); + f.debug_struct("Leaf").field("header", &self.header).field("vector", &vec).finish() + } +} + impl Leaf<'_, D> { /// Converts the leaf into an owned version of itself by cloning /// the internal vector. Doing so will make it mutable. @@ -83,16 +116,6 @@ impl UnalignedVector { } } - /// Creates an unaligned slice of `usize` wrapper from a slice of bytes. - pub(crate) fn quantized_vectors_from_bytes(bytes: &[u8]) -> Result<&Self, SizeMismatch> { - if bytes.len() % size_of::() == 0 { - // safety: `UnalignedF32Slice` is transparent - Ok(unsafe { transmute(bytes) }) - } else { - Err(SizeMismatch) - } - } - /// Creates an unaligned slice of f32 wrapper from a slice of f32. /// The slice is already known to be of the right length. 
pub(crate) fn f32_vectors_from_f32_slice(slice: &[f32]) -> &Self { @@ -106,16 +129,27 @@ impl UnalignedVector { Cow::Owned(bytes) } - /// Creates an unaligned slice of f32 wrapper from a slice of f32. - /// The slice is already known to be of the right length. - pub(crate) fn binary_quantized_vectors_from_slice(slice: &[f32]) -> Cow { - let mut output: Vec = vec![0; slice.len() / QUANTIZED_WORD_SIZE]; - for chunk in slice.chunks_exact(QUANTIZED_WORD_SIZE) { + /// Creates a binary quantized wrapper from a slice of bytes. + pub(crate) fn quantized_vectors_from_bytes(bytes: &[u8]) -> Result<&Self, SizeMismatch> { + if bytes.len() % size_of::() == 0 { + // safety: `UnalignedF32Slice` is transparent + Ok(unsafe { transmute(bytes) }) + } else { + Err(SizeMismatch) + } + } + + /// Creates a binary quantized unaligned slice of bytes from a slice of f32. + /// Will allocate. + pub(crate) fn binary_quantized_vectors_from_slice(slice: &[f32]) -> Cow<'static, Self> { + let mut output: Vec = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); + for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { let mut word: QuantizedWord = 0; - for bit in chunk { - let bit = bit.is_sign_positive(); - todo!() + for bit in chunk.iter().rev() { + word <<= 1; + word += bit.is_sign_positive() as QuantizedWord; } + output.extend_from_slice(&word.to_ne_bytes()); } Cow::Owned(output) @@ -142,14 +176,23 @@ impl UnalignedVector { self.0.chunks_exact(size_of::()).map(NativeEndian::read_f32) } - /// Returns an iterator of f32 that are read from the slice. + /// Returns an iterator of f32 that are read from the binary quantized slice. /// The f32 are copied in memory and are therefore, aligned. - pub(crate) fn map_f32(&mut self, f: impl Fn(f32) -> f32) { - self.0.chunks_exact_mut(size_of::()).for_each(|chunk| { - let mut scalar = NativeEndian::read_f32(chunk); - scalar = f(scalar); - NativeEndian::write_f32(chunk, scalar); - }) + pub(crate) fn iter_binary_quantized(&self) -> impl Iterator + '_ { + self.0 + .chunks_exact(size_of::()) + .map(|bytes| QuantizedWord::from_ne_bytes(bytes.try_into().unwrap())) + .flat_map(|mut word| { + let mut ret = vec![0.0; QUANTIZED_WORD_SIZE]; + for index in 0..QUANTIZED_WORD_SIZE { + let bit = word & 1; + word >>= 1; + if bit == 1 { + ret[index] = 1.0; + } + } + ret + }) } /// Returns the raw pointer to the start of this slice. @@ -172,24 +215,7 @@ impl ToOwned for UnalignedVector { impl Borrow for Vec { fn borrow(&self) -> &UnalignedVector { - UnalignedVector::from_bytes_unchecked(&self) - } -} - -impl fmt::Debug for UnalignedVector { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - struct SmallF32(f32); - impl fmt::Debug for SmallF32 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_fmt(format_args!("{:.4?}", self.0)) - } - } - - let mut list = f.debug_list(); - self.iter_f32().for_each(|float| { - list.entry(&SmallF32(float)); - }); - list.finish() + UnalignedVector::from_bytes_unchecked(self) } } @@ -244,13 +270,27 @@ impl fmt::Debug for ItemIds<'_> { } } -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct SplitPlaneNormal<'a> { pub left: NodeId, pub right: NodeId, pub normal: Cow<'a, UnalignedVector>, } +/// Wraps a `SplitPlaneNormal` with its distance type to display it. +/// The distance is required to be able to read the normal. 
+pub struct DisplaySplitPlaneNormal<'a, D: Distance>(&'a SplitPlaneNormal<'a>, PhantomData); +impl fmt::Debug for DisplaySplitPlaneNormal<'_, D> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let normal = DisplayVec(D::read_unaligned_vector(&self.0.normal)); + f.debug_struct("SplitPlaneNormal") + .field("left", &self.0.left) + .field("right", &self.0.right) + .field("normal", &normal) + .finish() + } +} + /// The codec used internally to encode and decode nodes. pub struct NodeCodec(D); diff --git a/src/reader.rs b/src/reader.rs index ad9c7a09..8f028fa9 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -131,8 +131,12 @@ impl<'t, D: Distance> Reader<'t, D> { /// Returns the vector for item `i` that was previously added. pub fn item_vector(&self, rtxn: &'t RoTxn, item: ItemId) -> Result>> { - Ok(item_leaf(self.database, self.index, rtxn, item)? - .map(|leaf| D::read_unaligned_vector(&leaf.vector))) + Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| { + let mut vec = D::read_unaligned_vector(&leaf.vector); + // Depending on the distance we may have up to 63 additional elements in the vec + vec.drain(self.dimensions()..); + vec + })) } /// Returns `true` if the index is empty. diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 5e516005..1350b46a 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -9,6 +9,7 @@ use tempfile::TempDir; use crate::roaring::RoaringBitmapCodec; use crate::{Database, Distance, MetadataCodec, NodeCodec, NodeMode, Reader}; +mod binary_quantized; mod node; mod reader; mod writer; diff --git a/src/writer.rs b/src/writer.rs index 80d2cc01..8f48bfb6 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -55,10 +55,9 @@ impl Writer { while let Some((item_id, node)) = cursor.next().transpose()? { match node { Node::Leaf(Leaf { header: _, vector }) => { - let new_leaf = Node::Leaf(Leaf { - header: ND::new_header(&vector), - vector: Cow::Owned(vector.into_owned()), - }); + let vector = D::read_unaligned_vector(&vector); + let vector = ND::craft_owned_unaligned_vector_from_f32(vector); + let new_leaf = Node::Leaf(Leaf { header: ND::new_header(&vector), vector }); unsafe { // safety: We do not keep a reference to the current value, we own it. cursor.put_current_with_options::>( @@ -87,8 +86,11 @@ impl Writer { /// Returns an `Option`al vector previous stored in this database. pub fn item_vector(&self, rtxn: &RoTxn, item: ItemId) -> Result>> { - Ok(item_leaf(self.database, self.index, rtxn, item)? - .map(|leaf| D::read_unaligned_vector(&leaf.vector))) + Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| { + let mut vec = D::read_unaligned_vector(&leaf.vector); + vec.drain(self.dimensions..); + vec + })) } /// Returns `true` if the index is empty. 
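Before patch 04 reworks the trait further, it helps to spell out the quantization scheme that patches 01-03 introduce: each f32 dimension is reduced to its sign bit, and the bits are packed into native-endian words (`QuantizedWord = usize`, i.e. 64 bits per word on 64-bit targets, see `binary_quantized_vectors_from_slice` and `iter_binary_quantized` in node.rs above). This is also why a decoded vector can carry up to 63 trailing padding dimensions that `item_vector` drains down to `dimensions()`. The snippet below is a minimal standalone sketch of that pack/unpack round trip under those assumptions; the helper names `pack`/`unpack` and the fixed `u64` word are illustrative only and are not part of the arroy API.

    use std::mem::size_of;

    /// One sign bit per dimension, packed in little-endian bit order inside
    /// native-endian u64 words (the real code uses `usize` as the word type).
    const WORD_BITS: usize = u64::BITS as usize;

    // Hypothetical helper mirroring `binary_quantized_vectors_from_slice`.
    fn pack(dims: &[f32]) -> Vec<u8> {
        let mut bytes = Vec::new();
        for chunk in dims.chunks(WORD_BITS) {
            let mut word: u64 = 0;
            // Iterate in reverse so the first dimension of the chunk lands in
            // the lowest bit of the word, matching the unpacking order below.
            for value in chunk.iter().rev() {
                word <<= 1;
                word |= value.is_sign_positive() as u64;
            }
            bytes.extend_from_slice(&word.to_ne_bytes());
        }
        bytes
    }

    // Hypothetical helper mirroring `iter_binary_quantized`.
    fn unpack(bytes: &[u8]) -> Vec<f32> {
        bytes
            .chunks_exact(size_of::<u64>())
            .map(|b| u64::from_ne_bytes(b.try_into().unwrap()))
            .flat_map(|mut word| {
                (0..WORD_BITS).map(move |_| {
                    let bit = word & 1;
                    word >>= 1;
                    bit as f32
                })
            })
            .collect()
    }

    fn main() {
        let input = [0.12_f32, -3.4, 7.8, -0.0, 2.2];
        let decoded = unpack(&pack(&input));
        // Non-negative inputs decode to 1.0, negative ones to 0.0, and the tail
        // is zero-padded up to a whole 64-bit word: the "up to 63 additional
        // elements" that reader.rs trims back to the index dimensions.
        assert_eq!(&decoded[..input.len()], &[1.0, 0.0, 1.0, 0.0, 1.0]);
        assert_eq!(decoded.len(), WORD_BITS);
    }

Packing one bit per dimension shrinks stored vectors by roughly 32x compared to f32 and lets the distance be computed with XOR plus popcount, as in `binary_quantized_euclidean_distance` from patch 01.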
From 331546573dff2f458576823799a6f9d0c8803d1b Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 8 Jul 2024 11:57:13 +0200 Subject: [PATCH 04/61] everything compiles and the api is clean, need work on the tests to ensure it works --- examples/compare_with_hnsw.rs | 6 +- src/distance/angular.rs | 19 +- src/distance/binary_quantized_euclidean.rs | 55 ++---- src/distance/dot_product.rs | 23 ++- src/distance/euclidean.rs | 25 ++- src/distance/manhattan.rs | 27 ++- src/distance/mod.rs | 50 +++-- src/item_iter.rs | 2 +- src/lib.rs | 11 +- src/node.rs | 213 +++------------------ src/reader.rs | 14 +- src/spaces/simple.rs | 18 +- src/spaces/simple_avx.rs | 2 +- src/spaces/simple_neon.rs | 29 +-- src/spaces/simple_sse.rs | 2 +- src/tests/writer.rs | 26 +-- src/writer.rs | 13 +- 17 files changed, 200 insertions(+), 335 deletions(-) diff --git a/examples/compare_with_hnsw.rs b/examples/compare_with_hnsw.rs index 82be3f33..b792cacf 100644 --- a/examples/compare_with_hnsw.rs +++ b/examples/compare_with_hnsw.rs @@ -2,7 +2,7 @@ use std::num::NonZeroUsize; use std::time::Instant; use arroy::distances::Euclidean; -use arroy::internals::Leaf; +use arroy::internals::{Leaf, UnalignedVector}; use arroy::{Database, Distance, ItemId, Reader, Result, Writer}; use heed::{EnvOpenOptions, RwTxn}; use instant_distance::{Builder, HnswMap, MapItem}; @@ -93,8 +93,8 @@ struct Point(Vec); impl instant_distance::Point for Point { fn distance(&self, other: &Self) -> f32 { - let this = Euclidean::craft_unaligned_vector_from_f32(&self.0); - let other = Euclidean::craft_unaligned_vector_from_f32(&other.0); + let this = UnalignedVector::from_slice(&self.0); + let other = UnalignedVector::from_slice(&other.0); let p = Leaf { header: Euclidean::new_header(&this), vector: this }; let q = Leaf { header: Euclidean::new_header(&other), vector: other }; arroy::distances::Euclidean::built_distance(&p, &q).sqrt() diff --git a/src/distance/angular.rs b/src/distance/angular.rs index 6b509fe2..adc76086 100644 --- a/src/distance/angular.rs +++ b/src/distance/angular.rs @@ -5,9 +5,10 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; -use crate::node::{Leaf, UnalignedVector}; +use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::dot_product; +use crate::unaligned_vector::UnalignedVector; /// The Cosine similarity is a measure of similarity between two /// non-zero vectors defined in an inner product space. 
Cosine similarity @@ -24,12 +25,13 @@ pub struct NodeHeaderAngular { impl Distance for Angular { type Header = NodeHeaderAngular; + type VectorFormat = f32; fn name() -> &'static str { "angular" } - fn new_header(vector: &UnalignedVector) -> Self::Header { + fn new_header(vector: &UnalignedVector) -> Self::Header { NodeHeaderAngular { norm: Self::norm_no_header(vector) } } @@ -54,6 +56,10 @@ impl Distance for Angular { d } + fn norm_no_header(v: &UnalignedVector) -> f32 { + dot_product(v, v).sqrt() + } + fn init(node: &mut Leaf) { node.header.norm = dot_product(&node.vector, &node.vector).sqrt(); } @@ -61,10 +67,10 @@ impl Distance for Angular { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, true)?; let vector: Vec = - node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); + node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let unaligned_vector = Self::craft_owned_unaligned_vector_from_f32(vector); let mut normal = Leaf { header: NodeHeaderAngular { norm: 0.0 }, vector: unaligned_vector }; Self::normalize(&mut normal); @@ -72,7 +78,10 @@ impl Distance for Angular { Ok(normal.vector) } - fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32 { + fn margin_no_header( + p: &UnalignedVector, + q: &UnalignedVector, + ) -> f32 { dot_product(p, q) } } diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index c6879f96..890f0da0 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ -5,9 +5,9 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; -use crate::node::{Leaf, SizeMismatch, UnalignedVector}; +use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; -use crate::spaces::simple::dot_product; +use crate::unaligned_vector::{self, BinaryQuantized, UnalignedVector}; /// The Euclidean distance between two points in Euclidean space /// is the length of the line segment between them. 
@@ -26,36 +26,22 @@ pub struct NodeHeaderBinaryQuantizedEuclidean { impl Distance for BinaryQuantizedEuclidean { type Header = NodeHeaderBinaryQuantizedEuclidean; + type VectorFormat = unaligned_vector::BinaryQuantized; fn name() -> &'static str { "binary quantized euclidean" } - fn craft_owned_unaligned_vector_from_f32(vector: Vec) -> Cow<'static, UnalignedVector> { - // We need to allocate anyway so we use the version that take a ref - UnalignedVector::binary_quantized_vectors_from_slice(&vector) - } - - fn craft_unaligned_vector_from_f32(vector: &[f32]) -> Cow { - UnalignedVector::binary_quantized_vectors_from_slice(vector) - } - - fn craft_unaligned_vector_from_bytes( - vector: &[u8], - ) -> Result, SizeMismatch> { - UnalignedVector::quantized_vectors_from_bytes(vector).map(Cow::Borrowed) - } - - fn read_unaligned_vector(vector: &UnalignedVector) -> Vec { - vector.iter_binary_quantized().collect() - } - - fn new_header(_vector: &UnalignedVector) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 } } fn built_distance(p: &Leaf, q: &Leaf) -> f32 { - binary_quantized_euclidean_distance(&p.vector, &q.vector) + dot_product(&p.vector, &q.vector) + } + + fn norm_no_header(v: &UnalignedVector) -> f32 { + dot_product(v, v).sqrt() } fn init(_node: &mut Leaf) {} @@ -63,36 +49,31 @@ impl Distance for BinaryQuantizedEuclidean { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, false)?; let vector: Vec = - node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); + node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let mut normal = Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, - vector: Self::craft_owned_unaligned_vector_from_f32(vector), + vector: UnalignedVector::from_slice(&vector), }; Self::normalize(&mut normal); - normal.header.bias = normal - .vector - .iter_f32() - .zip(node_p.vector.iter_f32()) - .zip(node_q.vector.iter_f32()) - .map(|((n, p), q)| -n * (p + q) / 2.0) - .sum(); - - Ok(normal.vector) + Ok(Cow::Owned(normal.vector.into_owned())) } fn margin(p: &Leaf, q: &Leaf) -> f32 { p.header.bias + dot_product(&p.vector, &q.vector) } - fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32 { + fn margin_no_header( + p: &UnalignedVector, + q: &UnalignedVector, + ) -> f32 { dot_product(p, q) } } -fn binary_quantized_euclidean_distance(u: &UnalignedVector, v: &UnalignedVector) -> f32 { +fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 } diff --git a/src/distance/dot_product.rs b/src/distance/dot_product.rs index f35a7ef5..2f6c1b73 100644 --- a/src/distance/dot_product.rs +++ b/src/distance/dot_product.rs @@ -7,9 +7,10 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; use crate::internals::KeyCodec; -use crate::node::{Leaf, UnalignedVector}; +use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::dot_product; +use crate::unaligned_vector::UnalignedVector; use crate::{Node, NodeCodec}; /// In mathematics, the dot product or scalar product is an algebraic @@ -29,12 +30,13 @@ pub struct NodeHeaderDotProduct { impl Distance for DotProduct { type Header = NodeHeaderDotProduct; + type VectorFormat = f32; fn name() -> &'static str { "dot-product" } - fn 
new_header(_vector: &UnalignedVector) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { // We compute the norm when we preprocess the vector, before generating the tree nodes. NodeHeaderDotProduct { extra_dim: 0.0, norm: 0.0 } } @@ -64,6 +66,10 @@ impl Distance for DotProduct { (dot + leaf.header.extra_dim * leaf.header.extra_dim).sqrt() } + fn norm_no_header(v: &UnalignedVector) -> f32 { + dot_product(v, v).sqrt() + } + fn normalized_distance(d: f32) -> f32 { -d } @@ -71,8 +77,8 @@ impl Distance for DotProduct { fn normalize(node: &mut Leaf) { let norm = Self::norm(node); if norm > 0.0 { - let vec: Vec<_> = node.vector.iter_f32().map(|x| x / norm).collect(); - node.vector = UnalignedVector::owned_f32_vectors_from_f32_slice(vec); + let vec: Vec<_> = node.vector.iter().map(|x| x / norm).collect(); + node.vector = UnalignedVector::from_vec(vec); node.header.extra_dim /= norm; } } @@ -84,10 +90,10 @@ impl Distance for DotProduct { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, true)?; let vector: Vec = - node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); + node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let mut normal = Leaf:: { header: NodeHeaderDotProduct { norm: 0.0, extra_dim: 0.0 }, vector: Self::craft_owned_unaligned_vector_from_f32(vector), @@ -102,7 +108,10 @@ impl Distance for DotProduct { dot_product(&p.vector, &q.vector) + p.header.extra_dim * q.header.extra_dim } - fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32 { + fn margin_no_header( + p: &UnalignedVector, + q: &UnalignedVector, + ) -> f32 { dot_product(p, q) } diff --git a/src/distance/euclidean.rs b/src/distance/euclidean.rs index e66160a8..a4a935d2 100644 --- a/src/distance/euclidean.rs +++ b/src/distance/euclidean.rs @@ -5,9 +5,10 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; -use crate::node::{Leaf, UnalignedVector}; +use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::{dot_product, euclidean_distance}; +use crate::unaligned_vector::UnalignedVector; /// The Euclidean distance between two points in Euclidean space /// is the length of the line segment between them. 
@@ -26,12 +27,13 @@ pub struct NodeHeaderEuclidean { impl Distance for Euclidean { type Header = NodeHeaderEuclidean; + type VectorFormat = f32; fn name() -> &'static str { "euclidean" } - fn new_header(_vector: &UnalignedVector) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderEuclidean { bias: 0.0 } } @@ -39,15 +41,19 @@ impl Distance for Euclidean { euclidean_distance(&p.vector, &q.vector) } + fn norm_no_header(v: &UnalignedVector) -> f32 { + dot_product(v, v).sqrt() + } + fn init(_node: &mut Leaf) {} fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, false)?; let vector: Vec<_> = - node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); + node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let mut normal = Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: Self::craft_owned_unaligned_vector_from_f32(vector), @@ -56,9 +62,9 @@ impl Distance for Euclidean { normal.header.bias = normal .vector - .iter_f32() - .zip(node_p.vector.iter_f32()) - .zip(node_q.vector.iter_f32()) + .iter() + .zip(node_p.vector.iter()) + .zip(node_q.vector.iter()) .map(|((n, p), q)| -n * (p + q) / 2.0) .sum(); @@ -69,7 +75,10 @@ impl Distance for Euclidean { p.header.bias + dot_product(&p.vector, &q.vector) } - fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32 { + fn margin_no_header( + p: &UnalignedVector, + q: &UnalignedVector, + ) -> f32 { dot_product(p, q) } } diff --git a/src/distance/manhattan.rs b/src/distance/manhattan.rs index 57ed3e95..7036eedf 100644 --- a/src/distance/manhattan.rs +++ b/src/distance/manhattan.rs @@ -5,9 +5,10 @@ use rand::Rng; use super::two_means; use crate::distance::Distance; -use crate::node::{Leaf, UnalignedVector}; +use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; use crate::spaces::simple::dot_product; +use crate::unaligned_vector::UnalignedVector; /// A taxicab geometry or a Manhattan geometry is a geometry whose usual distance function /// or metric of Euclidean geometry is replaced by a new metric in which the distance between @@ -25,32 +26,37 @@ pub struct NodeHeaderManhattan { impl Distance for Manhattan { type Header = NodeHeaderManhattan; + type VectorFormat = f32; fn name() -> &'static str { "manhattan" } - fn new_header(_vector: &UnalignedVector) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderManhattan { bias: 0.0 } } fn built_distance(p: &Leaf, q: &Leaf) -> f32 { - p.vector.iter_f32().zip(q.vector.iter_f32()).map(|(p, q)| (p - q).abs()).sum() + p.vector.iter().zip(q.vector.iter()).map(|(p, q)| (p - q).abs()).sum() } fn normalized_distance(d: f32) -> f32 { d.max(0.0) } + fn norm_no_header(v: &UnalignedVector) -> f32 { + dot_product(v, v).sqrt() + } + fn init(_node: &mut Leaf) {} fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, false)?; let vector: Vec<_> = - node_p.vector.iter_f32().zip(node_q.vector.iter_f32()).map(|(p, q)| p - q).collect(); + node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let mut normal = Leaf { header: NodeHeaderManhattan { bias: 0.0 }, vector: Self::craft_owned_unaligned_vector_from_f32(vector), @@ -59,9 +65,9 @@ impl Distance for Manhattan { normal.header.bias = normal .vector - .iter_f32() - 
.zip(node_p.vector.iter_f32()) - .zip(node_q.vector.iter_f32()) + .iter() + .zip(node_p.vector.iter()) + .zip(node_q.vector.iter()) .map(|((n, p), q)| -n * (p + q) / 2.0) .sum(); @@ -72,7 +78,10 @@ impl Distance for Manhattan { p.header.bias + dot_product(&p.vector, &q.vector) } - fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32 { + fn margin_no_header( + p: &UnalignedVector, + q: &UnalignedVector, + ) -> f32 { dot_product(p, q) } } diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 3b8722db..78033e8a 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -13,9 +13,9 @@ pub use manhattan::{Manhattan, NodeHeaderManhattan}; use rand::Rng; use crate::internals::{KeyCodec, Side}; -use crate::node::{Leaf, SizeMismatch, UnalignedVector}; +use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; -use crate::spaces::simple::dot_product; +use crate::unaligned_vector::{UnalignedVector, UnalignedVectorCodec}; use crate::NodeCodec; mod angular; @@ -30,28 +30,17 @@ mod manhattan; pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { /// A header structure with informations related to the type Header: Pod + Zeroable + fmt::Debug; + type VectorFormat: UnalignedVectorCodec; fn name() -> &'static str; - fn craft_owned_unaligned_vector_from_f32(vector: Vec) -> Cow<'static, UnalignedVector> { - UnalignedVector::owned_f32_vectors_from_f32_slice(vector) + fn craft_owned_unaligned_vector_from_f32( + vector: Vec, + ) -> Cow<'static, UnalignedVector> { + UnalignedVector::from_vec(vector) } - fn craft_unaligned_vector_from_f32(vector: &[f32]) -> Cow { - Cow::Borrowed(UnalignedVector::f32_vectors_from_f32_slice(vector)) - } - - fn craft_unaligned_vector_from_bytes( - vector: &[u8], - ) -> Result, SizeMismatch> { - UnalignedVector::f32_vectors_from_bytes(vector).map(Cow::Borrowed) - } - - fn read_unaligned_vector(vector: &UnalignedVector) -> Vec { - vector.iter_f32().collect() - } - - fn new_header(vector: &UnalignedVector) -> Self::Header; + fn new_header(vector: &UnalignedVector) -> Self::Header; /// Returns a non-normalized distance. 
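    /// For `Euclidean` this is the squared distance between the two points; the
    /// binary-quantized Euclidean above counts differing bits instead.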
fn built_distance(p: &Leaf, q: &Leaf) -> f32; @@ -76,14 +65,12 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { Self::norm_no_header(&leaf.vector) } - fn norm_no_header(v: &UnalignedVector) -> f32 { - dot_product(v, v).sqrt() - } + fn norm_no_header(v: &UnalignedVector) -> f32; fn normalize(node: &mut Leaf) { let norm = Self::norm(node); if norm > 0.0 { - let vec: Vec<_> = node.vector.iter_f32().map(|x| x / norm).collect(); + let vec: Vec<_> = node.vector.iter().map(|x| x / norm).collect(); node.vector = Self::craft_owned_unaligned_vector_from_f32(vec); } } @@ -93,8 +80,8 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { fn update_mean(mean: &mut Leaf, new_node: &Leaf, norm: f32, c: f32) { let vec: Vec<_> = mean .vector - .iter_f32() - .zip(new_node.vector.iter_f32()) + .iter() + .zip(new_node.vector.iter()) .map(|(x, n)| (x * c + n / norm) / (c + 1.0)) .collect(); mean.vector = Self::craft_owned_unaligned_vector_from_f32(vec); @@ -103,15 +90,22 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result>; + ) -> heed::Result>>; fn margin(p: &Leaf, q: &Leaf) -> f32 { Self::margin_no_header(&p.vector, &q.vector) } - fn margin_no_header(p: &UnalignedVector, q: &UnalignedVector) -> f32; + fn margin_no_header( + p: &UnalignedVector, + q: &UnalignedVector, + ) -> f32; - fn side(normal_plane: &UnalignedVector, node: &Leaf, rng: &mut R) -> Side { + fn side( + normal_plane: &UnalignedVector, + node: &Leaf, + rng: &mut R, + ) -> Side { let dot = Self::margin_no_header(&node.vector, normal_plane); if dot > 0.0 { Side::Right diff --git a/src/item_iter.rs b/src/item_iter.rs index 6806a4e9..b9a1a63b 100644 --- a/src/item_iter.rs +++ b/src/item_iter.rs @@ -15,7 +15,7 @@ impl<'t, D: Distance> Iterator for ItemIter<'t, D> { match self.inner.next() { Some(Ok((key, node))) => match node { Node::Leaf(Leaf { header: _, vector }) => { - Some(Ok((key.node.item, D::read_unaligned_vector(&vector)))) + Some(Ok((key.node.item, vector.to_vec()))) } Node::Descendants(_) | Node::SplitPlaneNormal(_) => None, }, diff --git a/src/lib.rs b/src/lib.rs index dfa2bc06..0c0e873b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -88,6 +88,7 @@ mod writer; #[cfg(test)] mod tests; +mod unaligned_vector; pub use distance::Distance; pub use error::Error; @@ -104,10 +105,12 @@ pub mod internals { use rand::Rng; pub use crate::distance::{ - NodeHeaderAngular, NodeHeaderDotProduct, NodeHeaderEuclidean, NodeHeaderManhattan, + NodeHeaderAngular, NodeHeaderBinaryQuantizedEuclidean, NodeHeaderDotProduct, + NodeHeaderEuclidean, NodeHeaderManhattan, }; pub use crate::key::KeyCodec; - pub use crate::node::{Leaf, NodeCodec, UnalignedVector}; + pub use crate::node::{Leaf, NodeCodec}; + pub use crate::unaligned_vector::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; /// A type that is used to decide on /// which side of a plane we move an item. @@ -132,7 +135,9 @@ pub mod internals { /// The set of distances implementing the [`Distance`] and supported by arroy. pub mod distances { - pub use crate::distance::{Angular, DotProduct, Euclidean, Manhattan}; + pub use crate::distance::{ + Angular, BinaryQuantizedEuclidean, DotProduct, Euclidean, Manhattan, + }; } /// A custom Result type that is returning an arroy error by default. 
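A minimal sketch of how the pieces re-exported above fit together after this refactor (only `leaf_from_f32s` is hypothetical, everything else comes from the re-exports shown above): the codec picked by `Distance::VectorFormat` decides how the raw f32 slice is stored, so the same calling code works for plain-f32 and binary-quantized distances.

use std::borrow::Cow;

use arroy::distances::Euclidean;
use arroy::internals::{Leaf, UnalignedVector};
use arroy::Distance;

/// Hypothetical helper: builds a leaf for any distance `D`, letting
/// `D::VectorFormat` choose the storage codec (plain f32 bytes,
/// binary-quantized words, ...).
fn leaf_from_f32s<'a, D: Distance>(values: &'a [f32]) -> Leaf<'a, D> {
    let vector: Cow<'a, UnalignedVector<D::VectorFormat>> = UnalignedVector::from_slice(values);
    Leaf { header: D::new_header(&vector), vector }
}

fn main() {
    let leaf = leaf_from_f32s::<Euclidean>(&[0.25, -1.0, 3.5]);
    // The f32 codec round-trips losslessly; a binary-quantized codec would
    // instead decode to 0.0/1.0 values padded to a whole quantization word.
    assert_eq!(leaf.vector.to_vec(), vec![0.25, -1.0, 3.5]);
}

Swapping `Euclidean` for `BinaryQuantizedEuclidean` changes only the stored representation; callers keep passing and reading plain f32 slices.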
diff --git a/src/node.rs b/src/node.rs index 6a3ff4b1..197dcd56 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,34 +1,21 @@ -use std::borrow::{Borrow, Cow}; +use std::borrow::Cow; use std::fmt; -use std::marker::PhantomData; -use std::mem::{size_of, transmute}; +use std::mem::size_of; -use bytemuck::{bytes_of, cast_slice, pod_collect_to_vec, pod_read_unaligned}; +use bytemuck::{bytes_of, cast_slice, pod_read_unaligned}; use byteorder::{ByteOrder, NativeEndian}; use heed::{BoxedError, BytesDecode, BytesEncode}; use roaring::RoaringBitmap; use crate::distance::Distance; +use crate::unaligned_vector::UnalignedVector; use crate::{ItemId, NodeId}; -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum Node<'a, D: Distance> { Leaf(Leaf<'a, D>), Descendants(Descendants<'a>), - SplitPlaneNormal(SplitPlaneNormal<'a>), -} - -impl fmt::Debug for Node<'_, D> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Node::Leaf(leaf) => f.debug_tuple("Leaf").field(&leaf).finish(), - Node::Descendants(desc) => f.debug_tuple("Descendants").field(&desc).finish(), - Node::SplitPlaneNormal(split) => f - .debug_tuple("SplitPlaneNormal") - .field(&DisplaySplitPlaneNormal::(split, PhantomData)) - .finish(), - } - } + SplitPlaneNormal(SplitPlaneNormal<'a, D>), } const LEAF_TAG: u8 = 0; @@ -45,32 +32,24 @@ impl<'a, D: Distance> Node<'a, D> { } } -/// Small structure used to implement `Debug` for the `Leaf` and the `SplitPlaneNormal`. -struct DisplayVec(Vec); -impl fmt::Debug for DisplayVec { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut list = f.debug_list(); - self.0.iter().for_each(|float| { - list.entry(&format_args!("{:.4?}", float)); - }); - list.finish() - } -} - /// A leaf node which corresponds to the vector inputed /// by the user and the distance header. -#[derive(Clone)] pub struct Leaf<'a, D: Distance> { /// The header of this leaf. pub header: D::Header, /// The vector of this leaf. - pub vector: Cow<'a, UnalignedVector>, + pub vector: Cow<'a, UnalignedVector>, } impl fmt::Debug for Leaf<'_, D> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let vec = DisplayVec(D::read_unaligned_vector(&self.vector)); - f.debug_struct("Leaf").field("header", &self.header).field("vector", &vec).finish() + f.debug_struct("Leaf").field("header", &self.header).field("vector", &self.vector).finish() + } +} + +impl Clone for Leaf<'_, D> { + fn clone(&self) -> Self { + Self { header: self.header, vector: self.vector.clone() } } } @@ -82,143 +61,6 @@ impl Leaf<'_, D> { } } -/// A wrapper struct that is used to read unaligned vectors directly from memory. -#[repr(transparent)] -pub struct UnalignedVector([u8]); - -/// The type of the words used to quantize a vector -type QuantizedWord = usize; -/// The size of the words used to quantize a vector -const QUANTIZED_WORD_SIZE: usize = QuantizedWord::BITS as usize; - -impl UnalignedVector { - /// Creates an unaligned slice of something. It's up to the caller to ensure - /// it will be used with the same type it was created initially. - pub(crate) fn reset(vector: &mut Cow<'_, UnalignedVector>) { - match vector { - Cow::Borrowed(slice) => *vector = Cow::Owned(vec![0; slice.as_bytes().len()]), - Cow::Owned(bytes) => bytes.fill(0), - } - } - /// Creates an unaligned slice of something. It's up to the caller to ensure - /// it will be used with the same type it was created initially. 
- pub(crate) fn from_bytes_unchecked(bytes: &[u8]) -> &Self { - unsafe { transmute(bytes) } - } - - /// Creates an unaligned slice of f32 wrapper from a slice of bytes. - pub(crate) fn f32_vectors_from_bytes(bytes: &[u8]) -> Result<&Self, SizeMismatch> { - if bytes.len() % size_of::() == 0 { - // safety: `UnalignedF32Slice` is transparent - Ok(unsafe { transmute(bytes) }) - } else { - Err(SizeMismatch) - } - } - - /// Creates an unaligned slice of f32 wrapper from a slice of f32. - /// The slice is already known to be of the right length. - pub(crate) fn f32_vectors_from_f32_slice(slice: &[f32]) -> &Self { - Self::f32_vectors_from_bytes(cast_slice(slice)).unwrap() - } - - /// Creates an unaligned slice of f32 wrapper from a slice of f32. - /// The slice is already known to be of the right length. - pub(crate) fn owned_f32_vectors_from_f32_slice(vec: Vec) -> Cow<'static, Self> { - let bytes = vec.into_iter().flat_map(|f| f.to_ne_bytes()).collect(); - Cow::Owned(bytes) - } - - /// Creates a binary quantized wrapper from a slice of bytes. - pub(crate) fn quantized_vectors_from_bytes(bytes: &[u8]) -> Result<&Self, SizeMismatch> { - if bytes.len() % size_of::() == 0 { - // safety: `UnalignedF32Slice` is transparent - Ok(unsafe { transmute(bytes) }) - } else { - Err(SizeMismatch) - } - } - - /// Creates a binary quantized unaligned slice of bytes from a slice of f32. - /// Will allocate. - pub(crate) fn binary_quantized_vectors_from_slice(slice: &[f32]) -> Cow<'static, Self> { - let mut output: Vec = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); - for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { - let mut word: QuantizedWord = 0; - for bit in chunk.iter().rev() { - word <<= 1; - word += bit.is_sign_positive() as QuantizedWord; - } - output.extend_from_slice(&word.to_ne_bytes()); - } - - Cow::Owned(output) - } - - /// Returns the original raw slice of bytes. - pub(crate) fn as_bytes(&self) -> &[u8] { - &self.0 - } - - /// Return the number of f32 that fits into this slice. - pub(crate) fn f32_len(&self) -> usize { - self.0.len() / size_of::() - } - - /// Returns wether it is empty or not. - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - - /// Returns an iterator of f32 that are read from the slice. - /// The f32 are copied in memory and are therefore, aligned. - pub(crate) fn iter_f32(&self) -> impl Iterator + '_ { - self.0.chunks_exact(size_of::()).map(NativeEndian::read_f32) - } - - /// Returns an iterator of f32 that are read from the binary quantized slice. - /// The f32 are copied in memory and are therefore, aligned. - pub(crate) fn iter_binary_quantized(&self) -> impl Iterator + '_ { - self.0 - .chunks_exact(size_of::()) - .map(|bytes| QuantizedWord::from_ne_bytes(bytes.try_into().unwrap())) - .flat_map(|mut word| { - let mut ret = vec![0.0; QUANTIZED_WORD_SIZE]; - for index in 0..QUANTIZED_WORD_SIZE { - let bit = word & 1; - word >>= 1; - if bit == 1 { - ret[index] = 1.0; - } - } - ret - }) - } - - /// Returns the raw pointer to the start of this slice. 
- pub(crate) fn as_ptr(&self) -> *const u8 { - self.0.as_ptr() - } -} - -#[derive(Debug, thiserror::Error)] -#[error("invalid slice of float dimension")] -pub struct SizeMismatch; - -impl ToOwned for UnalignedVector { - type Owned = Vec; - - fn to_owned(&self) -> Self::Owned { - pod_collect_to_vec(&self.0) - } -} - -impl Borrow for Vec { - fn borrow(&self) -> &UnalignedVector { - UnalignedVector::from_bytes_unchecked(self) - } -} - #[derive(Clone)] pub struct Descendants<'a> { // A descendants node can only contains references to the leaf nodes. @@ -270,27 +112,28 @@ impl fmt::Debug for ItemIds<'_> { } } -#[derive(Clone)] -pub struct SplitPlaneNormal<'a> { +pub struct SplitPlaneNormal<'a, D: Distance> { pub left: NodeId, pub right: NodeId, - pub normal: Cow<'a, UnalignedVector>, + pub normal: Cow<'a, UnalignedVector>, } -/// Wraps a `SplitPlaneNormal` with its distance type to display it. -/// The distance is required to be able to read the normal. -pub struct DisplaySplitPlaneNormal<'a, D: Distance>(&'a SplitPlaneNormal<'a>, PhantomData); -impl fmt::Debug for DisplaySplitPlaneNormal<'_, D> { +impl fmt::Debug for SplitPlaneNormal<'_, D> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let normal = DisplayVec(D::read_unaligned_vector(&self.0.normal)); f.debug_struct("SplitPlaneNormal") - .field("left", &self.0.left) - .field("right", &self.0.right) - .field("normal", &normal) + .field("left", &self.left) + .field("right", &self.right) + .field("normal", &self.normal) .finish() } } +impl Clone for SplitPlaneNormal<'_, D> { + fn clone(&self) -> Self { + Self { left: self.left, right: self.right, normal: self.normal.clone() } + } +} + /// The codec used internally to encode and decode nodes. pub struct NodeCodec(D); @@ -328,7 +171,7 @@ impl<'a, D: Distance> BytesDecode<'a> for NodeCodec { [LEAF_TAG, bytes @ ..] 
=> { let (header_bytes, remaining) = bytes.split_at(size_of::()); let header = pod_read_unaligned(header_bytes); - let vector = D::craft_unaligned_vector_from_bytes(remaining)?; + let vector = UnalignedVector::::from_bytes(remaining)?; Ok(Node::Leaf(Leaf { header, vector })) } @@ -336,7 +179,7 @@ impl<'a, D: Distance> BytesDecode<'a> for NodeCodec { let (left, bytes) = NodeId::from_bytes(bytes); let (right, bytes) = NodeId::from_bytes(bytes); Ok(Node::SplitPlaneNormal(SplitPlaneNormal { - normal: D::craft_unaligned_vector_from_bytes(bytes)?, + normal: UnalignedVector::::from_bytes(bytes)?, left, right, })) diff --git a/src/reader.rs b/src/reader.rs index 8f028fa9..c4f73c6e 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1,4 +1,3 @@ -use std::borrow::Cow; use std::cmp::Reverse; use std::collections::BinaryHeap; use std::iter::repeat; @@ -13,7 +12,8 @@ use roaring::RoaringBitmap; use crate::distance::Distance; use crate::internals::{KeyCodec, Side}; use crate::item_iter::ItemIter; -use crate::node::{Descendants, ItemIds, Leaf, SplitPlaneNormal, UnalignedVector}; +use crate::node::{Descendants, ItemIds, Leaf, SplitPlaneNormal}; +use crate::unaligned_vector::UnalignedVector; use crate::{ Database, Error, ItemId, Key, MetadataCodec, Node, NodeId, Prefix, PrefixCodec, Result, Stats, TreeStats, @@ -102,7 +102,7 @@ impl<'t, D: Distance> Reader<'t, D> { Node::SplitPlaneNormal(SplitPlaneNormal { normal, left, right }) => { let left = recursive_depth(rtxn, database, index, left)?; let right = recursive_depth(rtxn, database, index, right)?; - let is_zero_normal = normal.iter_f32().all(|f| f == 0.0) as usize; + let is_zero_normal = normal.iter().all(|f| f == 0.0) as usize; Ok(TreeStats { depth: 1 + left.depth.max(right.depth), @@ -132,8 +132,8 @@ impl<'t, D: Distance> Reader<'t, D> { /// Returns the vector for item `i` that was previously added. pub fn item_vector(&self, rtxn: &'t RoTxn, item: ItemId) -> Result>> { Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| { - let mut vec = D::read_unaligned_vector(&leaf.vector); - // Depending on the distance we may have up to 63 additional elements in the vec + let mut vec = leaf.vector.to_vec(); + // Depending on the distance we may have additional elements in the vec that needs to be removed. 
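+            // (The binary-quantized codec, for example, decodes whole `QuantizedWord`s,
+            // so the vec comes back zero-padded up to a multiple of the word size.)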
vec.drain(self.dimensions()..); vec })) @@ -203,8 +203,8 @@ impl<'t, D: Distance> Reader<'t, D> { }); } - let vector = UnalignedVector::f32_vectors_from_f32_slice(vector); - let leaf = Leaf { header: D::new_header(vector), vector: Cow::Borrowed(vector) }; + let vector = UnalignedVector::from_slice(vector); + let leaf = Leaf { header: D::new_header(&vector), vector }; self.nns_by_leaf(rtxn, &leaf, count, search_k, candidates) } diff --git a/src/spaces/simple.rs b/src/spaces/simple.rs index 2e419101..f80d11a4 100644 --- a/src/spaces/simple.rs +++ b/src/spaces/simple.rs @@ -4,7 +4,7 @@ use super::simple_avx::*; use super::simple_neon::*; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use super::simple_sse::*; -use crate::node::UnalignedVector; +use crate::unaligned_vector::UnalignedVector; #[cfg(target_arch = "x86_64")] const MIN_DIM_SIZE_AVX: usize = 32; @@ -16,7 +16,7 @@ const MIN_DIM_SIZE_AVX: usize = 32; ))] const MIN_DIM_SIZE_SIMD: usize = 16; -pub fn euclidean_distance(u: &UnalignedVector, v: &UnalignedVector) -> f32 { +pub fn euclidean_distance(u: &UnalignedVector, v: &UnalignedVector) -> f32 { #[cfg(target_arch = "x86_64")] { if is_x86_feature_detected!("avx") @@ -36,7 +36,7 @@ pub fn euclidean_distance(u: &UnalignedVector, v: &UnalignedVector) -> f32 { #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - if std::arch::is_aarch64_feature_detected!("neon") && u.f32_len() >= MIN_DIM_SIZE_SIMD { + if std::arch::is_aarch64_feature_detected!("neon") && u.len() >= MIN_DIM_SIZE_SIMD { return unsafe { euclid_similarity_neon(u, v) }; } } @@ -46,11 +46,11 @@ pub fn euclidean_distance(u: &UnalignedVector, v: &UnalignedVector) -> f32 { // Don't use dot-product: avoid catastrophic cancellation in // https://github.com/spotify/annoy/issues/314. 
-pub fn euclidean_distance_non_optimized(u: &UnalignedVector, v: &UnalignedVector) -> f32 { - u.iter_f32().zip(v.iter_f32()).map(|(u, v)| (u - v) * (u - v)).sum() +pub fn euclidean_distance_non_optimized(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + u.iter().zip(v.iter()).map(|(u, v)| (u - v) * (u - v)).sum() } -pub fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { +pub fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { #[cfg(target_arch = "x86_64")] { if is_x86_feature_detected!("avx") @@ -70,7 +70,7 @@ pub fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - if std::arch::is_aarch64_feature_detected!("neon") && u.f32_len() >= MIN_DIM_SIZE_SIMD { + if std::arch::is_aarch64_feature_detected!("neon") && u.len() >= MIN_DIM_SIZE_SIMD { return unsafe { dot_similarity_neon(u, v) }; } } @@ -78,6 +78,6 @@ pub fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { dot_product_non_optimized(u, v) } -pub fn dot_product_non_optimized(u: &UnalignedVector, v: &UnalignedVector) -> f32 { - u.iter_f32().zip(v.iter_f32()).map(|(a, b)| a * b).sum() +pub fn dot_product_non_optimized(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + u.iter().zip(v.iter()).map(|(a, b)| a * b).sum() } diff --git a/src/spaces/simple_avx.rs b/src/spaces/simple_avx.rs index fd342905..f52c2427 100644 --- a/src/spaces/simple_avx.rs +++ b/src/spaces/simple_avx.rs @@ -1,7 +1,7 @@ use std::arch::x86_64::*; use std::ptr::read_unaligned; -use crate::node::UnalignedF32Slice; +use crate::unaligned_vector::UnalignedVector; #[target_feature(enable = "avx")] #[target_feature(enable = "fma")] diff --git a/src/spaces/simple_neon.rs b/src/spaces/simple_neon.rs index 051b36e5..d295555a 100644 --- a/src/spaces/simple_neon.rs +++ b/src/spaces/simple_neon.rs @@ -1,16 +1,18 @@ #[cfg(target_feature = "neon")] +use crate::unaligned_vector::UnalignedVector; use std::arch::aarch64::*; use std::ptr::read_unaligned; -use crate::node::UnalignedVector; - #[cfg(target_feature = "neon")] -pub(crate) unsafe fn euclid_similarity_neon(v1: &UnalignedVector, v2: &UnalignedVector) -> f32 { +pub(crate) unsafe fn euclid_similarity_neon( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { // We use the unaligned_float32x4_t helper function to read f32x4 NEON SIMD types // from potentially unaligned memory locations safely. // https://github.com/meilisearch/arroy/pull/13 - let n = v1.f32_len(); + let n = v1.len(); let m = n - (n % 16); let mut ptr1 = v1.as_ptr() as *const f32; let mut ptr2 = v2.as_ptr() as *const f32; @@ -50,12 +52,15 @@ pub(crate) unsafe fn euclid_similarity_neon(v1: &UnalignedVector, v2: &Unaligned } #[cfg(target_feature = "neon")] -pub(crate) unsafe fn dot_similarity_neon(v1: &UnalignedVector, v2: &UnalignedVector) -> f32 { +pub(crate) unsafe fn dot_similarity_neon( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { // We use the unaligned_float32x4_t helper function to read f32x4 NEON SIMD types // from potentially unaligned memory locations safely. 
// https://github.com/meilisearch/arroy/pull/13 - let n = v1.f32_len(); + let n = v1.len(); let m = n - (n % 16); let mut ptr1 = v1.as_ptr() as *const f32; let mut ptr2 = v2.as_ptr() as *const f32; @@ -112,15 +117,15 @@ mod tests { 56., 57., 58., 59., 60., 61., ]; - let v1 = UnalignedVector::f32_vectors_from_f32_slice(&v1[..]); - let v2 = UnalignedVector::f32_vectors_from_f32_slice(&v2[..]); + let v1 = UnalignedVector::from_slice(&v1[..]); + let v2 = UnalignedVector::from_slice(&v2[..]); - let euclid_simd = unsafe { euclid_similarity_neon(v1, v2) }; - let euclid = euclidean_distance_non_optimized(v1, v2); + let euclid_simd = unsafe { euclid_similarity_neon(&v1, &v2) }; + let euclid = euclidean_distance_non_optimized(&v1, &v2); assert_eq!(euclid_simd, euclid); - let dot_simd = unsafe { dot_similarity_neon(v1, v2) }; - let dot = dot_product_non_optimized(v1, v2); + let dot_simd = unsafe { dot_similarity_neon(&v1, &v2) }; + let dot = dot_product_non_optimized(&v1, &v2); assert_eq!(dot_simd, dot); // let cosine_simd = unsafe { cosine_preprocess_neon(v1.clone()) }; diff --git a/src/spaces/simple_sse.rs b/src/spaces/simple_sse.rs index 6281f3df..c42c4f7b 100644 --- a/src/spaces/simple_sse.rs +++ b/src/spaces/simple_sse.rs @@ -4,7 +4,7 @@ use std::arch::x86::*; use std::arch::x86_64::*; use std::ptr::read_unaligned; -use crate::node::UnalignedF32Slice; +use crate::unaligned_vector::UnalignedVector; #[target_feature(enable = "sse")] unsafe fn hsum128_ps_sse(x: __m128) -> f32 { diff --git a/src/tests/writer.rs b/src/tests/writer.rs index 683ca2b2..9cbe8935 100644 --- a/src/tests/writer.rs +++ b/src/tests/writer.rs @@ -641,7 +641,7 @@ fn add_one_item_incrementally_in_an_empty_db() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) + Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0, 0.0] }) Tree 0: Descendants(Descendants { descendants: [0] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } "###); @@ -660,7 +660,7 @@ fn add_one_item_incrementally_in_a_one_item_db() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) + Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0, 0.0] }) Tree 0: Descendants(Descendants { descendants: [0] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } "###); @@ -695,8 +695,8 @@ fn add_one_item_incrementally_to_create_a_split_node() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) - Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) + Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0, 0.0] }) + Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0, 0.0] }) Tree 0: Descendants(Descendants { descendants: [0, 1] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" } "###); @@ -735,17 +735,17 @@ fn add_one_item_incrementally() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) - Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, 
vector: [1.0000, 0.0000] }) - Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] }) - Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 0.0000] }) - Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] }) - Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) + Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0, 0.0] }) + Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0, 0.0] }) + Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0, 0.0] }) + Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0, 0.0] }) + Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0, 0.0] }) + Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0, 0.0] }) Tree 0: Descendants(Descendants { descendants: [1, 3] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0, 0.0] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0, 0.0] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0, 0.0] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [4], distance: "euclidean" } "###); diff --git a/src/writer.rs b/src/writer.rs index 8f48bfb6..8e054e06 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -13,7 +13,7 @@ use roaring::RoaringBitmap; use crate::distance::Distance; use crate::internals::{KeyCodec, Side}; use crate::item_iter::ItemIter; -use crate::node::{Descendants, ItemIds, Leaf, SplitPlaneNormal, UnalignedVector}; +use crate::node::{Descendants, ItemIds, Leaf, SplitPlaneNormal}; use crate::node_id::NodeMode; use crate::parallel::{ ConcurrentNodeIds, ImmutableLeafs, ImmutableSubsetLeafs, ImmutableTrees, TmpNodes, @@ -21,6 +21,7 @@ use crate::parallel::{ }; use crate::reader::item_leaf; use crate::roaring::RoaringBitmapCodec; +use crate::unaligned_vector::UnalignedVector; use crate::{ Database, Error, ItemId, Key, Metadata, MetadataCodec, Node, NodeCodec, NodeId, Prefix, PrefixCodec, Result, @@ -55,7 +56,7 @@ impl Writer { while let Some((item_id, node)) = cursor.next().transpose()? { match node { Node::Leaf(Leaf { header: _, vector }) => { - let vector = D::read_unaligned_vector(&vector); + let vector = vector.to_vec(); let vector = ND::craft_owned_unaligned_vector_from_f32(vector); let new_leaf = Node::Leaf(Leaf { header: ND::new_header(&vector), vector }); unsafe { @@ -87,7 +88,7 @@ impl Writer { /// Returns an `Option`al vector previous stored in this database. 
pub fn item_vector(&self, rtxn: &RoTxn, item: ItemId) -> Result>> { Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| { - let mut vec = D::read_unaligned_vector(&leaf.vector); + let mut vec = leaf.vector.to_vec(); vec.drain(self.dimensions..); vec })) @@ -141,7 +142,7 @@ impl Writer { }); } - let vector = D::craft_unaligned_vector_from_f32(vector); + let vector = UnalignedVector::from_slice(vector); let leaf = Leaf { header: D::new_header(&vector), vector }; self.database.put(wtxn, &Key::item(self.index, item), &Node::Leaf(leaf))?; let mut updated = self @@ -172,7 +173,7 @@ impl Writer { }); } - let vector = D::craft_unaligned_vector_from_f32(vector); + let vector = UnalignedVector::from_slice(vector); let leaf = Leaf { header: D::new_header(&vector), vector }; let key = Key::item(self.index, item); match self.database.put_with_flags(wtxn, PutFlags::APPEND, &key, &Node::Leaf(leaf)) { @@ -557,7 +558,7 @@ impl Writer { let mut left_ids = RoaringBitmap::new(); let mut right_ids = RoaringBitmap::new(); - if normal.iter_f32().all(|d| d == 0.0) { + if normal.iter().all(|d| d == 0.0) { randomly_split_children(rng, to_insert, &mut left_ids, &mut right_ids); } else { for leaf in to_insert { From a27a238bfab7e64f645323de2d2256c8e20870a2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 8 Jul 2024 14:15:20 +0200 Subject: [PATCH 05/61] wip --- src/tests/binary_quantized.rs | 34 ++++++ src/tests/node.rs | 87 ++++++++++++++ src/unaligned_vector/binary_quantized.rs | 64 ++++++++++ src/unaligned_vector/f32.rs | 45 +++++++ src/unaligned_vector/mod.rs | 145 +++++++++++++++++++++++ 5 files changed, 375 insertions(+) create mode 100644 src/tests/binary_quantized.rs create mode 100644 src/tests/node.rs create mode 100644 src/unaligned_vector/binary_quantized.rs create mode 100644 src/unaligned_vector/f32.rs create mode 100644 src/unaligned_vector/mod.rs diff --git a/src/tests/binary_quantized.rs b/src/tests/binary_quantized.rs new file mode 100644 index 00000000..71f61b9e --- /dev/null +++ b/src/tests/binary_quantized.rs @@ -0,0 +1,34 @@ +use crate::{ + distance::BinaryQuantizedEuclidean, + tests::{create_database, rng}, + Writer, +}; + +#[test] +fn write_and_retrieve_binary_quantized_vector() { + let handle = create_database::(); + let mut wtxn = handle.env.write_txn().unwrap(); + let writer = Writer::new(handle.database, 0, 5); + writer.add_item(&mut wtxn, 0, &[-2.0, -1.0, 0.0, 1.0, 2.0]).unwrap(); + let vec = writer.item_vector(&wtxn, 0).unwrap().unwrap(); + insta::assert_debug_snapshot!(vec, @r###" + [ + 0.0, + 0.0, + 1.0, + 1.0, + 1.0, + ] + "###); + + writer.build(&mut wtxn, &mut rng(), Some(1)).unwrap(); + wtxn.commit().unwrap(); + + insta::assert_snapshot!(handle, @r###" + ================== + Dumping index 0 + Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, "0.0, ..."] }) + Tree 0: Descendants(Descendants { descendants: [0] }) + Root: Metadata { dimensions: 5, items: RoaringBitmap<[0]>, roots: [0], distance: "binary quantized euclidean" } + "###); +} diff --git a/src/tests/node.rs b/src/tests/node.rs new file mode 100644 index 00000000..c2f61637 --- /dev/null +++ b/src/tests/node.rs @@ -0,0 +1,87 @@ +use insta::{assert_debug_snapshot, assert_snapshot}; + +use crate::unaligned_vector::{BinaryQuantized, UnalignedVector}; + +#[test] +fn unaligned_f32_vec() { + let original: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0]; + let bytes: Vec = original.iter().flat_map(|f| f.to_ne_bytes()).collect(); + + let 
unaligned_owned_from_f32 = UnalignedVector::::from_vec(original.clone()); + assert_eq!(bytes, unaligned_owned_from_f32.as_bytes()); + + let unchecked_unaligned_owned_from_bytes = UnalignedVector::::from_bytes_unchecked(&bytes); + assert_eq!(bytes, unchecked_unaligned_owned_from_bytes.as_bytes()); + + let unaligned_owned_from_bytes = UnalignedVector::::from_bytes(&bytes).unwrap(); + assert_eq!(bytes, unaligned_owned_from_bytes.as_bytes()); +} + +#[test] +fn unaligned_binary_quantized_smol() { + let original: Vec = vec![-1.0, 2.0, -3.0, 4.0, 5.0]; + + let unaligned = UnalignedVector::::from_slice(&original); + let s = unaligned.as_bytes().iter().map(|byte| format!("{byte:08b}\n")).collect::(); + assert_snapshot!(s, @r###" + 00011010 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + "###); + + let deser: Vec<_> = unaligned.iter().collect(); + assert_debug_snapshot!(deser[0..original.len()], @r###" + [ + 0.0, + 1.0, + 0.0, + 1.0, + 1.0, + ] + "###); +} + +#[test] +fn unaligned_binary_quantized_large() { + let original: Vec = (0..100) + .map(|n| if n % 3 == 0 || n % 5 == 0 { n as f32 * -1.0 } else { n as f32 }) + .collect(); + + // Two numbers should be used + let unaligned = UnalignedVector::::from_slice(&original); + let s = unaligned.as_bytes().iter().map(|byte| format!("{byte:08b}\n")).collect::(); + assert_snapshot!(s, @r###" + 10010110 + 01101001 + 11001011 + 10110100 + 01100101 + 11011010 + 00110010 + 01101101 + 10011001 + 10110110 + 01001100 + 01011011 + 00000110 + 00000000 + 00000000 + 00000000 + "###); + + let deser: Vec<_> = unaligned.to_vec(); + assert_snapshot!(format!("{:?}", &deser[0..original.len()]), +@"[0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0]"); + for (orig, deser) in original.iter().zip(&deser) { + if orig.is_sign_positive() { + assert_eq!(deser, &1.0); + } else { + assert_eq!(deser, &0.0); + } + } +} diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs new file mode 100644 index 00000000..b6b5ec2a --- /dev/null +++ b/src/unaligned_vector/binary_quantized.rs @@ -0,0 +1,64 @@ +use std::{ + borrow::Cow, + mem::{size_of, transmute}, +}; + +use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; + +/// The type of the words used to quantize a vector +type QuantizedWord = usize; +/// The size of the words used to quantize a vector +const QUANTIZED_WORD_SIZE: usize = QuantizedWord::BITS as usize; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum BinaryQuantized {} + +impl UnalignedVectorCodec for BinaryQuantized { + fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { + if bytes.len() % size_of::() == 0 { + // safety: `UnalignedVector` is transparent + Ok(Cow::Borrowed(unsafe { transmute(bytes) })) + } else { + Err(SizeMismatch) + } + } + + fn from_slice(slice: &[f32]) -> Cow<'static, UnalignedVector> { + let mut output: Vec = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); + for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { + let mut word: QuantizedWord = 0; + for bit in chunk.iter().rev() { + word <<= 1; + word += bit.is_sign_positive() 
as QuantizedWord; + } + output.extend_from_slice(&word.to_ne_bytes()); + } + + Cow::Owned(output) + } + + fn from_vec(vec: Vec) -> Cow<'static, UnalignedVector> { + Cow::Owned(Self::from_slice(&vec).into_owned()) + } + + fn iter(vec: &UnalignedVector) -> impl Iterator + '_ { + vec.vector + .chunks_exact(size_of::()) + .map(|bytes| QuantizedWord::from_ne_bytes(bytes.try_into().unwrap())) + .flat_map(|mut word| { + let mut ret = vec![0.0; QUANTIZED_WORD_SIZE]; + for index in 0..QUANTIZED_WORD_SIZE { + let bit = word & 1; + word >>= 1; + if bit == 1 { + ret[index] = 1.0; + } + } + ret + }) + } + + fn len(vec: &UnalignedVector) -> usize { + vec.vector.len() / size_of::() + } +} diff --git a/src/unaligned_vector/f32.rs b/src/unaligned_vector/f32.rs new file mode 100644 index 00000000..9a16a3bc --- /dev/null +++ b/src/unaligned_vector/f32.rs @@ -0,0 +1,45 @@ +use std::{ + borrow::Cow, + mem::{size_of, transmute}, +}; + +use bytemuck::cast_slice; +use byteorder::{ByteOrder, NativeEndian}; + +use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; + +impl UnalignedVectorCodec for f32 { + /// Creates an unaligned slice of f32 wrapper from a slice of bytes. + fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { + if bytes.len() % size_of::() == 0 { + // safety: `UnalignedF32Slice` is transparent + Ok(Cow::Borrowed(unsafe { transmute(bytes) })) + } else { + Err(SizeMismatch) + } + } + + /// Creates an unaligned slice of f32 wrapper from a slice of f32. + /// The slice is already known to be of the right length. + fn from_slice(slice: &[f32]) -> Cow> { + Self::from_bytes(cast_slice(slice)).unwrap() + } + + /// Creates an unaligned slice of f32 wrapper from a slice of f32. + /// The slice is already known to be of the right length. + fn from_vec(vec: Vec) -> Cow<'static, UnalignedVector> { + let bytes = vec.into_iter().flat_map(|f| f.to_ne_bytes()).collect(); + Cow::Owned(bytes) + } + + /// Returns an iterator of f32 that are read from the slice. + /// The f32 are copied in memory and are therefore, aligned. + fn iter(vec: &UnalignedVector) -> impl Iterator + '_ { + vec.vector.chunks_exact(size_of::()).map(NativeEndian::read_f32) + } + + /// Return the number of f32 that fits into this slice. + fn len(vec: &UnalignedVector) -> usize { + vec.vector.len() / size_of::() + } +} diff --git a/src/unaligned_vector/mod.rs b/src/unaligned_vector/mod.rs new file mode 100644 index 00000000..f0a8c080 --- /dev/null +++ b/src/unaligned_vector/mod.rs @@ -0,0 +1,145 @@ +use std::{ + borrow::{Borrow, Cow}, + fmt, + marker::PhantomData, + mem::transmute, +}; + +pub use binary_quantized::BinaryQuantized; + +use bytemuck::pod_collect_to_vec; + +mod binary_quantized; +mod f32; + +pub trait UnalignedVectorCodec: std::borrow::ToOwned + Sized { + /// Creates an unaligned vector from a slice of bytes. + // Don't allocate. + fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch>; + + /// Creates an unaligned vector from a slice of f32. + /// May allocate depending on the codec. + fn from_slice(slice: &[f32]) -> Cow>; + + /// Creates an unaligned slice of f32 wrapper from a slice of f32. + /// The slice is already known to be of the right length. + fn from_vec(vec: Vec) -> Cow<'static, UnalignedVector>; + + /// Returns an iterator of f32 that are read from the vector. + /// The f32 are copied in memory and are therefore, aligned. + fn iter(vec: &UnalignedVector) -> impl Iterator + '_; + + /// Returns the len of the vector in terms of elements. 
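+    /// One element here means one value as seen through the codec: the `f32` codec
+    /// counts floats, while the binary-quantized codec counts its `QuantizedWord`s.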
+    fn len(vec: &UnalignedVector<Self>) -> usize;
+}
+
+/// A wrapper struct that is used to read unaligned vectors directly from memory.
+#[repr(transparent)]
+pub struct UnalignedVector<Codec: UnalignedVectorCodec> {
+    format: PhantomData<fn() -> Codec>,
+    vector: [u8],
+}
+
+impl<Codec: UnalignedVectorCodec> UnalignedVector<Codec> {
+    /// Creates an unaligned slice of something. It's up to the caller to ensure
+    /// it will be used with the same type it was created initially.
+    pub(crate) fn reset(vector: &mut Cow<'_, UnalignedVector<Codec>>) {
+        match vector {
+            Cow::Borrowed(slice) => *vector = Cow::Owned(vec![0; slice.as_bytes().len()]),
+            Cow::Owned(bytes) => bytes.fill(0),
+        }
+    }
+
+    /// Creates an unaligned vector from a slice of bytes.
+    // Don't allocate.
+    pub fn from_bytes(bytes: &[u8]) -> Result<Cow<UnalignedVector<Codec>>, SizeMismatch> {
+        Codec::from_bytes(bytes)
+    }
+
+    /// Creates an unaligned vector from a slice of f32.
+    /// May allocate depending on the codec.
+    pub fn from_slice(slice: &[f32]) -> Cow<UnalignedVector<Codec>> {
+        Codec::from_slice(slice)
+    }
+
+    /// Creates an unaligned vector from an owned vec of f32.
+    /// The vec is already known to be of the right length.
+    pub fn from_vec(vec: Vec<f32>) -> Cow<'static, UnalignedVector<Codec>> {
+        Codec::from_vec(vec)
+    }
+
+    /// Returns an iterator of f32 that are read from the vector.
+    /// The f32 are copied in memory and are therefore aligned.
+    pub fn iter(&self) -> impl Iterator<Item = f32> + '_ {
+        Codec::iter(self)
+    }
+
+    /// Returns an allocated and aligned `Vec<f32>`.
+    pub fn to_vec(&self) -> Vec<f32> {
+        self.iter().collect()
+    }
+
+    /// Returns the len of the vector in terms of elements.
+    pub fn len(&self) -> usize {
+        Codec::len(self)
+    }
+
+    /// Creates an unaligned slice of something. It's up to the caller to ensure
+    /// it will be used with the same type it was created initially.
+    pub(crate) fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
+        unsafe { transmute(bytes) }
+    }
+
+    /// Returns the original raw slice of bytes.
+    pub(crate) fn as_bytes(&self) -> &[u8] {
+        &self.vector
+    }
+
+    /// Returns whether it is empty or not.
+    pub fn is_empty(&self) -> bool {
+        self.vector.is_empty()
+    }
+    /// Returns the raw pointer to the start of this slice.
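+    /// The SIMD kernels in `src/spaces/` read f32 lanes through this pointer with
+    /// `read_unaligned`.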
+ pub(crate) fn as_ptr(&self) -> *const u8 { + self.vector.as_ptr() + } +} + +#[derive(Debug, thiserror::Error)] +#[error("invalid slice of float dimension")] +pub struct SizeMismatch; + +impl ToOwned for UnalignedVector { + type Owned = Vec; + + fn to_owned(&self) -> Self::Owned { + pod_collect_to_vec(&self.vector) + } +} + +impl Borrow> for Vec { + fn borrow(&self) -> &UnalignedVector { + UnalignedVector::from_bytes_unchecked(self) + } +} + +impl fmt::Debug for UnalignedVector { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut list = f.debug_list(); + + let vec = self.to_vec(); + list.entries(vec.iter().take(10)); + if vec.len() < 10 { + return list.finish(); + } + + // With binary quantization we may be padding with a lot of zeros + if vec[10..].iter().all(|v| *v == 0.0) { + list.entry(&"0.0, ..."); + } else if vec[10..].iter().all(|v| *v == 0.0) { + list.entry(&"other ..."); + } + + list.finish() + } +} From 3634125a7f0a67d623c7af6a8386142a3ac31a93 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 8 Jul 2024 15:50:31 +0200 Subject: [PATCH 06/61] fix the formatter and add a few comments --- src/tests/binary_quantized.rs | 2 +- ...write_and_update_lot_of_random_points.snap | 289 +++++++++--------- src/tests/writer.rs | 26 +- src/unaligned_vector/binary_quantized.rs | 5 +- src/unaligned_vector/f32.rs | 5 +- src/unaligned_vector/mod.rs | 30 +- 6 files changed, 188 insertions(+), 169 deletions(-) diff --git a/src/tests/binary_quantized.rs b/src/tests/binary_quantized.rs index 71f61b9e..109ca28f 100644 --- a/src/tests/binary_quantized.rs +++ b/src/tests/binary_quantized.rs @@ -27,7 +27,7 @@ fn write_and_retrieve_binary_quantized_vector() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, "0.0, ..."] }) + Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000, 1.0000, 1.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, "0.0, ..."] }) Tree 0: Descendants(Descendants { descendants: [0] }) Root: Metadata { dimensions: 5, items: RoaringBitmap<[0]>, roots: [0], distance: "binary quantized euclidean" } "###); diff --git a/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points.snap b/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points.snap index 226dd88f..f7b0ddec 100644 --- a/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points.snap +++ b/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points.snap @@ -4,203 +4,202 @@ expression: handle --- ================== Dumping index 0 -Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8013, 0.2371, 0.6955, 0.8606, 0.5280, 0.2667, 0.6057, 0.9830, 0.9430, 0.0479, 0.2927, 0.4487, 0.8798, 0.0246, 0.3401, 0.7418, 0.0595, 0.0652, 0.7750, 0.7425, 0.4517, 0.2155, 0.1841, 0.3845, 0.9954, 0.1512, 0.4894, 0.9152, 0.7847, 0.9849] }) -Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4199, 0.2620, 0.2655, 0.8414, 0.0192, 0.3828, 0.2561, 0.2692, 0.0368, 0.4624, 0.8739, 0.5080, 0.4557, 0.8887, 0.0685, 0.6341, 0.1904, 0.2201, 0.6947, 0.8503, 0.4607, 0.6554, 0.2088, 0.0586, 0.6857, 0.9343, 0.4788, 0.9246, 0.7944, 0.0473] }) -Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1040, 0.9647, 0.8238, 0.5344, 0.4903, 0.4420, 0.7937, 0.4028, 0.2083, 0.3315, 0.6684, 0.2849, 
0.8002, 0.9037, 0.3644, 0.1173, 0.7807, 0.5366, 0.1554, 0.9343, 0.6308, 0.3727, 0.0552, 0.0634, 0.3584, 0.4273, 0.9748, 0.8893, 0.6025, 0.9890] }) -Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0001, 0.7870, 0.9674, 0.4568, 0.5250, 0.2701, 0.2417, 0.8742, 0.6869, 0.8759, 0.8323, 0.5742, 0.3201, 0.7171, 0.0057, 0.6038, 0.0168, 0.5729, 0.4393, 0.7574, 0.5881, 0.2978, 0.0910, 0.8530, 0.5171, 0.8481, 0.2869, 0.5211, 0.4583, 0.3767] }) -Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8616, 0.3239, 0.5030, 0.3022, 0.8817, 0.3255, 0.9170, 0.9928, 0.3439, 0.0860, 0.2202, 0.6240, 0.6195, 0.3361, 0.7113, 0.8040, 0.7845, 0.4275, 0.3626, 0.1384, 0.7909, 0.7083, 0.1976, 0.1506, 0.6972, 0.5359, 0.8010, 0.3805, 0.8072, 0.6701] }) -Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0850, 0.7778, 0.8818, 0.3427, 0.1293, 0.7240, 0.4773, 0.2871, 0.3988, 0.4014, 0.2562, 0.7026, 0.0131, 0.3608, 0.1922, 0.1645, 0.1968, 0.7862, 0.9030, 0.2735, 0.8840, 0.6386, 0.5482, 0.3175, 0.8519, 0.5081, 0.5617, 0.5580, 0.3213, 0.4471] }) -Item 6: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8761, 0.9344, 0.8748, 0.9176, 0.4249, 0.1069, 0.0392, 0.1024, 0.9981, 0.7313, 0.3394, 0.6192, 0.4707, 0.3849, 0.7316, 0.5298, 0.8744, 0.1086, 0.0631, 0.6646, 0.4413, 0.5919, 0.0467, 0.5988, 0.8101, 0.4828, 0.8831, 0.1383, 0.1076, 0.0762] }) -Item 7: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5699, 0.5919, 0.7788, 0.3705, 0.3807, 0.1708, 0.3678, 0.5629, 0.0612, 0.4826, 0.0217, 0.7778, 0.2831, 0.3242, 0.5338, 0.6375, 0.1274, 0.4257, 0.2305, 0.7650, 0.4528, 0.2800, 0.0278, 0.0397, 0.6639, 0.2003, 0.5412, 0.3166, 0.5944, 0.9598] }) -Item 8: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9520, 0.3982, 0.4226, 0.7733, 0.5495, 0.2244, 0.7816, 0.2531, 0.6199, 0.1072, 0.4570, 0.1867, 0.5403, 0.2813, 0.1820, 0.6583, 0.8015, 0.6706, 0.3091, 0.4282, 0.6033, 0.9524, 0.4419, 0.9046, 0.2348, 0.0132, 0.2073, 0.9664, 0.0525, 0.8284] }) -Item 9: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8134, 0.9647, 0.6640, 0.4464, 0.7439, 0.6904, 0.3159, 0.7607, 0.3483, 0.9963, 0.2933, 0.9251, 0.1057, 0.1036, 0.1371, 0.4079, 0.0195, 0.6610, 0.8298, 0.9571, 0.5529, 0.4484, 0.5659, 0.0259, 0.9343, 0.5204, 0.4442, 0.9591, 0.9487, 0.7276] }) -Item 10: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0050, 0.0899, 0.0005, 0.4564, 0.8086, 0.0683, 0.5358, 0.2569, 0.1496, 0.6153, 0.1664, 0.6660, 0.7205, 0.4417, 0.0528, 0.3227, 0.8926, 0.1879, 0.5557, 0.9117, 0.9192, 0.8943, 0.5542, 0.1462, 0.6786, 0.9411, 0.7647, 0.7636, 0.8593, 0.1476] }) -Item 11: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6560, 0.2343, 0.4079, 0.5972, 0.5766, 0.6739, 0.2985, 0.6167, 0.4834, 0.2818, 0.3920, 0.2409, 0.9702, 0.0911, 0.2453, 0.6558, 0.9055, 0.7360, 0.3131, 0.7204, 0.0299, 0.7458, 0.0238, 0.1737, 0.7046, 0.6704, 0.8351, 0.2749, 0.9873, 0.7914] }) -Item 12: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2447, 0.1475, 0.7740, 0.4535, 0.7574, 0.0332, 0.5334, 0.6643, 0.1125, 0.4113, 0.6932, 0.4522, 0.7108, 0.8101, 0.4659, 0.3048, 0.7430, 0.8442, 0.9418, 0.2419, 0.7590, 0.0127, 0.6989, 0.7743, 0.4338, 0.2675, 0.9357, 0.9794, 0.0114, 0.8744] }) -Item 13: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1726, 0.7399, 0.0271, 0.8721, 0.8725, 0.4023, 0.1558, 0.1044, 0.2096, 0.2081, 0.1872, 0.5438, 0.1299, 0.3760, 0.7304, 0.5688, 0.6662, 0.0684, 0.8029, 0.4273, 0.5500, 
0.9148, 0.7332, 0.9506, 0.5606, 0.8472, 0.4429, 0.7944, 0.9688, 0.3389] }) -Item 14: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4818, 0.7828, 0.7237, 0.2860, 0.0901, 0.4310, 0.0837, 0.7802, 0.8863, 0.1804, 0.3504, 0.7707, 0.7848, 0.5609, 0.0585, 0.3819, 0.9120, 0.8155, 0.8139, 0.9035, 0.2916, 0.6647, 0.1535, 0.2028, 0.9424, 0.2528, 0.1006, 0.4131, 0.6779, 0.4195] }) -Item 15: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6031, 0.2970, 0.9485, 0.0272, 0.4336, 0.1339, 0.2209, 0.8350, 0.2566, 0.9481, 0.3899, 0.9689, 0.2817, 0.1451, 0.1504, 0.0124, 0.1305, 0.9029, 0.4726, 0.2814, 0.2904, 0.8287, 0.2799, 0.9755, 0.4346, 0.8260, 0.5369, 0.8875, 0.3421, 0.7837] }) -Item 16: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4348, 0.7310, 0.5390, 0.4001, 0.3141, 0.8369, 0.4671, 0.1519, 0.5237, 0.1241, 0.7721, 0.0097, 0.3834, 0.4036, 0.0274, 0.1508, 0.1198, 0.0151, 0.9768, 0.0396, 0.1998, 0.5217, 0.5453, 0.6266, 0.2753, 0.5716, 0.7083, 0.0762, 0.2390, 0.0972] }) -Item 17: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0819, 0.5362, 0.8215, 0.2263, 0.3109, 0.2738, 0.6133, 0.5147, 0.9334, 0.9877, 0.5054, 0.8811, 0.4353, 0.3735, 0.7596, 0.8341, 0.5202, 0.8083, 0.0235, 0.4160, 0.1656, 0.6664, 0.2896, 0.5461, 0.7987, 0.2628, 0.7858, 0.5026, 0.4464, 0.4663] }) -Item 18: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5407, 0.5762, 0.7453, 0.1027, 0.5595, 0.5039, 0.8272, 0.9763, 0.9918, 0.1477, 0.3887, 0.8795, 0.8468, 0.6697, 0.7647, 0.7957, 0.1173, 0.9591, 0.4926, 0.3932, 0.5712, 0.5981, 0.3877, 0.3695, 0.2631, 0.7901, 0.6963, 0.9067, 0.3417, 0.5209] }) -Item 19: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1390, 0.7531, 0.3249, 0.8754, 0.9984, 0.7362, 0.0281, 0.2016, 0.9443, 0.1989, 0.1268, 0.7461, 0.2756, 0.8266, 0.0999, 0.5222, 0.6392, 0.9377, 0.9354, 0.7583, 0.1164, 0.8645, 0.8242, 0.1620, 0.2974, 0.3209, 0.9473, 0.4661, 0.5575, 0.2633] }) -Item 20: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9335, 0.4981, 0.2509, 0.4837, 0.1620, 0.1082, 0.0741, 0.0183, 0.5846, 0.5706, 0.4327, 0.9265, 0.9559, 0.0970, 0.7162, 0.1286, 0.1047, 0.7652, 0.5422, 0.4261, 0.2670, 0.8095, 0.6462, 0.1504, 0.0946, 0.8216, 0.1483, 0.2682, 0.9818, 0.2397] }) -Item 21: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6252, 0.9240, 0.7227, 0.6810, 0.2563, 0.3321, 0.2283, 0.9226, 0.8494, 0.0356, 0.9350, 0.3206, 0.0841, 0.3868, 0.5729, 0.1906, 0.8904, 0.1226, 0.3204, 0.3544, 0.6959, 0.0374, 0.7859, 0.8741, 0.1515, 0.8625, 0.3210, 0.2074, 0.0444, 0.0310] }) -Item 22: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9798, 0.5036, 0.8106, 0.3013, 0.1513, 0.2881, 0.4811, 0.6230, 0.1926, 0.0604, 0.9986, 0.4975, 0.2812, 0.3033, 0.9867, 0.4158, 0.3100, 0.9800, 0.9948, 0.3650, 0.5287, 0.8765, 0.9845, 0.9363, 0.8329, 0.6553, 0.4192, 0.1622, 0.5437, 0.3298] }) -Item 23: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9915, 0.6886, 0.1976, 0.1725, 0.6776, 0.1356, 0.3842, 0.4424, 0.6939, 0.8016, 0.5727, 0.7027, 0.1770, 0.7853, 0.6088, 0.0002, 0.2742, 0.6954, 0.8246, 0.8889, 0.8154, 0.2628, 0.3025, 0.8792, 0.3541, 0.2896, 0.9661, 0.0181, 0.2668, 0.6278] }) -Item 24: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2392, 0.8345, 0.1999, 0.5550, 0.2999, 0.8526, 0.2634, 0.0980, 0.1580, 0.5939, 0.7135, 0.3773, 0.5572, 0.0797, 0.8508, 0.1269, 0.1327, 0.7427, 0.7931, 0.2178, 0.3421, 0.8584, 0.6015, 0.7949, 0.1152, 0.9695, 0.9902, 0.6558, 0.8369, 
0.7019] }) -Item 25: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0116, 0.5323, 0.3940, 0.1382, 0.1987, 0.7287, 0.4026, 0.1442, 0.5957, 0.0340, 0.5421, 0.8017, 0.8851, 0.5251, 0.2615, 0.4368, 0.9452, 0.0516, 0.7547, 0.5079, 0.7231, 0.4768, 0.3608, 0.3594, 0.9920, 0.5132, 0.9998, 0.8522, 0.5859, 0.5981] }) -Item 26: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3722, 0.7760, 0.6038, 0.9467, 0.4463, 0.6653, 0.5265, 0.1535, 0.8139, 0.9773, 0.2104, 0.7780, 0.4305, 0.6290, 0.1186, 0.3106, 0.5666, 0.4629, 0.0102, 0.4231, 0.3733, 0.3488, 0.1761, 0.3340, 0.8173, 0.7430, 0.1783, 0.8278, 0.4972, 0.0260] }) -Item 27: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3569, 0.9070, 0.8412, 0.3201, 0.6998, 0.8674, 0.5494, 0.2322, 0.2969, 0.9787, 0.2782, 0.1101, 0.0628, 0.9354, 0.1072, 0.6459, 0.4551, 0.4685, 0.0322, 0.5495, 0.2973, 0.7192, 0.0850, 0.8548, 0.7578, 0.2394, 0.7491, 0.5172, 0.6877, 0.2777] }) -Item 28: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8309, 0.4235, 0.2111, 0.9452, 0.9005, 0.0275, 0.1471, 0.2638, 0.2873, 0.2079, 0.1183, 0.3621, 0.5452, 0.9270, 0.3800, 0.9089, 0.7835, 0.0238, 0.2991, 0.4794, 0.2441, 0.7294, 0.0166, 0.9878, 0.1538, 0.1817, 0.7583, 0.2159, 0.5874, 0.6450] }) -Item 29: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6021, 0.0690, 0.1080, 0.2535, 0.6817, 0.7899, 0.3297, 0.8656, 0.1566, 0.3560, 0.0137, 0.0839, 0.7669, 0.8890, 0.1798, 0.3938, 0.1183, 0.2023, 0.1738, 0.5623, 0.8859, 0.2806, 0.1558, 0.2857, 0.9514, 0.8324, 0.4661, 0.6666, 0.4993, 0.5062] }) -Item 30: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5015, 0.9497, 0.6473, 0.1317, 0.4497, 0.9958, 0.1906, 0.0105, 0.2717, 0.2399, 0.2287, 0.0693, 0.9920, 0.0844, 0.0187, 0.9390, 0.7147, 0.7885, 0.3896, 0.1995, 0.7610, 0.4565, 0.9594, 0.4016, 0.4674, 0.5858, 0.4572, 0.9347, 0.7984, 0.0231] }) -Item 31: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7584, 0.3448, 0.7315, 0.5405, 0.6797, 0.3253, 0.8611, 0.0846, 0.0174, 0.0882, 0.5256, 0.9214, 0.1212, 0.8043, 0.2995, 0.8307, 0.4346, 0.9285, 0.9904, 0.9735, 0.4889, 0.0600, 0.7663, 0.0742, 0.9123, 0.6835, 0.4067, 0.1188, 0.7512, 0.4707] }) -Item 32: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3037, 0.1721, 0.4483, 0.7763, 0.2161, 0.2408, 0.7451, 0.7175, 0.6738, 0.0753, 0.3334, 0.9976, 0.7975, 0.2614, 0.9354, 0.3370, 0.6843, 0.7947, 0.3408, 0.2448, 0.2690, 0.7625, 0.6667, 0.3800, 0.9235, 0.9035, 0.0226, 0.4824, 0.7046, 0.2842] }) -Item 33: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3494, 0.5787, 0.8027, 0.9625, 0.5944, 0.6781, 0.4204, 0.5899, 0.0209, 0.9001, 0.1186, 0.5955, 0.7368, 0.2369, 0.7933, 0.2004, 0.7130, 0.4379, 0.5571, 0.4914, 0.2432, 0.2769, 0.7139, 0.3401, 0.8933, 0.5249, 0.9387, 0.5985, 0.6670, 0.3357] }) -Item 34: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8829, 0.2487, 0.4700, 0.7563, 0.1986, 0.8682, 0.1629, 0.1449, 0.2589, 0.0392, 0.2071, 0.5999, 0.7669, 0.2180, 0.7131, 0.0241, 0.6476, 0.8051, 0.5594, 0.4492, 0.4122, 0.0527, 0.3617, 0.3289, 0.9076, 0.9373, 0.6428, 0.9747, 0.0441, 0.6113] }) -Item 35: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3281, 0.1178, 0.0533, 0.4172, 0.3990, 0.0395, 0.8533, 0.1435, 0.9799, 0.4063, 0.4503, 0.5466, 0.5694, 0.0182, 0.0664, 0.3508, 0.3090, 0.3746, 0.9400, 0.6872, 0.7995, 0.4485, 0.9186, 0.1649, 0.5424, 0.5505, 0.5592, 0.7658, 0.9042, 0.6767] }) -Item 36: Leaf(Leaf { header: NodeHeaderEuclidean { 
bias: 0.0 }, vector: [0.8020, 0.7339, 0.9494, 0.9035, 0.2586, 0.5700, 0.6807, 0.1325, 0.1968, 0.5123, 0.0027, 0.2356, 0.8374, 0.9083, 0.7504, 0.2301, 0.9856, 0.4399, 0.2213, 0.8110, 0.6575, 0.9430, 0.9701, 0.3977, 0.7544, 0.9734, 0.4183, 0.0417, 0.8305, 0.7916] }) -Item 37: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6603, 0.3675, 0.1019, 0.2193, 0.3180, 0.0591, 0.9934, 0.8583, 0.7473, 0.3644, 0.7934, 0.7521, 0.2084, 0.5456, 0.2541, 0.8014, 0.8205, 0.1972, 0.2444, 0.4832, 0.7058, 0.8444, 0.4029, 0.8783, 0.4841, 0.3386, 0.0945, 0.1890, 0.4473, 0.1546] }) -Item 38: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0262, 0.7409, 0.1552, 0.9316, 0.1472, 0.6711, 0.8818, 0.6288, 0.1830, 0.8944, 0.0148, 0.8748, 0.4876, 0.6091, 0.4447, 0.2800, 0.0979, 0.7653, 0.6259, 0.3943, 0.1646, 0.0771, 0.5043, 0.2619, 0.5546, 0.7695, 0.3207, 0.0640, 0.2817, 0.3900] }) -Item 39: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4498, 0.4107, 0.9992, 0.3928, 0.4779, 0.4661, 0.1282, 0.8140, 0.7490, 0.5641, 0.2624, 0.5265, 0.6044, 0.9400, 0.3332, 0.9998, 0.7337, 0.6396, 0.9172, 0.7666, 0.9220, 0.7287, 0.8280, 0.0537, 0.1497, 0.9594, 0.1406, 0.4087, 0.7885, 0.8048] }) -Item 40: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0779, 0.2777, 0.8682, 0.3638, 0.1313, 0.7436, 0.0733, 0.0458, 0.8121, 0.2161, 0.2002, 0.4553, 0.1921, 0.7905, 0.2677, 0.5215, 0.6748, 0.0273, 0.7163, 0.3988, 0.1404, 0.5427, 0.2634, 0.7257, 0.7462, 0.2062, 0.6791, 0.7941, 0.0781, 0.7834] }) -Item 41: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7821, 0.0623, 0.1168, 0.3609, 0.6040, 0.8336, 0.0911, 0.2181, 0.7485, 0.0281, 0.8549, 0.9804, 0.1108, 0.9639, 0.4405, 0.9132, 0.7031, 0.3452, 0.5902, 0.0231, 0.6480, 0.1488, 0.9591, 0.2111, 0.4024, 0.9373, 0.5868, 0.4497, 0.0518, 0.1472] }) -Item 42: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2568, 0.4789, 0.6587, 0.7855, 0.9144, 0.3430, 0.0823, 0.2256, 0.8146, 0.2751, 0.4924, 0.4183, 0.2933, 0.7582, 0.5340, 0.8217, 0.0018, 0.3841, 0.6997, 0.9684, 0.1435, 0.8392, 0.6970, 0.0385, 0.9371, 0.4834, 0.6669, 0.8241, 0.5233, 0.4539] }) -Item 43: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8608, 0.8689, 0.4918, 0.2239, 0.9291, 0.0622, 0.6843, 0.4184, 0.4703, 0.3202, 0.7687, 0.8275, 0.9704, 0.7889, 0.7334, 0.1453, 0.3702, 0.5082, 0.8797, 0.2501, 0.9118, 0.1180, 0.7303, 0.8331, 0.1301, 0.8686, 0.6935, 0.0703, 0.2890, 0.0618] }) -Item 44: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6144, 0.5794, 0.3617, 0.7858, 0.8199, 0.8128, 0.0766, 0.6796, 0.2179, 0.3666, 0.6606, 0.2883, 0.7257, 0.3833, 0.0447, 0.0349, 0.6694, 0.0713, 0.6720, 0.1282, 0.8548, 0.5132, 0.5607, 0.0019, 0.6611, 0.3308, 0.5003, 0.8641, 0.2419, 0.6414] }) -Item 45: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9838, 0.4742, 0.5307, 0.6989, 0.9323, 0.2140, 0.1371, 0.1113, 0.0322, 0.3001, 0.1340, 0.0790, 0.2782, 0.9782, 0.5140, 0.3046, 0.1680, 0.8445, 0.9321, 0.7381, 0.3928, 0.6103, 0.6585, 0.9092, 0.4978, 0.6749, 0.5345, 0.3792, 0.8163, 0.1454] }) -Item 46: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9427, 0.5559, 0.3745, 0.7126, 0.6987, 0.9411, 0.1747, 0.2418, 0.5506, 0.2967, 0.8277, 0.0310, 0.2703, 0.2709, 0.4543, 0.5251, 0.3257, 0.6729, 0.1525, 0.4311, 0.5048, 0.2075, 0.8545, 0.4771, 0.2648, 0.9991, 0.8024, 0.9899, 0.9572, 0.8654] }) -Item 47: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4506, 0.8658, 0.1164, 0.2339, 0.2266, 
0.9050, 0.5849, 0.9792, 0.5951, 0.7706, 0.7796, 0.1371, 0.4991, 0.7207, 0.9812, 0.4095, 0.2353, 0.3570, 0.2602, 0.7291, 0.5657, 0.2764, 0.5556, 0.4127, 0.1003, 0.8519, 0.6812, 0.6548, 0.0776, 0.4340] }) -Item 48: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5565, 0.1465, 0.0304, 0.5234, 0.3295, 0.7092, 0.4073, 0.1632, 0.1200, 0.2177, 0.0689, 0.6135, 0.3846, 0.4304, 0.0500, 0.2129, 0.2971, 0.1506, 0.9715, 0.9342, 0.2374, 0.3957, 0.0690, 0.8184, 0.9631, 0.3977, 0.3957, 0.2023, 0.0423, 0.2887] }) -Item 49: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1322, 0.0670, 0.3404, 0.5339, 0.0229, 0.5964, 0.5497, 0.3819, 0.6553, 0.7129, 0.9637, 0.7032, 0.9449, 0.2401, 0.9760, 0.5352, 0.2430, 0.3744, 0.8652, 0.0512, 0.1682, 0.1739, 0.4144, 0.1880, 0.5934, 0.5135, 0.7059, 0.9658, 0.2671, 0.3260] }) -Item 50: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3692, 0.5773, 0.3161, 0.6517, 0.8682, 0.3599, 0.3781, 0.9383, 0.2134, 0.4018, 0.1500, 0.1781, 0.4345, 0.4835, 0.9135, 0.4374, 0.2108, 0.0648, 0.8693, 0.9976, 0.9400, 0.5552, 0.6205, 0.6198, 0.8230, 0.6537, 0.2967, 0.6439, 0.4720, 0.0310] }) -Item 51: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3643, 0.3282, 0.2580, 0.4477, 0.5920, 0.3581, 0.3502, 0.2441, 0.1707, 0.1243, 0.0999, 0.5953, 0.2748, 0.6587, 0.7290, 0.5587, 0.7189, 0.3957, 0.6531, 0.1442, 0.7119, 0.2412, 0.8803, 0.4895, 0.3027, 0.4314, 0.0367, 0.8662, 0.6177, 0.3332] }) -Item 52: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8903, 0.8028, 0.1029, 0.2941, 0.2599, 0.0219, 0.1355, 0.5165, 0.8002, 0.3078, 0.3179, 0.8688, 0.0647, 0.0900, 0.5074, 0.9400, 0.7376, 0.8803, 0.5407, 0.2104, 0.1794, 0.5748, 0.7948, 0.2866, 0.4960, 0.2474, 0.2759, 0.9624, 0.7611, 0.6257] }) -Item 53: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5928, 0.6267, 0.1717, 0.1604, 0.6506, 0.0302, 0.8289, 0.5930, 0.9304, 0.6067, 0.5622, 0.9136, 0.8093, 0.3523, 0.6691, 0.5854, 0.3384, 0.7957, 0.4830, 0.0315, 0.7294, 0.3134, 0.4225, 0.1835, 0.5813, 0.1435, 0.7924, 0.1626, 0.4706, 0.8306] }) -Item 54: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2889, 0.9493, 0.8938, 0.7127, 0.4381, 0.0183, 0.8352, 0.3095, 0.4705, 0.2917, 0.7906, 0.2911, 0.3932, 0.1622, 0.3431, 0.6335, 0.4265, 0.0735, 0.6467, 0.2161, 0.4258, 0.6989, 0.3564, 0.8927, 0.1347, 0.8975, 0.7207, 0.2613, 0.6087, 0.2291] }) -Item 55: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6095, 0.6335, 0.4740, 0.0746, 0.3871, 0.1016, 0.6414, 0.3076, 0.5484, 0.7602, 0.6928, 0.6693, 0.5314, 0.5478, 0.8312, 0.5319, 0.0348, 0.1317, 0.2634, 0.5546, 0.5113, 0.8326, 0.7357, 0.3584, 0.4124, 0.0272, 0.6879, 0.2599, 0.7837, 0.4725] }) -Item 56: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5139, 0.5344, 0.7468, 0.8617, 0.5994, 0.9846, 0.2361, 0.3984, 0.5078, 0.0666, 0.0221, 0.7019, 0.8516, 0.4629, 0.8699, 0.8525, 0.6960, 0.6640, 0.8220, 0.3141, 0.8946, 0.5284, 0.8459, 0.9466, 0.3926, 0.4402, 0.9754, 0.4765, 0.4905, 0.7996] }) -Item 57: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1524, 0.3441, 0.2368, 0.4350, 0.5328, 0.3005, 0.7021, 0.3614, 0.6369, 0.7984, 0.1099, 0.2671, 0.9108, 0.9017, 0.3862, 0.5157, 0.4510, 0.5185, 0.4465, 0.2198, 0.5385, 0.3734, 0.2170, 0.7698, 0.4575, 0.7720, 0.0857, 0.3963, 0.8207, 0.6391] }) -Item 58: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6107, 0.3400, 0.5636, 0.3807, 0.7145, 0.4766, 0.5889, 0.5437, 0.2690, 0.7633, 0.3959, 0.5819, 0.7150, 
0.0753, 0.9928, 0.1159, 0.1633, 0.5916, 0.9617, 0.3563, 0.6703, 0.1401, 0.5655, 0.6356, 0.5906, 0.4854, 0.1440, 0.0011, 0.1217, 0.4162] }) -Item 59: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6227, 0.1202, 0.9715, 0.3936, 0.3423, 0.7479, 0.6526, 0.1867, 0.5568, 0.0922, 0.4923, 0.6243, 0.1646, 0.4228, 0.6323, 0.3442, 0.2133, 0.7990, 0.4992, 0.0482, 0.6260, 0.0885, 0.9299, 0.4293, 0.7760, 0.7470, 0.9417, 0.0020, 0.4020, 0.9238] }) -Item 60: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1100, 0.1244, 0.0460, 0.6082, 0.1978, 0.9696, 0.9862, 0.1179, 0.9388, 0.2901, 0.6424, 0.7064, 0.0247, 0.5278, 0.4262, 0.0971, 0.7494, 0.8127, 0.5567, 0.0963, 0.4559, 0.7393, 0.6480, 0.6909, 0.7549, 0.0439, 0.9496, 0.1551, 0.6588, 0.4218] }) -Item 61: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1347, 0.7000, 0.0530, 0.4959, 0.8227, 0.9831, 0.5433, 0.5201, 0.7924, 0.3847, 0.4618, 0.7762, 0.3668, 0.3105, 0.9197, 0.1617, 0.8675, 0.7686, 0.5316, 0.6798, 0.9551, 0.6542, 0.3051, 0.9479, 0.5483, 0.9541, 0.1520, 0.7273, 0.3857, 0.7027] }) -Item 62: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1364, 0.8633, 0.9297, 0.6135, 0.2317, 0.4359, 0.2391, 0.3977, 0.5448, 0.5308, 0.6295, 0.7363, 0.8984, 0.5580, 0.1391, 0.8332, 0.5848, 0.6468, 0.0576, 0.8127, 0.7673, 0.7922, 0.3125, 0.2874, 0.8588, 0.7406, 0.6889, 0.3174, 0.7565, 0.0756] }) -Item 63: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9221, 0.3578, 0.3207, 0.9945, 0.9288, 0.4608, 0.3001, 0.0296, 0.4678, 0.7422, 0.1848, 0.2948, 0.9534, 0.1223, 0.3098, 0.1023, 0.2691, 0.5410, 0.6640, 0.7102, 0.6446, 0.8912, 0.2151, 0.8837, 0.4653, 0.7481, 0.9103, 0.1118, 0.4085, 0.3791] }) -Item 64: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4806, 0.3718, 0.0560, 0.5007, 0.9558, 0.4234, 0.8126, 0.4764, 0.4901, 0.6482, 0.4207, 0.2268, 0.3556, 0.6862, 0.9233, 0.9845, 0.6235, 0.6472, 0.6851, 0.4445, 0.4305, 0.5253, 0.9606, 0.4910, 0.0767, 0.1004, 0.8427, 0.8943, 0.1777, 0.3763] }) -Item 65: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5677, 0.1247, 0.4928, 0.4097, 0.8433, 0.9238, 0.7848, 0.4437, 0.4696, 0.9886, 0.1417, 0.4364, 0.4471, 0.6284, 0.2548, 0.7256, 0.0324, 0.6394, 0.6730, 0.4224, 0.6438, 0.4449, 0.2229, 0.5860, 0.9926, 0.9454, 0.8358, 0.2026, 0.9971, 0.0270] }) -Item 66: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6083, 0.8526, 0.7679, 0.5156, 0.8480, 0.2792, 0.3200, 0.0453, 0.2084, 0.4759, 0.9403, 0.0743, 0.5665, 0.2733, 0.1400, 0.8295, 0.5458, 0.8038, 0.1603, 0.4128, 0.8930, 0.4420, 0.6424, 0.2340, 0.0255, 0.5873, 0.5887, 0.4600, 0.8987, 0.3656] }) -Item 67: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5299, 0.5421, 0.8194, 0.1018, 0.5426, 0.9350, 0.3228, 0.7979, 0.7473, 0.1118, 0.2515, 0.0225, 0.1534, 0.4736, 0.5641, 0.1919, 0.5379, 0.6815, 0.8908, 0.6717, 0.8996, 0.8896, 0.0856, 0.5125, 0.2434, 0.0755, 0.5839, 0.1712, 0.9467, 0.6559] }) -Item 68: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2676, 0.9678, 0.0177, 0.3344, 0.6398, 0.0680, 0.6478, 0.0822, 0.8348, 0.7480, 0.5276, 0.5062, 0.3791, 0.8248, 0.7260, 0.1881, 0.9817, 0.8896, 0.4107, 0.6907, 0.1315, 0.3241, 0.5918, 0.8271, 0.0586, 0.7400, 0.0069, 0.3202, 0.4187, 0.8247] }) -Item 69: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6163, 0.7762, 0.4365, 0.6713, 0.5647, 0.3449, 0.6615, 0.9430, 0.5941, 0.3563, 0.8698, 0.7265, 0.7299, 0.6801, 0.1361, 0.3680, 0.3622, 0.9737, 0.9003, 0.5171, 0.3981, 
0.6434, 0.2324, 0.3246, 0.5110, 0.6495, 0.7496, 0.4635, 0.8431, 0.4580] }) -Item 70: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2508, 0.2228, 0.0580, 0.2005, 0.3293, 0.6717, 0.7376, 0.4489, 0.1877, 0.7376, 0.3991, 0.7369, 0.4428, 0.7775, 0.3183, 0.7320, 0.7348, 0.3190, 0.6882, 0.8851, 0.6177, 0.1295, 0.5318, 0.2348, 0.8751, 0.4623, 0.3082, 0.2943, 0.9015, 0.4626] }) -Item 71: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6478, 0.9371, 0.2378, 0.4279, 0.1400, 0.2146, 0.3193, 0.7330, 0.7932, 0.7295, 0.9781, 0.8327, 0.1761, 0.1474, 0.8589, 0.0800, 0.5614, 0.4009, 0.9556, 0.7030, 0.7711, 0.5020, 0.7911, 0.6455, 0.6838, 0.0809, 0.9930, 0.7730, 0.2199, 0.3138] }) -Item 72: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3637, 0.8624, 0.6657, 0.4426, 0.9055, 0.7527, 0.3679, 0.4067, 0.8130, 0.2441, 0.3602, 0.2777, 0.2114, 0.5577, 0.5203, 0.3862, 0.5795, 0.7360, 0.4321, 0.6384, 0.5414, 0.2486, 0.7852, 0.3590, 0.1454, 0.6277, 0.6258, 0.8744, 0.8989, 0.5967] }) -Item 73: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4640, 0.7218, 0.0154, 0.0829, 0.4829, 0.5139, 0.4344, 0.5872, 0.2770, 0.3745, 0.8848, 0.3805, 0.3193, 0.6432, 0.2470, 0.6752, 0.1428, 0.8068, 0.2241, 0.2959, 0.8348, 0.8973, 0.1213, 0.7855, 0.3006, 0.1461, 0.5417, 0.2111, 0.8150, 0.1504] }) -Item 74: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5250, 0.1137, 0.4081, 0.2765, 0.1755, 0.3861, 0.7862, 0.8678, 0.3681, 0.1279, 0.3726, 0.6535, 0.8735, 0.4611, 0.3582, 0.0981, 0.6012, 0.8937, 0.1000, 0.8548, 0.9741, 0.9287, 0.5661, 0.1689, 0.9944, 0.1641, 0.8835, 0.6598, 0.2704, 0.4093] }) -Item 75: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3639, 0.3210, 0.3718, 0.7818, 0.6264, 0.2524, 0.6018, 0.4059, 0.9744, 0.3568, 0.4892, 0.7652, 0.4566, 0.0884, 0.4999, 0.1489, 0.3169, 0.9850, 0.7573, 0.3145, 0.0845, 0.5885, 0.2073, 0.0937, 0.4518, 0.0957, 0.1403, 0.6862, 0.4749, 0.3771] }) -Item 76: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5349, 0.5286, 0.2229, 0.8101, 0.6677, 0.9457, 0.9903, 0.6100, 0.3207, 0.0250, 0.1856, 0.8487, 0.8339, 0.3332, 0.8071, 0.3050, 0.0553, 0.2480, 0.8978, 0.4265, 0.6479, 0.1854, 0.2101, 0.0461, 0.0991, 0.9447, 0.4235, 0.6987, 0.3664, 0.0319] }) -Item 77: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6003, 0.7202, 0.3803, 0.5052, 0.4006, 0.6708, 0.0438, 0.8432, 0.8772, 0.6849, 0.4162, 0.6977, 0.3395, 0.6632, 0.0256, 0.3843, 0.0968, 0.0266, 0.0174, 0.0328, 0.1925, 0.2962, 0.7774, 0.8711, 0.5001, 0.5312, 0.5863, 0.1766, 0.7026, 0.8613] }) -Item 78: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7850, 0.8396, 0.1563, 0.0331, 0.3667, 0.6701, 0.5455, 0.2239, 0.8733, 0.0339, 0.4671, 0.0209, 0.0339, 0.2941, 0.5505, 0.7765, 0.0752, 0.9327, 0.6638, 0.4238, 0.7932, 0.0898, 0.3300, 0.9143, 0.0324, 0.8421, 0.8228, 0.5095, 0.1214, 0.5619] }) -Item 79: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8093, 0.7442, 0.3738, 0.9164, 0.5923, 0.7353, 0.5379, 0.6815, 0.5925, 0.7954, 0.9246, 0.9350, 0.7121, 0.9404, 0.7657, 0.6102, 0.3958, 0.8172, 0.1868, 0.5326, 0.0352, 0.5308, 0.4925, 0.8650, 0.7519, 0.6908, 0.0251, 0.4628, 0.2902, 0.3531] }) -Item 80: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5135, 0.8836, 0.7895, 0.4037, 0.6198, 0.1685, 0.2186, 0.6238, 0.3219, 0.6977, 0.4806, 0.2072, 0.4488, 0.2823, 0.5175, 0.4700, 0.3884, 0.7752, 0.9699, 0.6290, 0.5182, 0.4828, 0.9614, 0.5398, 0.2815, 0.6249, 0.2054, 0.3607, 0.5475, 
0.7528] }) -Item 81: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2370, 0.7334, 0.4755, 0.8921, 0.1448, 0.2971, 0.2116, 0.1124, 0.7297, 0.2965, 0.6643, 0.1020, 0.4147, 0.5717, 0.5803, 0.1797, 0.3495, 0.7602, 0.5506, 0.6812, 0.6435, 0.2448, 0.0042, 0.2285, 0.7938, 0.3224, 0.0413, 0.3119, 0.0898, 0.8277] }) -Item 82: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9034, 0.0318, 0.8865, 0.1217, 0.2441, 0.7340, 0.2467, 0.2411, 0.7661, 0.5926, 0.6339, 0.6470, 0.8512, 0.9797, 0.8513, 0.1059, 0.0247, 0.7131, 0.9073, 0.0132, 0.9954, 0.4102, 0.6152, 0.5895, 0.6839, 0.3159, 0.1004, 0.5669, 0.2915, 0.1503] }) -Item 83: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3071, 0.2166, 0.0566, 0.5153, 0.8628, 0.9601, 0.6390, 0.4052, 0.2759, 0.4989, 0.2220, 0.9184, 0.5489, 0.0201, 0.1469, 0.1779, 0.1827, 0.6549, 0.8811, 0.2189, 0.8447, 0.8635, 0.3256, 0.5651, 0.0724, 0.9818, 0.3070, 0.3512, 0.2026, 0.7946] }) -Item 84: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2057, 0.2311, 0.9211, 0.3561, 0.6051, 0.3712, 0.7174, 0.4273, 0.7494, 0.1833, 0.2720, 0.9787, 0.8918, 0.4156, 0.8195, 0.0671, 0.4287, 0.6879, 0.9447, 0.1533, 0.6358, 0.4703, 0.1989, 0.0168, 0.7957, 0.5340, 0.7317, 0.6926, 0.7569, 0.2755] }) -Item 85: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4718, 0.3682, 0.4350, 0.3129, 0.1289, 0.7526, 0.8249, 0.5640, 0.9296, 0.8479, 0.1283, 0.2632, 0.3694, 0.7950, 0.0273, 0.9558, 0.6983, 0.7563, 0.1747, 0.2701, 0.7434, 0.5281, 0.8098, 0.8743, 0.5457, 0.8847, 0.5409, 0.8640, 0.9360, 0.3401] }) -Item 86: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9871, 0.6448, 0.0883, 0.6999, 0.1526, 0.2259, 0.4263, 0.9240, 0.2756, 0.3507, 0.1650, 0.1196, 0.1506, 0.2522, 0.7583, 0.1968, 0.8939, 0.9972, 0.0911, 0.9558, 0.4209, 0.0939, 0.0936, 0.1702, 0.8146, 0.7677, 0.2072, 0.6328, 0.4451, 0.5579] }) -Item 87: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2909, 0.8867, 0.3238, 0.4342, 0.3491, 0.4305, 0.8452, 0.0936, 0.1220, 0.3452, 0.1736, 0.7292, 0.1167, 0.4834, 0.1016, 0.2478, 0.2028, 0.9520, 0.2854, 0.3369, 0.4047, 0.7799, 0.3272, 0.9902, 0.0833, 0.1636, 0.7756, 0.8540, 0.4763, 0.6465] }) -Item 88: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1525, 0.7012, 0.9764, 0.8845, 0.5382, 0.6827, 0.8005, 0.4046, 0.9374, 0.6809, 0.6383, 0.9244, 0.7854, 0.7037, 0.4220, 0.0141, 0.4728, 0.8868, 0.9550, 0.1127, 0.0053, 0.3766, 0.5752, 0.2369, 0.8159, 0.3157, 0.5809, 0.5965, 0.5334, 0.2019] }) -Item 89: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6270, 0.3774, 0.7874, 0.7272, 0.4240, 0.1508, 0.0360, 0.5710, 0.2254, 0.0950, 0.4095, 0.3464, 0.8501, 0.4572, 0.3022, 0.0795, 0.0845, 0.0360, 0.3839, 0.5728, 0.5501, 0.4156, 0.9986, 0.5015, 0.4543, 0.9016, 0.3680, 0.3139, 0.7894, 0.8085] }) -Item 90: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4082, 0.6284, 0.5104, 0.7905, 0.1476, 0.2033, 0.4106, 0.2252, 0.8351, 0.3193, 0.5570, 0.5288, 0.5884, 0.3909, 0.3600, 0.5733, 0.4198, 0.0680, 0.7674, 0.5035, 0.2838, 0.7335, 0.4276, 0.7977, 0.3828, 0.8553, 0.1026, 0.5539, 0.8805, 0.1420] }) -Item 91: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8560, 0.9670, 0.3969, 0.5008, 0.1567, 0.7420, 0.0072, 0.1891, 0.9690, 0.0387, 0.9630, 0.3936, 0.1125, 0.4424, 0.0855, 0.7900, 0.8814, 0.8120, 0.2263, 0.8761, 0.2498, 0.8021, 0.0934, 0.3020, 0.8538, 0.9658, 0.8355, 0.4396, 0.6029, 0.0986] }) -Item 92: Leaf(Leaf { header: NodeHeaderEuclidean { 
bias: 0.0 }, vector: [0.7990, 0.2114, 0.7110, 0.6670, 0.0472, 0.2463, 0.5026, 0.0325, 0.2475, 0.1461, 0.4610, 0.7633, 0.7501, 0.3765, 0.0501, 0.1671, 0.8281, 0.6928, 0.9685, 0.4547, 0.3742, 0.3809, 0.9774, 0.7615, 0.7871, 0.5825, 0.8942, 0.8298, 0.7062, 0.9405] }) -Item 93: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5034, 0.4728, 0.9950, 0.7630, 0.0154, 0.4453, 0.6893, 0.6996, 0.0246, 0.0245, 0.1628, 0.7592, 0.4518, 0.3659, 0.5843, 0.9270, 0.5562, 0.2315, 0.2196, 0.7557, 0.7544, 0.5390, 0.9575, 0.7762, 0.6434, 0.8592, 0.7840, 0.1388, 0.2920, 0.1637] }) -Item 94: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6522, 0.5475, 0.9462, 0.7139, 0.9925, 0.7899, 0.9753, 0.6984, 0.4501, 0.8380, 0.9894, 0.4523, 0.7725, 0.1341, 0.8773, 0.5224, 0.2336, 0.7734, 0.8476, 0.1302, 0.0686, 0.9910, 0.0262, 0.5346, 0.3452, 0.8651, 0.8749, 0.1996, 0.3920, 0.5587] }) -Item 95: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8157, 0.8688, 0.2659, 0.6080, 0.5274, 0.1883, 0.7562, 0.8511, 0.6928, 0.8151, 0.3489, 0.0685, 0.7027, 0.6863, 0.5350, 0.2973, 0.2547, 0.5162, 0.4468, 0.8645, 0.3545, 0.8380, 0.6886, 0.7425, 0.8855, 0.7851, 0.0179, 0.2890, 0.2872, 0.6186] }) -Item 96: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6829, 0.8851, 0.9375, 0.7638, 0.4942, 0.5561, 0.4220, 0.9060, 0.2907, 0.6102, 0.5620, 0.6907, 0.1597, 0.8826, 0.0216, 0.7857, 0.6494, 0.0691, 0.4100, 0.5620, 0.9864, 0.7445, 0.0043, 0.4133, 0.6850, 0.4214, 0.1267, 0.0643, 0.5029, 0.6847] }) -Item 97: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0850, 0.4087, 0.7257, 0.3157, 0.9526, 0.5290, 0.5818, 0.5460, 0.1906, 0.9422, 0.7444, 0.2185, 0.1185, 0.7708, 0.8692, 0.3749, 0.7770, 0.2513, 0.6984, 0.8243, 0.4377, 0.5163, 0.2195, 0.0276, 0.8162, 0.6553, 0.7764, 0.3520, 0.9825, 0.5628] }) -Item 98: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5644, 0.3510, 0.4506, 0.6384, 0.6324, 0.5511, 0.5272, 0.8371, 0.9137, 0.4253, 0.8809, 0.0745, 0.0019, 0.3211, 0.0516, 0.1217, 0.5940, 0.4217, 0.8276, 0.7744, 0.1488, 0.5828, 0.6356, 0.1285, 0.5882, 0.6628, 0.9896, 0.7303, 0.5647, 0.7796] }) -Item 99: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8320, 0.5156, 0.6172, 0.6617, 0.4999, 0.2464, 0.4536, 0.3265, 0.2163, 0.5406, 0.9164, 0.5001, 0.9171, 0.7590, 0.2505, 0.0364, 0.8099, 0.6270, 0.6354, 0.1587, 0.4080, 0.2992, 0.0561, 0.0112, 0.4348, 0.1503, 0.6197, 0.5171, 0.2871, 0.8704] }) +Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8013, 0.2371, 0.6955, 0.8606, 0.5280, 0.2667, 0.6057, 0.9830, 0.9430, 0.0479, "other ..."] }) +Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4199, 0.2620, 0.2655, 0.8414, 0.0192, 0.3828, 0.2561, 0.2692, 0.0368, 0.4624, "other ..."] }) +Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1040, 0.9647, 0.8238, 0.5344, 0.4903, 0.4420, 0.7937, 0.4028, 0.2083, 0.3315, "other ..."] }) +Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0001, 0.7870, 0.9674, 0.4568, 0.5250, 0.2701, 0.2417, 0.8742, 0.6869, 0.8759, "other ..."] }) +Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8616, 0.3239, 0.5030, 0.3022, 0.8817, 0.3255, 0.9170, 0.9928, 0.3439, 0.0860, "other ..."] }) +Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0850, 0.7778, 0.8818, 0.3427, 0.1293, 0.7240, 0.4773, 0.2871, 0.3988, 0.4014, "other ..."] }) +Item 6: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 
0.0 }, vector: [0.8761, 0.9344, 0.8748, 0.9176, 0.4249, 0.1069, 0.0392, 0.1024, 0.9981, 0.7313, "other ..."] }) +Item 7: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5699, 0.5919, 0.7788, 0.3705, 0.3807, 0.1708, 0.3678, 0.5629, 0.0612, 0.4826, "other ..."] }) +Item 8: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9520, 0.3982, 0.4226, 0.7733, 0.5495, 0.2244, 0.7816, 0.2531, 0.6199, 0.1072, "other ..."] }) +Item 9: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8134, 0.9647, 0.6640, 0.4464, 0.7439, 0.6904, 0.3159, 0.7607, 0.3483, 0.9963, "other ..."] }) +Item 10: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0050, 0.0899, 0.0005, 0.4564, 0.8086, 0.0683, 0.5358, 0.2569, 0.1496, 0.6153, "other ..."] }) +Item 11: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6560, 0.2343, 0.4079, 0.5972, 0.5766, 0.6739, 0.2985, 0.6167, 0.4834, 0.2818, "other ..."] }) +Item 12: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2447, 0.1475, 0.7740, 0.4535, 0.7574, 0.0332, 0.5334, 0.6643, 0.1125, 0.4113, "other ..."] }) +Item 13: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1726, 0.7399, 0.0271, 0.8721, 0.8725, 0.4023, 0.1558, 0.1044, 0.2096, 0.2081, "other ..."] }) +Item 14: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4818, 0.7828, 0.7237, 0.2860, 0.0901, 0.4310, 0.0837, 0.7802, 0.8863, 0.1804, "other ..."] }) +Item 15: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6031, 0.2970, 0.9485, 0.0272, 0.4336, 0.1339, 0.2209, 0.8350, 0.2566, 0.9481, "other ..."] }) +Item 16: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4348, 0.7310, 0.5390, 0.4001, 0.3141, 0.8369, 0.4671, 0.1519, 0.5237, 0.1241, "other ..."] }) +Item 17: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0819, 0.5362, 0.8215, 0.2263, 0.3109, 0.2738, 0.6133, 0.5147, 0.9334, 0.9877, "other ..."] }) +Item 18: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5407, 0.5762, 0.7453, 0.1027, 0.5595, 0.5039, 0.8272, 0.9763, 0.9918, 0.1477, "other ..."] }) +Item 19: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1390, 0.7531, 0.3249, 0.8754, 0.9984, 0.7362, 0.0281, 0.2016, 0.9443, 0.1989, "other ..."] }) +Item 20: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9335, 0.4981, 0.2509, 0.4837, 0.1620, 0.1082, 0.0741, 0.0183, 0.5846, 0.5706, "other ..."] }) +Item 21: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6252, 0.9240, 0.7227, 0.6810, 0.2563, 0.3321, 0.2283, 0.9226, 0.8494, 0.0356, "other ..."] }) +Item 22: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9798, 0.5036, 0.8106, 0.3013, 0.1513, 0.2881, 0.4811, 0.6230, 0.1926, 0.0604, "other ..."] }) +Item 23: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9915, 0.6886, 0.1976, 0.1725, 0.6776, 0.1356, 0.3842, 0.4424, 0.6939, 0.8016, "other ..."] }) +Item 24: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2392, 0.8345, 0.1999, 0.5550, 0.2999, 0.8526, 0.2634, 0.0980, 0.1580, 0.5939, "other ..."] }) +Item 25: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0116, 0.5323, 0.3940, 0.1382, 0.1987, 0.7287, 0.4026, 0.1442, 0.5957, 0.0340, "other ..."] }) +Item 26: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3722, 0.7760, 0.6038, 0.9467, 0.4463, 0.6653, 0.5265, 0.1535, 0.8139, 0.9773, "other ..."] }) +Item 27: Leaf(Leaf { header: 
NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3569, 0.9070, 0.8412, 0.3201, 0.6998, 0.8674, 0.5494, 0.2322, 0.2969, 0.9787, "other ..."] }) +Item 28: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8309, 0.4235, 0.2111, 0.9452, 0.9005, 0.0275, 0.1471, 0.2638, 0.2873, 0.2079, "other ..."] }) +Item 29: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6021, 0.0690, 0.1080, 0.2535, 0.6817, 0.7899, 0.3297, 0.8656, 0.1566, 0.3560, "other ..."] }) +Item 30: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5015, 0.9497, 0.6473, 0.1317, 0.4497, 0.9958, 0.1906, 0.0105, 0.2717, 0.2399, "other ..."] }) +Item 31: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7584, 0.3448, 0.7315, 0.5405, 0.6797, 0.3253, 0.8611, 0.0846, 0.0174, 0.0882, "other ..."] }) +Item 32: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3037, 0.1721, 0.4483, 0.7763, 0.2161, 0.2408, 0.7451, 0.7175, 0.6738, 0.0753, "other ..."] }) +Item 33: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3494, 0.5787, 0.8027, 0.9625, 0.5944, 0.6781, 0.4204, 0.5899, 0.0209, 0.9001, "other ..."] }) +Item 34: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8829, 0.2487, 0.4700, 0.7563, 0.1986, 0.8682, 0.1629, 0.1449, 0.2589, 0.0392, "other ..."] }) +Item 35: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3281, 0.1178, 0.0533, 0.4172, 0.3990, 0.0395, 0.8533, 0.1435, 0.9799, 0.4063, "other ..."] }) +Item 36: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8020, 0.7339, 0.9494, 0.9035, 0.2586, 0.5700, 0.6807, 0.1325, 0.1968, 0.5123, "other ..."] }) +Item 37: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6603, 0.3675, 0.1019, 0.2193, 0.3180, 0.0591, 0.9934, 0.8583, 0.7473, 0.3644, "other ..."] }) +Item 38: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0262, 0.7409, 0.1552, 0.9316, 0.1472, 0.6711, 0.8818, 0.6288, 0.1830, 0.8944, "other ..."] }) +Item 39: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4498, 0.4107, 0.9992, 0.3928, 0.4779, 0.4661, 0.1282, 0.8140, 0.7490, 0.5641, "other ..."] }) +Item 40: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0779, 0.2777, 0.8682, 0.3638, 0.1313, 0.7436, 0.0733, 0.0458, 0.8121, 0.2161, "other ..."] }) +Item 41: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7821, 0.0623, 0.1168, 0.3609, 0.6040, 0.8336, 0.0911, 0.2181, 0.7485, 0.0281, "other ..."] }) +Item 42: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2568, 0.4789, 0.6587, 0.7855, 0.9144, 0.3430, 0.0823, 0.2256, 0.8146, 0.2751, "other ..."] }) +Item 43: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8608, 0.8689, 0.4918, 0.2239, 0.9291, 0.0622, 0.6843, 0.4184, 0.4703, 0.3202, "other ..."] }) +Item 44: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6144, 0.5794, 0.3617, 0.7858, 0.8199, 0.8128, 0.0766, 0.6796, 0.2179, 0.3666, "other ..."] }) +Item 45: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9838, 0.4742, 0.5307, 0.6989, 0.9323, 0.2140, 0.1371, 0.1113, 0.0322, 0.3001, "other ..."] }) +Item 46: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9427, 0.5559, 0.3745, 0.7126, 0.6987, 0.9411, 0.1747, 0.2418, 0.5506, 0.2967, "other ..."] }) +Item 47: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4506, 0.8658, 0.1164, 0.2339, 0.2266, 0.9050, 0.5849, 0.9792, 0.5951, 0.7706, "other ..."] }) +Item 48: 
Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5565, 0.1465, 0.0304, 0.5234, 0.3295, 0.7092, 0.4073, 0.1632, 0.1200, 0.2177, "other ..."] }) +Item 49: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1322, 0.0670, 0.3404, 0.5339, 0.0229, 0.5964, 0.5497, 0.3819, 0.6553, 0.7129, "other ..."] }) +Item 50: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3692, 0.5773, 0.3161, 0.6517, 0.8682, 0.3599, 0.3781, 0.9383, 0.2134, 0.4018, "other ..."] }) +Item 51: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3643, 0.3282, 0.2580, 0.4477, 0.5920, 0.3581, 0.3502, 0.2441, 0.1707, 0.1243, "other ..."] }) +Item 52: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8903, 0.8028, 0.1029, 0.2941, 0.2599, 0.0219, 0.1355, 0.5165, 0.8002, 0.3078, "other ..."] }) +Item 53: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5928, 0.6267, 0.1717, 0.1604, 0.6506, 0.0302, 0.8289, 0.5930, 0.9304, 0.6067, "other ..."] }) +Item 54: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2889, 0.9493, 0.8938, 0.7127, 0.4381, 0.0183, 0.8352, 0.3095, 0.4705, 0.2917, "other ..."] }) +Item 55: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6095, 0.6335, 0.4740, 0.0746, 0.3871, 0.1016, 0.6414, 0.3076, 0.5484, 0.7602, "other ..."] }) +Item 56: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5139, 0.5344, 0.7468, 0.8617, 0.5994, 0.9846, 0.2361, 0.3984, 0.5078, 0.0666, "other ..."] }) +Item 57: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1524, 0.3441, 0.2368, 0.4350, 0.5328, 0.3005, 0.7021, 0.3614, 0.6369, 0.7984, "other ..."] }) +Item 58: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6107, 0.3400, 0.5636, 0.3807, 0.7145, 0.4766, 0.5889, 0.5437, 0.2690, 0.7633, "other ..."] }) +Item 59: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6227, 0.1202, 0.9715, 0.3936, 0.3423, 0.7479, 0.6526, 0.1867, 0.5568, 0.0922, "other ..."] }) +Item 60: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1100, 0.1244, 0.0460, 0.6082, 0.1978, 0.9696, 0.9862, 0.1179, 0.9388, 0.2901, "other ..."] }) +Item 61: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1347, 0.7000, 0.0530, 0.4959, 0.8227, 0.9831, 0.5433, 0.5201, 0.7924, 0.3847, "other ..."] }) +Item 62: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1364, 0.8633, 0.9297, 0.6135, 0.2317, 0.4359, 0.2391, 0.3977, 0.5448, 0.5308, "other ..."] }) +Item 63: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9221, 0.3578, 0.3207, 0.9945, 0.9288, 0.4608, 0.3001, 0.0296, 0.4678, 0.7422, "other ..."] }) +Item 64: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4806, 0.3718, 0.0560, 0.5007, 0.9558, 0.4234, 0.8126, 0.4764, 0.4901, 0.6482, "other ..."] }) +Item 65: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5677, 0.1247, 0.4928, 0.4097, 0.8433, 0.9238, 0.7848, 0.4437, 0.4696, 0.9886, "other ..."] }) +Item 66: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6083, 0.8526, 0.7679, 0.5156, 0.8480, 0.2792, 0.3200, 0.0453, 0.2084, 0.4759, "other ..."] }) +Item 67: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5299, 0.5421, 0.8194, 0.1018, 0.5426, 0.9350, 0.3228, 0.7979, 0.7473, 0.1118, "other ..."] }) +Item 68: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2676, 0.9678, 0.0177, 0.3344, 0.6398, 0.0680, 0.6478, 0.0822, 0.8348, 0.7480, "other 
..."] }) +Item 69: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6163, 0.7762, 0.4365, 0.6713, 0.5647, 0.3449, 0.6615, 0.9430, 0.5941, 0.3563, "other ..."] }) +Item 70: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2508, 0.2228, 0.0580, 0.2005, 0.3293, 0.6717, 0.7376, 0.4489, 0.1877, 0.7376, "other ..."] }) +Item 71: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6478, 0.9371, 0.2378, 0.4279, 0.1400, 0.2146, 0.3193, 0.7330, 0.7932, 0.7295, "other ..."] }) +Item 72: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3637, 0.8624, 0.6657, 0.4426, 0.9055, 0.7527, 0.3679, 0.4067, 0.8130, 0.2441, "other ..."] }) +Item 73: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4640, 0.7218, 0.0154, 0.0829, 0.4829, 0.5139, 0.4344, 0.5872, 0.2770, 0.3745, "other ..."] }) +Item 74: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5250, 0.1137, 0.4081, 0.2765, 0.1755, 0.3861, 0.7862, 0.8678, 0.3681, 0.1279, "other ..."] }) +Item 75: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3639, 0.3210, 0.3718, 0.7818, 0.6264, 0.2524, 0.6018, 0.4059, 0.9744, 0.3568, "other ..."] }) +Item 76: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5349, 0.5286, 0.2229, 0.8101, 0.6677, 0.9457, 0.9903, 0.6100, 0.3207, 0.0250, "other ..."] }) +Item 77: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6003, 0.7202, 0.3803, 0.5052, 0.4006, 0.6708, 0.0438, 0.8432, 0.8772, 0.6849, "other ..."] }) +Item 78: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7850, 0.8396, 0.1563, 0.0331, 0.3667, 0.6701, 0.5455, 0.2239, 0.8733, 0.0339, "other ..."] }) +Item 79: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8093, 0.7442, 0.3738, 0.9164, 0.5923, 0.7353, 0.5379, 0.6815, 0.5925, 0.7954, "other ..."] }) +Item 80: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5135, 0.8836, 0.7895, 0.4037, 0.6198, 0.1685, 0.2186, 0.6238, 0.3219, 0.6977, "other ..."] }) +Item 81: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2370, 0.7334, 0.4755, 0.8921, 0.1448, 0.2971, 0.2116, 0.1124, 0.7297, 0.2965, "other ..."] }) +Item 82: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9034, 0.0318, 0.8865, 0.1217, 0.2441, 0.7340, 0.2467, 0.2411, 0.7661, 0.5926, "other ..."] }) +Item 83: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3071, 0.2166, 0.0566, 0.5153, 0.8628, 0.9601, 0.6390, 0.4052, 0.2759, 0.4989, "other ..."] }) +Item 84: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2057, 0.2311, 0.9211, 0.3561, 0.6051, 0.3712, 0.7174, 0.4273, 0.7494, 0.1833, "other ..."] }) +Item 85: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4718, 0.3682, 0.4350, 0.3129, 0.1289, 0.7526, 0.8249, 0.5640, 0.9296, 0.8479, "other ..."] }) +Item 86: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9871, 0.6448, 0.0883, 0.6999, 0.1526, 0.2259, 0.4263, 0.9240, 0.2756, 0.3507, "other ..."] }) +Item 87: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2909, 0.8867, 0.3238, 0.4342, 0.3491, 0.4305, 0.8452, 0.0936, 0.1220, 0.3452, "other ..."] }) +Item 88: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1525, 0.7012, 0.9764, 0.8845, 0.5382, 0.6827, 0.8005, 0.4046, 0.9374, 0.6809, "other ..."] }) +Item 89: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6270, 0.3774, 0.7874, 0.7272, 0.4240, 0.1508, 0.0360, 0.5710, 0.2254, 
0.0950, "other ..."] }) +Item 90: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4082, 0.6284, 0.5104, 0.7905, 0.1476, 0.2033, 0.4106, 0.2252, 0.8351, 0.3193, "other ..."] }) +Item 91: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8560, 0.9670, 0.3969, 0.5008, 0.1567, 0.7420, 0.0072, 0.1891, 0.9690, 0.0387, "other ..."] }) +Item 92: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7990, 0.2114, 0.7110, 0.6670, 0.0472, 0.2463, 0.5026, 0.0325, 0.2475, 0.1461, "other ..."] }) +Item 93: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5034, 0.4728, 0.9950, 0.7630, 0.0154, 0.4453, 0.6893, 0.6996, 0.0246, 0.0245, "other ..."] }) +Item 94: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6522, 0.5475, 0.9462, 0.7139, 0.9925, 0.7899, 0.9753, 0.6984, 0.4501, 0.8380, "other ..."] }) +Item 95: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8157, 0.8688, 0.2659, 0.6080, 0.5274, 0.1883, 0.7562, 0.8511, 0.6928, 0.8151, "other ..."] }) +Item 96: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6829, 0.8851, 0.9375, 0.7638, 0.4942, 0.5561, 0.4220, 0.9060, 0.2907, 0.6102, "other ..."] }) +Item 97: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0850, 0.4087, 0.7257, 0.3157, 0.9526, 0.5290, 0.5818, 0.5460, 0.1906, 0.9422, "other ..."] }) +Item 98: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5644, 0.3510, 0.4506, 0.6384, 0.6324, 0.5511, 0.5272, 0.8371, 0.9137, 0.4253, "other ..."] }) +Item 99: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8320, 0.5156, 0.6172, 0.6617, 0.4999, 0.2464, 0.4536, 0.3265, 0.2163, 0.5406, "other ..."] }) Tree 0: Descendants(Descendants { descendants: [4, 8, 12, 18, 21, 22, 23, 24, 43, 47, 51, 56, 58, 59, 61, 64, 73, 76, 78, 80, 82, 83, 87] }) Tree 1: Descendants(Descendants { descendants: [15, 17, 20, 32, 37, 38, 49, 52, 53, 54, 55, 57, 60, 68, 71, 75, 77, 79, 84, 88, 94, 95] }) -Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(1), normal: [-0.2169, 0.0505, 0.0138, 0.1637, -0.1566, -0.2702, 0.1215, 0.0399, 0.3132, 0.3827, 0.1109, 0.2686, 0.0968, -0.0643, -0.0972, -0.0898, 0.0456, 0.0027, -0.2022, 0.0048, -0.4755, 0.1153, -0.1116, -0.1966, 0.2755, -0.0994, -0.0549, -0.0779, 0.1455, 0.0050] }) +Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(1), normal: [-0.2169, 0.0505, 0.0138, 0.1637, -0.1566, -0.2702, 0.1215, 0.0399, 0.3132, 0.3827, "other ..."] }) Tree 3: Descendants(Descendants { descendants: [0, 9, 10, 29, 31, 34, 35, 42, 48, 50, 70, 74, 89, 92, 93] }) Tree 4: Descendants(Descendants { descendants: [2, 3, 5, 6, 7, 11, 14, 25, 39, 40, 44, 62, 67, 69, 81, 86, 91, 96, 97, 98, 99] }) Tree 5: Descendants(Descendants { descendants: [1, 13, 16, 19, 26, 27, 28, 30, 33, 36, 41, 45, 46, 63, 65, 66, 72, 85, 90] }) -Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Tree(5), normal: [0.0550, -0.0317, -0.1815, 0.1748, 0.2592, 0.1276, 0.0648, -0.3539, -0.1585, 0.0911, -0.1816, -0.0939, 0.0021, 0.0709, 0.0202, 0.0530, -0.1488, -0.2081, -0.0009, -0.2521, -0.0654, 0.0246, 0.4066, 0.3606, -0.2069, 0.1391, 0.0840, 0.1356, 0.1600, -0.3060] }) -Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: Tree(3), right: Tree(6), normal: [-0.0346, 0.4691, 0.1765, -0.0148, 0.0259, 0.1307, -0.1240, -0.1665, 0.0875, 0.1291, 0.2901, -0.1181, 0.1028, 0.1873, -0.1748, 0.1071, 0.3026, 0.1918, -0.2636, -0.1360, -0.0094, 0.1939, -0.2513, 0.0142, -0.2946, -0.0155, -0.0352, 
-0.2004, 0.1297, -0.1179] }) -Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Tree(7), normal: [-0.0833, -0.0190, -0.0905, 0.1619, -0.0567, 0.2069, -0.2559, -0.1137, 0.0169, -0.0864, -0.0911, -0.1466, 0.0229, 0.1060, -0.4102, 0.1031, 0.2331, -0.1867, -0.0179, 0.4030, -0.0541, 0.1071, -0.1108, -0.3500, 0.2832, 0.1696, 0.0784, 0.1984, 0.1857, -0.1249] }) +Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Tree(5), normal: [0.0550, -0.0317, -0.1815, 0.1748, 0.2592, 0.1276, 0.0648, -0.3539, -0.1585, 0.0911, "other ..."] }) +Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: Tree(3), right: Tree(6), normal: [-0.0346, 0.4691, 0.1765, -0.0148, 0.0259, 0.1307, -0.1240, -0.1665, 0.0875, 0.1291, "other ..."] }) +Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Tree(7), normal: [-0.0833, -0.0190, -0.0905, 0.1619, -0.0567, 0.2069, -0.2559, -0.1137, 0.0169, -0.0864, "other ..."] }) Tree 9: Descendants(Descendants { descendants: [1, 6, 7, 10, 13, 26, 28, 31, 36, 39, 42, 48, 50, 62, 70, 86, 96, 97] }) Tree 10: Descendants(Descendants { descendants: [2, 3, 9, 11, 27, 29, 30, 45, 46, 57, 65, 66, 67, 72, 73, 85, 91] }) -Tree 11: SplitPlaneNormal(SplitPlaneNormal { left: Tree(9), right: Tree(10), normal: [0.0163, 0.0439, -0.0411, -0.3070, 0.1504, 0.3722, 0.0400, 0.0252, 0.0917, -0.0341, 0.1062, -0.3781, -0.0183, -0.0133, -0.2361, -0.0074, -0.2533, 0.3440, -0.1389, -0.2804, 0.0114, -0.0358, -0.0088, 0.1891, -0.2116, -0.0120, 0.1843, 0.2466, 0.2429, 0.0446] }) +Tree 11: SplitPlaneNormal(SplitPlaneNormal { left: Tree(9), right: Tree(10), normal: [0.0163, 0.0439, -0.0411, -0.3070, 0.1504, 0.3722, 0.0400, 0.0252, 0.0917, -0.0341, "other ..."] }) Tree 12: Descendants(Descendants { descendants: [0, 4, 8, 17, 18, 19, 32, 35, 37, 41, 47, 51, 52, 53, 54, 55, 60, 61, 64, 68, 75, 77, 78, 79, 87, 90, 95, 98] }) Tree 13: Descendants(Descendants { descendants: [5, 12, 15, 16, 20, 21, 22, 23, 24, 38, 43, 56, 58, 59, 63, 69, 71, 74, 76, 80, 82, 83, 89, 93, 94] }) Tree 14: Descendants(Descendants { descendants: [14, 25, 33, 34, 40, 44, 49, 81, 84, 88, 92, 99] }) -Tree 15: SplitPlaneNormal(SplitPlaneNormal { left: Tree(13), right: Tree(14), normal: [-0.3533, -0.1026, 0.1114, 0.1360, -0.1732, 0.1633, -0.0302, -0.1458, -0.0030, -0.2101, -0.1642, 0.0048, 0.1560, 0.1240, -0.1897, 0.0059, 0.2732, -0.0767, -0.0073, -0.0686, -0.1380, -0.2031, -0.1076, -0.3940, 0.3151, -0.1967, -0.0063, 0.3692, 0.0094, 0.1459] }) -Tree 16: SplitPlaneNormal(SplitPlaneNormal { left: Tree(12), right: Tree(15), normal: [0.2420, -0.1007, 0.3792, 0.0349, -0.0702, 0.0947, -0.3028, -0.1386, -0.2889, 0.0402, 0.0512, -0.0020, 0.2535, -0.0405, -0.0259, -0.4431, -0.1803, 0.1423, 0.2680, -0.0370, 0.1828, -0.0471, 0.0297, 0.0426, 0.1825, 0.0618, 0.1394, -0.1949, -0.2250, -0.0376] }) -Tree 17: SplitPlaneNormal(SplitPlaneNormal { left: Tree(11), right: Tree(16), normal: [-0.0014, -0.0866, -0.0081, 0.0327, -0.1021, -0.1147, 0.0955, 0.0205, 0.2358, -0.1408, 0.2006, 0.3335, 0.2843, -0.1757, 0.2793, -0.2980, -0.0359, 0.0865, 0.2789, -0.3740, 0.0533, -0.0015, 0.3312, -0.0091, -0.0798, -0.1063, -0.0473, -0.0169, -0.3053, 0.0218] }) +Tree 15: SplitPlaneNormal(SplitPlaneNormal { left: Tree(13), right: Tree(14), normal: [-0.3533, -0.1026, 0.1114, 0.1360, -0.1732, 0.1633, -0.0302, -0.1458, -0.0030, -0.2101, "other ..."] }) +Tree 16: SplitPlaneNormal(SplitPlaneNormal { left: Tree(12), right: Tree(15), normal: [0.2420, -0.1007, 0.3792, 0.0349, -0.0702, 0.0947, -0.3028, -0.1386, -0.2889, 0.0402, "other ..."] }) +Tree 17: 
SplitPlaneNormal(SplitPlaneNormal { left: Tree(11), right: Tree(16), normal: [-0.0014, -0.0866, -0.0081, 0.0327, -0.1021, -0.1147, 0.0955, 0.0205, 0.2358, -0.1408, "other ..."] }) Tree 18: Descendants(Descendants { descendants: [0, 2, 4, 6, 7, 9, 11, 14, 25, 26, 27, 28, 29, 33, 34, 40, 42, 62, 67, 81, 86, 91, 96, 97, 98] }) Tree 19: Descendants(Descendants { descendants: [1, 8, 10, 13, 19, 30, 31, 35, 36, 39, 44, 45, 46, 50, 51, 52, 54, 56, 66, 72, 80, 85, 89, 90, 92, 93] }) -Tree 20: SplitPlaneNormal(SplitPlaneNormal { left: Tree(18), right: Tree(19), normal: [-0.0557, -0.0514, -0.1661, -0.1599, 0.0364, -0.1539, 0.0511, -0.1720, -0.0989, -0.0885, 0.0527, -0.0864, 0.0909, -0.1323, -0.0939, 0.1246, 0.0144, 0.1074, 0.1608, -0.2549, 0.2505, 0.0488, 0.5129, 0.1660, -0.4090, 0.2014, -0.1974, 0.1653, 0.2648, -0.0579] }) +Tree 20: SplitPlaneNormal(SplitPlaneNormal { left: Tree(18), right: Tree(19), normal: [-0.0557, -0.0514, -0.1661, -0.1599, 0.0364, -0.1539, 0.0511, -0.1720, -0.0989, -0.0885, "other ..."] }) Tree 21: Descendants(Descendants { descendants: [3, 5, 15, 16, 17, 18, 20, 22, 24, 32, 38, 48, 49, 58, 59, 60, 63, 65, 69, 74, 75, 76, 79, 82, 83, 84, 87, 88, 94] }) Tree 22: Descendants(Descendants { descendants: [12, 21, 23, 37, 41, 43, 47, 53, 55, 57, 61, 64, 68, 70, 71, 73, 77, 78, 95, 99] }) -Tree 23: SplitPlaneNormal(SplitPlaneNormal { left: Tree(21), right: Tree(22), normal: [0.1147, 0.1494, -0.3442, -0.2764, 0.1638, -0.3154, 0.0552, 0.1184, 0.1290, 0.0611, 0.1131, -0.1640, -0.0971, 0.1107, 0.0928, 0.1311, 0.2739, -0.1771, -0.1730, 0.0449, 0.3326, -0.1653, 0.1866, 0.1290, -0.3017, -0.1810, -0.0166, -0.0532, -0.0940, 0.2350] }) -Tree 24: SplitPlaneNormal(SplitPlaneNormal { left: Tree(20), right: Tree(23), normal: [-0.0617, -0.0186, -0.1350, -0.3128, -0.0542, 0.0524, 0.2907, 0.1259, 0.0908, 0.2518, 0.1803, 0.3667, 0.0478, 0.1034, 0.1925, -0.2745, -0.3940, 0.2451, 0.0596, -0.1108, 0.0388, -0.1178, -0.1064, 0.1941, -0.0789, 0.0339, 0.0391, -0.2001, -0.2129, -0.1413] }) +Tree 23: SplitPlaneNormal(SplitPlaneNormal { left: Tree(21), right: Tree(22), normal: [0.1147, 0.1494, -0.3442, -0.2764, 0.1638, -0.3154, 0.0552, 0.1184, 0.1290, 0.0611, "other ..."] }) +Tree 24: SplitPlaneNormal(SplitPlaneNormal { left: Tree(20), right: Tree(23), normal: [-0.0617, -0.0186, -0.1350, -0.3128, -0.0542, 0.0524, 0.2907, 0.1259, 0.0908, 0.2518, "other ..."] }) Tree 25: Descendants(Descendants { descendants: [0, 14, 32, 34, 74, 84, 92] }) Tree 26: Descendants(Descendants { descendants: [5, 12, 20, 22, 24, 38, 43, 49, 58, 59, 76, 82, 83, 88, 94] }) Tree 27: Descendants(Descendants { descendants: [3, 4, 6, 8, 15, 17, 18, 21, 23, 37, 47, 50, 52, 53, 54, 55, 61, 64, 67, 68, 71, 73, 75, 78, 79, 80, 87, 95] }) -Tree 28: SplitPlaneNormal(SplitPlaneNormal { left: Tree(26), right: Tree(27), normal: [0.0098, 0.2629, -0.1633, -0.1082, -0.0680, -0.1540, 0.0015, 0.1634, 0.2796, 0.1046, 0.0476, -0.0620, -0.1644, 0.2042, -0.1229, 0.2581, 0.1689, -0.0211, -0.5080, 0.2612, -0.1435, 0.1440, -0.1411, 0.2912, -0.1177, 0.0276, -0.1104, 0.2365, -0.0692, 0.0538] }) +Tree 28: SplitPlaneNormal(SplitPlaneNormal { left: Tree(26), right: Tree(27), normal: [0.0098, 0.2629, -0.1633, -0.1082, -0.0680, -0.1540, 0.0015, 0.1634, 0.2796, 0.1046, "other ..."] }) Tree 29: Descendants(Descendants { descendants: [2, 7, 9, 11, 31, 33, 35, 39, 44, 46, 66, 69, 72, 81, 86, 89, 97, 98, 99] }) -Tree 30: SplitPlaneNormal(SplitPlaneNormal { left: Tree(28), right: Tree(29), normal: [-0.1005, -0.0667, 0.1734, 0.2457, 0.1330, 0.0145, -0.1141, -0.0260, -0.1916, 
-0.1144, -0.1136, -0.1094, 0.0732, 0.0961, -0.3974, -0.0322, 0.0921, -0.0225, 0.0918, 0.1908, -0.0333, -0.2674, -0.0643, -0.5829, 0.1107, -0.0245, -0.0154, 0.0397, 0.2926, 0.2113] }) -Tree 31: SplitPlaneNormal(SplitPlaneNormal { left: Tree(25), right: Tree(30), normal: [0.0357, 0.2985, -0.2477, -0.0879, 0.2653, 0.2303, 0.1487, 0.0825, -0.0937, 0.1187, 0.2846, -0.2087, -0.2004, 0.2058, 0.1070, 0.1817, -0.1476, 0.0179, 0.0776, 0.1242, 0.0699, 0.0648, -0.0274, 0.2480, -0.4002, 0.2701, -0.0571, -0.2140, 0.0239, -0.1267] }) +Tree 30: SplitPlaneNormal(SplitPlaneNormal { left: Tree(28), right: Tree(29), normal: [-0.1005, -0.0667, 0.1734, 0.2457, 0.1330, 0.0145, -0.1141, -0.0260, -0.1916, -0.1144, "other ..."] }) +Tree 31: SplitPlaneNormal(SplitPlaneNormal { left: Tree(25), right: Tree(30), normal: [0.0357, 0.2985, -0.2477, -0.0879, 0.2653, 0.2303, 0.1487, 0.0825, -0.0937, 0.1187, "other ..."] }) Tree 32: Descendants(Descendants { descendants: [13, 16, 19, 25, 26, 27, 30, 40, 41, 42, 56, 60, 62, 77, 85, 90, 91] }) Tree 33: Descendants(Descendants { descendants: [1, 10, 28, 29, 36, 45, 48, 51, 57, 63, 65, 70, 93, 96] }) -Tree 34: SplitPlaneNormal(SplitPlaneNormal { left: Tree(32), right: Tree(33), normal: [0.1342, -0.2475, -0.0482, 0.0488, 0.0620, -0.3127, 0.1362, 0.1854, -0.4856, 0.1147, -0.1943, -0.0282, 0.2406, 0.1370, 0.1011, -0.0536, -0.0401, -0.1737, 0.0283, 0.2694, 0.2782, -0.0758, -0.1242, -0.1515, 0.0530, 0.2086, -0.2914, -0.1344, 0.0606, 0.0679] }) -Tree 35: SplitPlaneNormal(SplitPlaneNormal { left: Tree(31), right: Tree(34), normal: [-0.1861, -0.2141, -0.1520, 0.0974, 0.0900, 0.2616, -0.0688, -0.2960, -0.0378, -0.0949, -0.3277, -0.0323, -0.0636, 0.2488, -0.4443, 0.3214, 0.0959, -0.3218, -0.0695, 0.0767, 0.0436, 0.1616, 0.0567, 0.1653, 0.1340, 0.0084, 0.0783, -0.0016, 0.1253, -0.1028] }) +Tree 34: SplitPlaneNormal(SplitPlaneNormal { left: Tree(32), right: Tree(33), normal: [0.1342, -0.2475, -0.0482, 0.0488, 0.0620, -0.3127, 0.1362, 0.1854, -0.4856, 0.1147, "other ..."] }) +Tree 35: SplitPlaneNormal(SplitPlaneNormal { left: Tree(31), right: Tree(34), normal: [-0.1861, -0.2141, -0.1520, 0.0974, 0.0900, 0.2616, -0.0688, -0.2960, -0.0378, -0.0949, "other ..."] }) Tree 36: Descendants(Descendants { descendants: [8, 12, 15, 16, 21, 22, 24, 30, 34, 46, 52, 53, 56, 59, 60, 71, 74, 87, 89, 92, 93] }) Tree 37: Descendants(Descendants { descendants: [2, 5, 7, 9, 14, 17, 19, 25, 27, 31, 36, 39, 40, 42, 55, 62, 66, 67, 72, 80, 81, 84, 86, 88, 91, 96, 97] }) Tree 38: Descendants(Descendants { descendants: [0, 4, 6, 11, 23, 28, 33, 44, 54, 63, 69, 75, 94, 98, 99] }) -Tree 39: SplitPlaneNormal(SplitPlaneNormal { left: Tree(37), right: Tree(38), normal: [0.2761, -0.2159, -0.3421, 0.2165, 0.3175, -0.0799, 0.0880, 0.0680, 0.0497, -0.0448, 0.0407, -0.1623, 0.2173, -0.3136, 0.0590, -0.2003, -0.0082, -0.1270, 0.0564, -0.2095, -0.0657, 0.1016, -0.0702, 0.2284, -0.2728, -0.0290, 0.3925, -0.0802, -0.0253, -0.0275] }) -Tree 40: SplitPlaneNormal(SplitPlaneNormal { left: Tree(36), right: Tree(39), normal: [-0.0582, 0.0824, 0.2122, 0.1920, 0.2041, -0.0451, -0.0324, 0.0349, 0.0748, 0.2791, -0.0863, -0.0199, -0.0004, 0.2405, -0.1246, 0.1038, 0.1006, -0.0305, 0.0380, 0.3559, -0.2894, 0.0328, -0.4506, -0.3225, 0.2490, -0.0197, -0.1629, -0.2224, 0.1068, -0.0759] }) +Tree 39: SplitPlaneNormal(SplitPlaneNormal { left: Tree(37), right: Tree(38), normal: [0.2761, -0.2159, -0.3421, 0.2165, 0.3175, -0.0799, 0.0880, 0.0680, 0.0497, -0.0448, "other ..."] }) +Tree 40: SplitPlaneNormal(SplitPlaneNormal { left: Tree(36), right: 
Tree(39), normal: [-0.0582, 0.0824, 0.2122, 0.1920, 0.2041, -0.0451, -0.0324, 0.0349, 0.0748, 0.2791, "other ..."] }) Tree 41: Descendants(Descendants { descendants: [1, 3, 18, 29, 41, 43, 45, 47, 48, 49, 50, 58, 64, 65, 73, 76, 77, 78, 82, 83, 85] }) Tree 42: Descendants(Descendants { descendants: [10, 13, 20, 26, 32, 35, 37, 38, 51, 57, 61, 68, 70, 79, 90, 95] }) -Tree 43: SplitPlaneNormal(SplitPlaneNormal { left: Tree(41), right: Tree(42), normal: [-0.2362, 0.0074, -0.1011, 0.1902, -0.2410, -0.2179, 0.1000, -0.2451, 0.0583, 0.2037, -0.0601, 0.2616, 0.0565, -0.0938, -0.0145, -0.1146, 0.4269, -0.0358, -0.0730, 0.0392, -0.2603, 0.1109, 0.0687, 0.0439, 0.1691, -0.0846, -0.4510, -0.0385, 0.2366, 0.0783] }) -Tree 44: SplitPlaneNormal(SplitPlaneNormal { left: Tree(40), right: Tree(43), normal: [-0.0723, -0.1644, -0.4771, -0.0662, 0.1894, 0.0474, 0.2151, 0.0954, -0.0123, 0.1914, -0.1252, 0.1579, 0.0484, 0.1324, 0.1851, 0.1471, -0.2672, -0.1088, 0.0051, -0.0882, 0.0127, -0.0930, 0.0175, 0.2037, -0.1296, 0.3565, -0.3824, 0.0678, 0.0265, -0.2280] }) +Tree 43: SplitPlaneNormal(SplitPlaneNormal { left: Tree(41), right: Tree(42), normal: [-0.2362, 0.0074, -0.1011, 0.1902, -0.2410, -0.2179, 0.1000, -0.2451, 0.0583, 0.2037, "other ..."] }) +Tree 44: SplitPlaneNormal(SplitPlaneNormal { left: Tree(40), right: Tree(43), normal: [-0.0723, -0.1644, -0.4771, -0.0662, 0.1894, 0.0474, 0.2151, 0.0954, -0.0123, 0.1914, "other ..."] }) Tree 45: Descendants(Descendants { descendants: [20, 22, 23, 43, 49, 53, 55, 58, 75, 82, 88, 94] }) Tree 46: Descendants(Descendants { descendants: [12, 16, 21, 30, 31, 39, 41, 45, 46, 47, 51, 56, 59, 66, 72, 77, 78, 80, 85, 91, 93, 98] }) Tree 47: Descendants(Descendants { descendants: [8, 13, 19, 27, 37, 40, 42, 52, 64, 67, 68, 71, 73, 87] }) -Tree 48: SplitPlaneNormal(SplitPlaneNormal { left: Tree(46), right: Tree(47), normal: [-0.0763, 0.0885, -0.2724, 0.0145, 0.0947, -0.2347, 0.1067, -0.0488, 0.0689, 0.0441, -0.0698, -0.0018, -0.1918, 0.0338, 0.0339, -0.1357, 0.0752, 0.0238, 0.0309, 0.2366, -0.2525, 0.4409, -0.3780, 0.1873, 0.0832, -0.4678, -0.0565, 0.1858, -0.0473, -0.0123] }) +Tree 48: SplitPlaneNormal(SplitPlaneNormal { left: Tree(46), right: Tree(47), normal: [-0.0763, 0.0885, -0.2724, 0.0145, 0.0947, -0.2347, 0.1067, -0.0488, 0.0689, 0.0441, "other ..."] }) Tree 49: Descendants(Descendants { descendants: [6, 28, 54, 90, 96] }) -Tree 50: SplitPlaneNormal(SplitPlaneNormal { left: Tree(48), right: Tree(49), normal: [-0.0493, 0.0077, 0.1071, 0.2488, -0.0877, -0.1022, -0.1081, -0.0104, 0.0486, -0.0089, 0.0647, 0.0797, -0.0989, 0.1190, -0.1396, 0.0312, 0.0572, -0.6574, -0.0868, 0.0124, -0.2229, 0.1733, -0.2335, 0.1060, 0.2380, 0.1268, -0.0459, -0.2752, -0.1312, -0.2836] }) +Tree 50: SplitPlaneNormal(SplitPlaneNormal { left: Tree(48), right: Tree(49), normal: [-0.0493, 0.0077, 0.1071, 0.2488, -0.0877, -0.1022, -0.1081, -0.0104, 0.0486, -0.0089, "other ..."] }) Tree 51: Descendants(Descendants { descendants: [0, 1, 4, 10, 29, 35, 48, 50, 57, 63, 65, 70, 76, 83, 89, 95, 97] }) Tree 52: Descendants(Descendants { descendants: [2, 3, 5, 7, 9, 11, 14, 15, 17, 18, 24, 25, 26, 32, 33, 34, 36, 38, 44, 60, 61, 62, 69, 74, 79, 81, 84, 86, 92, 99] }) -Tree 53: SplitPlaneNormal(SplitPlaneNormal { left: Tree(51), right: Tree(52), normal: [-0.2008, 0.1922, 0.3079, -0.0453, -0.3244, 0.1055, -0.0466, -0.1035, 0.1446, -0.0438, 0.1305, 0.3916, -0.0534, -0.0555, 0.1314, -0.1056, 0.2684, 0.3731, -0.1052, -0.2472, -0.1977, -0.0885, 0.0235, 0.0173, 0.0686, -0.2534, 0.1249, 0.0815, -0.1741, 0.1279] }) 
-Tree 54: SplitPlaneNormal(SplitPlaneNormal { left: Tree(50), right: Tree(53), normal: [-0.2175, -0.1158, -0.0781, 0.0622, -0.2080, 0.0082, 0.1641, 0.0493, -0.0815, 0.2456, -0.1406, 0.2167, 0.4664, -0.1600, -0.0316, -0.3137, -0.0300, 0.0707, -0.0475, 0.1332, -0.0198, 0.0259, -0.3368, -0.2182, 0.3876, 0.0855, -0.0989, 0.1142, 0.0094, 0.1087] }) -Tree 55: SplitPlaneNormal(SplitPlaneNormal { left: Tree(45), right: Tree(54), normal: [-0.0297, 0.0635, -0.0090, 0.0742, 0.0765, 0.2930, -0.2378, -0.0111, -0.1069, -0.1084, -0.2281, -0.1307, -0.1940, 0.0743, -0.2224, 0.2621, 0.4020, -0.1358, -0.1421, 0.2556, 0.1564, 0.0275, 0.0829, -0.2441, 0.1231, 0.2096, 0.0415, 0.3321, 0.2420, 0.0255] }) +Tree 53: SplitPlaneNormal(SplitPlaneNormal { left: Tree(51), right: Tree(52), normal: [-0.2008, 0.1922, 0.3079, -0.0453, -0.3244, 0.1055, -0.0466, -0.1035, 0.1446, -0.0438, "other ..."] }) +Tree 54: SplitPlaneNormal(SplitPlaneNormal { left: Tree(50), right: Tree(53), normal: [-0.2175, -0.1158, -0.0781, 0.0622, -0.2080, 0.0082, 0.1641, 0.0493, -0.0815, 0.2456, "other ..."] }) +Tree 55: SplitPlaneNormal(SplitPlaneNormal { left: Tree(45), right: Tree(54), normal: [-0.0297, 0.0635, -0.0090, 0.0742, 0.0765, 0.2930, -0.2378, -0.0111, -0.1069, -0.1084, "other ..."] }) Tree 56: Descendants(Descendants { descendants: [1, 2, 3, 6, 7, 14, 26, 27, 33, 36, 62, 81, 86, 93, 96, 99] }) Tree 57: Descendants(Descendants { descendants: [0, 4, 10, 11, 28, 29, 39, 42, 44, 50, 57, 67, 70, 74, 95, 97, 98] }) -Tree 58: SplitPlaneNormal(SplitPlaneNormal { left: Tree(56), right: Tree(57), normal: [0.0169, -0.4385, -0.4607, -0.2781, 0.3243, 0.0145, 0.1026, 0.2875, 0.0069, -0.1195, -0.0590, -0.2438, 0.0106, -0.0414, 0.0657, 0.0604, -0.0301, -0.0759, 0.3529, 0.0122, 0.0495, 0.0277, 0.0632, -0.0544, -0.0413, -0.0149, -0.0304, 0.0662, 0.2347, 0.1688] }) +Tree 58: SplitPlaneNormal(SplitPlaneNormal { left: Tree(56), right: Tree(57), normal: [0.0169, -0.4385, -0.4607, -0.2781, 0.3243, 0.0145, 0.1026, 0.2875, 0.0069, -0.1195, "other ..."] }) Tree 59: Descendants(Descendants { descendants: [5, 9, 15, 23, 31, 34, 38, 45, 48, 58, 63, 65, 69, 77, 80, 83, 89] }) -Tree 60: SplitPlaneNormal(SplitPlaneNormal { left: Tree(58), right: Tree(59), normal: [0.1786, -0.1600, -0.2775, -0.0898, 0.1079, 0.1067, -0.0025, 0.0785, -0.0136, -0.0227, -0.0216, 0.1138, -0.0032, -0.1261, -0.1610, -0.1209, -0.4143, 0.2001, 0.4793, -0.2008, -0.0190, -0.2578, 0.2425, 0.1568, -0.0856, 0.2075, -0.2392, -0.0009, -0.1147, 0.0704] }) +Tree 60: SplitPlaneNormal(SplitPlaneNormal { left: Tree(58), right: Tree(59), normal: [0.1786, -0.1600, -0.2775, -0.0898, 0.1079, 0.1067, -0.0025, 0.0785, -0.0136, -0.0227, "other ..."] }) Tree 61: Descendants(Descendants { descendants: [17, 19, 20, 25, 32, 35, 37, 40, 41, 49, 52, 53, 55, 60, 71, 75, 79, 82, 84, 88, 90, 91, 92] }) Tree 62: Descendants(Descendants { descendants: [8, 12, 13, 16, 18, 21, 22, 24, 30, 43, 46, 47, 51, 54, 56, 59, 61, 64, 66, 68, 72, 73, 76, 78, 85, 87, 94] }) -Tree 63: SplitPlaneNormal(SplitPlaneNormal { left: Tree(61), right: Tree(62), normal: [0.0237, 0.2316, 0.0207, 0.0743, 0.2916, 0.2200, 0.0594, 0.0390, -0.4108, -0.0552, -0.0027, -0.4418, -0.0800, -0.0528, 0.1247, 0.0186, -0.1231, -0.0172, -0.1034, -0.0108, 0.1670, -0.1303, 0.0481, 0.2372, -0.4116, 0.2883, 0.0878, -0.0047, -0.0383, 0.1339] }) -Tree 64: SplitPlaneNormal(SplitPlaneNormal { left: Tree(60), right: Tree(63), normal: [-0.0179, -0.0168, -0.2421, -0.1799, -0.1356, 0.0168, 0.1303, -0.2701, 0.3890, -0.1482, 0.2931, 0.1127, -0.0574, -0.0419, 0.2841, 0.1814, 
0.0900, 0.0942, 0.0058, -0.3365, -0.0987, 0.0550, 0.2670, 0.1378, -0.2335, -0.1600, 0.0364, 0.2521, -0.1054, -0.1411] }) +Tree 63: SplitPlaneNormal(SplitPlaneNormal { left: Tree(61), right: Tree(62), normal: [0.0237, 0.2316, 0.0207, 0.0743, 0.2916, 0.2200, 0.0594, 0.0390, -0.4108, -0.0552, "other ..."] }) +Tree 64: SplitPlaneNormal(SplitPlaneNormal { left: Tree(60), right: Tree(63), normal: [-0.0179, -0.0168, -0.2421, -0.1799, -0.1356, 0.0168, 0.1303, -0.2701, 0.3890, -0.1482, "other ..."] }) Tree 65: Descendants(Descendants { descendants: [0, 2, 3, 7, 9, 14, 17, 25, 37, 39, 40, 55, 60, 65, 67, 70, 77, 96, 97, 98] }) Tree 66: Descendants(Descendants { descendants: [1, 6, 8, 10, 11, 13, 19, 26, 27, 28, 30, 42, 46, 52, 62, 66, 72, 73, 85, 87, 90, 91] }) -Tree 67: SplitPlaneNormal(SplitPlaneNormal { left: Tree(65), right: Tree(66), normal: [0.0741, 0.1397, -0.1747, 0.1955, 0.0976, -0.0066, -0.2642, -0.4876, -0.0256, -0.1423, -0.0097, -0.1884, 0.1172, -0.0446, -0.0250, 0.1777, 0.0816, 0.2413, -0.2184, -0.1328, -0.0861, 0.1612, 0.1595, 0.1337, -0.2581, 0.2318, 0.1348, 0.1098, 0.0504, -0.3532] }) +Tree 67: SplitPlaneNormal(SplitPlaneNormal { left: Tree(65), right: Tree(66), normal: [0.0741, 0.1397, -0.1747, 0.1955, 0.0976, -0.0066, -0.2642, -0.4876, -0.0256, -0.1423, "other ..."] }) Tree 68: Descendants(Descendants { descendants: [32, 34, 35, 44, 74, 75, 84, 92, 99] }) Tree 69: Descendants(Descendants { descendants: [4, 15, 18, 20, 24, 29, 38, 47, 57, 58, 61, 63, 64, 68, 69, 76, 79, 81, 83, 86, 94, 95] }) Tree 70: Descendants(Descendants { descendants: [5, 22, 31, 33, 36, 45, 48, 50, 51, 54, 56, 80, 89, 93] }) -Tree 71: SplitPlaneNormal(SplitPlaneNormal { left: Tree(69), right: Tree(70), normal: [0.0908, -0.0903, 0.3735, 0.0708, -0.0676, -0.1918, -0.1890, -0.2189, -0.3541, -0.3300, -0.2211, 0.0110, -0.1644, -0.0635, -0.1158, 0.1109, 0.0872, -0.1639, 0.1730, 0.0744, 0.1254, 0.0237, 0.4010, -0.0193, 0.1347, -0.0681, 0.1437, -0.1188, 0.2499, -0.0817] }) +Tree 71: SplitPlaneNormal(SplitPlaneNormal { left: Tree(69), right: Tree(70), normal: [0.0908, -0.0903, 0.3735, 0.0708, -0.0676, -0.1918, -0.1890, -0.2189, -0.3541, -0.3300, "other ..."] }) Tree 72: Descendants(Descendants { descendants: [12, 16, 21, 23, 41, 43, 49, 53, 59, 71, 78, 82, 88] }) -Tree 73: SplitPlaneNormal(SplitPlaneNormal { left: Tree(71), right: Tree(72), normal: [-0.0215, 0.0861, 0.0260, -0.2042, -0.1901, -0.1088, 0.0154, -0.2001, 0.3676, -0.0737, 0.4196, 0.0262, -0.2664, 0.1847, 0.0711, -0.0195, 0.0464, 0.0459, 0.1556, -0.2079, -0.0035, -0.3500, 0.1718, -0.1083, -0.0923, -0.2715, 0.1083, -0.0153, -0.2891, -0.1430] }) -Tree 74: SplitPlaneNormal(SplitPlaneNormal { left: Tree(68), right: Tree(73), normal: [0.1338, 0.3609, -0.1010, -0.1138, 0.0836, 0.0482, -0.0207, 0.0315, 0.1349, 0.1157, 0.2265, -0.2023, -0.2901, 0.1725, 0.2272, 0.2300, -0.0068, -0.0465, 0.0117, 0.1397, 0.0488, -0.0288, 0.1493, 0.4854, -0.2355, 0.0637, -0.0046, -0.2136, -0.2705, -0.1199] }) -Tree 75: SplitPlaneNormal(SplitPlaneNormal { left: Tree(67), right: Tree(74), normal: [0.0874, -0.1312, 0.0401, 0.1433, -0.0151, -0.0312, 0.0888, 0.0681, -0.2158, -0.0702, 0.0673, 0.1867, 0.3739, -0.1279, 0.3181, -0.3485, -0.1212, 0.2294, 0.2856, -0.2235, 0.0543, -0.2036, 0.1905, -0.0273, -0.0616, 0.2454, -0.1052, -0.0761, -0.3185, -0.0837] }) +Tree 73: SplitPlaneNormal(SplitPlaneNormal { left: Tree(71), right: Tree(72), normal: [-0.0215, 0.0861, 0.0260, -0.2042, -0.1901, -0.1088, 0.0154, -0.2001, 0.3676, -0.0737, "other ..."] }) +Tree 74: SplitPlaneNormal(SplitPlaneNormal { left: 
Tree(68), right: Tree(73), normal: [0.1338, 0.3609, -0.1010, -0.1138, 0.0836, 0.0482, -0.0207, 0.0315, 0.1349, 0.1157, "other ..."] }) +Tree 75: SplitPlaneNormal(SplitPlaneNormal { left: Tree(67), right: Tree(74), normal: [0.0874, -0.1312, 0.0401, 0.1433, -0.0151, -0.0312, 0.0888, 0.0681, -0.2158, -0.0702, "other ..."] }) Tree 76: Descendants(Descendants { descendants: [0, 1, 6, 19, 28, 40, 42, 48, 63, 81, 91] }) Tree 77: Descendants(Descendants { descendants: [2, 4, 7, 10, 11, 14, 17, 20, 25, 26, 31, 32, 33, 34, 36, 62, 70, 74, 86, 93, 99] }) -Tree 78: SplitPlaneNormal(SplitPlaneNormal { left: Tree(76), right: Tree(77), normal: [-0.2023, 0.0033, 0.1400, -0.2031, -0.1474, -0.0398, 0.4147, 0.3168, -0.3442, 0.1913, -0.1551, 0.1629, -0.0057, -0.0611, 0.2405, 0.0100, 0.1013, 0.1488, -0.3546, -0.0093, 0.2763, -0.0540, 0.2623, -0.0881, 0.0638, 0.0429, 0.0299, -0.0982, 0.0684, 0.0016] }) +Tree 78: SplitPlaneNormal(SplitPlaneNormal { left: Tree(76), right: Tree(77), normal: [-0.2023, 0.0033, 0.1400, -0.2031, -0.1474, -0.0398, 0.4147, 0.3168, -0.3442, 0.1913, "other ..."] }) Tree 79: Descendants(Descendants { descendants: [35, 38, 49, 60, 75, 76, 84, 88, 92] }) -Tree 80: SplitPlaneNormal(SplitPlaneNormal { left: Tree(78), right: Tree(79), normal: [-0.2920, -0.3421, -0.2821, -0.1193, -0.0368, 0.1317, 0.2692, -0.1174, -0.0436, -0.0298, 0.0512, 0.2419, 0.0831, -0.0746, 0.0552, -0.2475, -0.1448, 0.0537, 0.4436, -0.3278, -0.0175, -0.1502, 0.2574, -0.0383, -0.1137, -0.0473, 0.0075, 0.1496, -0.0283, -0.0335] }) +Tree 80: SplitPlaneNormal(SplitPlaneNormal { left: Tree(78), right: Tree(79), normal: [-0.2920, -0.3421, -0.2821, -0.1193, -0.0368, 0.1317, 0.2692, -0.1174, -0.0436, -0.0298, "other ..."] }) Tree 81: Descendants(Descendants { descendants: [8, 27, 30, 85] }) Tree 82: Descendants(Descendants { descendants: [12, 18, 21, 23, 29, 37, 39, 41, 43, 44, 46, 47, 51, 52, 53, 55, 56, 59, 64, 66, 67, 68, 69, 71, 72, 78, 82, 96, 97, 98] }) Tree 83: Descendants(Descendants { descendants: [3, 5, 9, 13, 15, 16, 22, 24, 45, 50, 54, 57, 58, 61, 65, 73, 77, 79, 80, 83, 87, 89, 90, 94, 95] }) -Tree 84: SplitPlaneNormal(SplitPlaneNormal { left: Tree(82), right: Tree(83), normal: [-0.0973, 0.2024, -0.0486, 0.1536, -0.0050, 0.0761, -0.0099, -0.1343, -0.3098, 0.2276, -0.1129, 0.0524, -0.0186, -0.2463, -0.1155, -0.1463, -0.4634, 0.0699, 0.0128, 0.1095, -0.1902, 0.3119, -0.0996, 0.4041, 0.0501, 0.2183, -0.1728, -0.0823, 0.0981, -0.0756] }) -Tree 85: SplitPlaneNormal(SplitPlaneNormal { left: Tree(81), right: Tree(84), normal: [0.1433, -0.2044, -0.0819, -0.1305, 0.0272, 0.1805, 0.0805, 0.1128, 0.0279, -0.0258, 0.1917, 0.2602, -0.0370, -0.0627, 0.4363, -0.2168, -0.1736, 0.1961, 0.3870, -0.0136, 0.1538, -0.1195, 0.0285, -0.2140, 0.0699, 0.0224, 0.3888, -0.1619, -0.1619, 0.0292] }) -Tree 86: SplitPlaneNormal(SplitPlaneNormal { left: Tree(80), right: Tree(85), normal: [0.0240, 0.1785, 0.0265, -0.3440, 0.1702, 0.0470, 0.0107, 0.2175, -0.0317, 0.1600, 0.2534, -0.2271, -0.2660, 0.0689, -0.0707, -0.0678, -0.1642, 0.0867, 0.0469, -0.1977, 0.2930, 0.0458, 0.2275, 0.3742, -0.3834, 0.1165, -0.1414, -0.0012, -0.0766, -0.0228] }) +Tree 84: SplitPlaneNormal(SplitPlaneNormal { left: Tree(82), right: Tree(83), normal: [-0.0973, 0.2024, -0.0486, 0.1536, -0.0050, 0.0761, -0.0099, -0.1343, -0.3098, 0.2276, "other ..."] }) +Tree 85: SplitPlaneNormal(SplitPlaneNormal { left: Tree(81), right: Tree(84), normal: [0.1433, -0.2044, -0.0819, -0.1305, 0.0272, 0.1805, 0.0805, 0.1128, 0.0279, -0.0258, "other ..."] }) +Tree 86: 
SplitPlaneNormal(SplitPlaneNormal { left: Tree(80), right: Tree(85), normal: [0.0240, 0.1785, 0.0265, -0.3440, 0.1702, 0.0470, 0.0107, 0.2175, -0.0317, 0.1600, "other ..."] }) Tree 87: Descendants(Descendants { descendants: [8, 13, 16, 21, 27, 28, 29, 30, 37, 41, 46, 47, 48, 56, 60, 64, 73, 78, 85, 91] }) Tree 88: Descendants(Descendants { descendants: [1, 2, 3, 6, 7, 14, 17, 40, 42, 52, 62, 67, 77, 96, 98] }) Tree 89: Descendants(Descendants { descendants: [20, 26, 32, 38, 49, 58, 61, 68, 75, 76, 79, 81, 86, 88, 94] }) Tree 90: Descendants(Descendants { descendants: [24, 31, 33, 36, 39, 45, 50, 51, 54, 57, 63, 66, 72, 80, 89, 90, 93, 95, 97] }) Tree 91: Descendants(Descendants { descendants: [0, 4, 5, 9, 12, 15, 18, 22, 23, 34, 35, 43, 44, 53, 55, 69, 71, 74, 82, 83, 84, 87, 92, 99] }) -Tree 92: SplitPlaneNormal(SplitPlaneNormal { left: Tree(90), right: Tree(91), normal: [0.1432, -0.2381, -0.0722, -0.2146, -0.0888, -0.0131, 0.0621, 0.2812, 0.2690, -0.0300, 0.1311, 0.3498, 0.1850, -0.0395, -0.0264, -0.2005, -0.1037, 0.0984, 0.1482, -0.2048, 0.1539, -0.1091, -0.2066, -0.1567, 0.2054, -0.3106, 0.1361, 0.2352, -0.2780, 0.1194] }) +Tree 92: SplitPlaneNormal(SplitPlaneNormal { left: Tree(90), right: Tree(91), normal: [0.1432, -0.2381, -0.0722, -0.2146, -0.0888, -0.0131, 0.0621, 0.2812, 0.2690, -0.0300, "other ..."] }) Tree 93: Descendants(Descendants { descendants: [10, 11, 19, 25, 59, 65, 70] }) -Tree 94: SplitPlaneNormal(SplitPlaneNormal { left: Tree(92), right: Tree(93), normal: [-0.3120, -0.2834, -0.0907, 0.0227, 0.0090, 0.3812, 0.1933, -0.4350, -0.0939, -0.1180, -0.2013, 0.0197, -0.0472, -0.0336, -0.1988, 0.0397, 0.1914, 0.1412, -0.1298, -0.1525, -0.1154, -0.0438, -0.1335, -0.2600, 0.1097, 0.1125, 0.1885, 0.0315, 0.2691, 0.0511] }) -Tree 95: SplitPlaneNormal(SplitPlaneNormal { left: Tree(89), right: Tree(94), normal: [-0.0200, -0.2200, 0.1948, -0.1418, 0.1112, 0.0083, -0.0510, -0.0801, -0.3187, -0.1000, -0.1367, -0.0389, -0.0556, 0.2224, -0.2718, 0.1765, 0.1163, -0.2332, 0.0342, 0.2299, 0.2840, 0.0873, 0.3227, -0.1082, 0.2161, -0.1491, 0.3666, -0.0066, 0.2141, 0.0844] }) -Tree 96: SplitPlaneNormal(SplitPlaneNormal { left: Tree(88), right: Tree(95), normal: [0.0684, -0.2434, -0.1543, 0.0337, 0.1784, 0.1295, 0.3061, -0.0175, -0.0679, -0.1419, -0.0180, 0.2335, 0.3782, -0.1127, 0.2746, -0.2657, -0.0579, 0.1808, 0.2623, -0.3329, 0.1801, -0.2106, 0.1692, -0.1191, 0.0152, 0.1214, 0.0252, -0.0236, -0.1220, -0.1448] }) -Tree 97: SplitPlaneNormal(SplitPlaneNormal { left: Tree(87), right: Tree(96), normal: [-0.0316, -0.1036, 0.3252, -0.0985, -0.1286, -0.4867, -0.0286, 0.1159, -0.0177, 0.2205, 0.0177, 0.3192, 0.1521, -0.1484, 0.1918, -0.2794, -0.1141, 0.2598, 0.1453, 0.1133, -0.1149, -0.0455, 0.0697, -0.2537, 0.1797, -0.0423, -0.0470, 0.0886, 0.0868, 0.2083] }) +Tree 94: SplitPlaneNormal(SplitPlaneNormal { left: Tree(92), right: Tree(93), normal: [-0.3120, -0.2834, -0.0907, 0.0227, 0.0090, 0.3812, 0.1933, -0.4350, -0.0939, -0.1180, "other ..."] }) +Tree 95: SplitPlaneNormal(SplitPlaneNormal { left: Tree(89), right: Tree(94), normal: [-0.0200, -0.2200, 0.1948, -0.1418, 0.1112, 0.0083, -0.0510, -0.0801, -0.3187, -0.1000, "other ..."] }) +Tree 96: SplitPlaneNormal(SplitPlaneNormal { left: Tree(88), right: Tree(95), normal: [0.0684, -0.2434, -0.1543, 0.0337, 0.1784, 0.1295, 0.3061, -0.0175, -0.0679, -0.1419, "other ..."] }) +Tree 97: SplitPlaneNormal(SplitPlaneNormal { left: Tree(87), right: Tree(96), normal: [-0.0316, -0.1036, 0.3252, -0.0985, -0.1286, -0.4867, -0.0286, 0.1159, -0.0177, 0.2205, "other 
..."] }) Root: Metadata { dimensions: 30, items: RoaringBitmap<100 values between 0 and 99>, roots: [8, 17, 24, 35, 44, 55, 64, 75, 86, 97], distance: "euclidean" } - diff --git a/src/tests/writer.rs b/src/tests/writer.rs index 9cbe8935..683ca2b2 100644 --- a/src/tests/writer.rs +++ b/src/tests/writer.rs @@ -641,7 +641,7 @@ fn add_one_item_incrementally_in_an_empty_db() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0, 0.0] }) + Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [0] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } "###); @@ -660,7 +660,7 @@ fn add_one_item_incrementally_in_a_one_item_db() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0, 0.0] }) + Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [0] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } "###); @@ -695,8 +695,8 @@ fn add_one_item_incrementally_to_create_a_split_node() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0, 0.0] }) - Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0, 0.0] }) + Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) + Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [0, 1] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" } "###); @@ -735,17 +735,17 @@ fn add_one_item_incrementally() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0, 0.0] }) - Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0, 0.0] }) - Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0, 0.0] }) - Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0, 0.0] }) - Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0, 0.0] }) - Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0, 0.0] }) + Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) + Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) + Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] }) + Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 0.0000] }) + Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] }) + Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 3] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0, 0.0] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: 
[0.0, 0.0] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0, 0.0] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [4], distance: "euclidean" } "###); diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index b6b5ec2a..43faf4e1 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -15,11 +15,12 @@ pub enum BinaryQuantized {} impl UnalignedVectorCodec for BinaryQuantized { fn from_bytes(bytes: &[u8]) -> Result<Cow<UnalignedVector<Self>>, SizeMismatch> { - if bytes.len() % size_of::() == 0 { + let rem = bytes.len() % size_of::(); + if rem == 0 { // safety: `UnalignedVector` is transparent Ok(Cow::Borrowed(unsafe { transmute(bytes) })) } else { - Err(SizeMismatch) + Err(SizeMismatch { vector_codec: "binary quantized", rem }) } } diff --git a/src/unaligned_vector/f32.rs b/src/unaligned_vector/f32.rs index 9a16a3bc..3cbb633f 100644 --- a/src/unaligned_vector/f32.rs +++ b/src/unaligned_vector/f32.rs @@ -11,11 +11,12 @@ use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; impl UnalignedVectorCodec for f32 { /// Creates an unaligned slice of f32 wrapper from a slice of bytes. fn from_bytes(bytes: &[u8]) -> Result<Cow<UnalignedVector<Self>>, SizeMismatch> { - if bytes.len() % size_of::<f32>() == 0 { + let rem = bytes.len() % size_of::<f32>(); + if rem == 0 { // safety: `UnalignedF32Slice` is transparent Ok(Cow::Borrowed(unsafe { transmute(bytes) })) } else { - Err(SizeMismatch) + Err(SizeMismatch { vector_codec: "f32", rem }) } } diff --git a/src/unaligned_vector/mod.rs b/src/unaligned_vector/mod.rs index f0a8c080..bdc72f0d 100644 --- a/src/unaligned_vector/mod.rs +++ b/src/unaligned_vector/mod.rs @@ -12,9 +12,10 @@ use bytemuck::pod_collect_to_vec; mod binary_quantized; mod f32; +/// Determine the way the vectors should be read and written from the database pub trait UnalignedVectorCodec: std::borrow::ToOwned + Sized { /// Creates an unaligned vector from a slice of bytes. - // Don't allocate. + /// Don't allocate. fn from_bytes(bytes: &[u8]) -> Result<Cow<UnalignedVector<Self>>, SizeMismatch>; /// Creates an unaligned vector from a slice of f32. @@ -51,7 +52,7 @@ impl UnalignedVector { } /// Creates an unaligned vector from a slice of bytes. - // Don't allocate. + /// Don't allocate. pub fn from_bytes(bytes: &[u8]) -> Result<Cow<UnalignedVector<Codec>>, SizeMismatch> { Codec::from_bytes(bytes) } @@ -105,9 +106,17 @@ impl UnalignedVector { } } +/// Returned in case you tried to make an unaligned vector from a slice of bytes that don't have the right number of elements #[derive(Debug, thiserror::Error)] -#[error("invalid slice of float dimension")] -pub struct SizeMismatch; +#[error( + "Slice of bytes contains {rem} too many bytes to be decoded with the {vector_codec} codec." +)] +pub struct SizeMismatch { + /// The name of the codec used. + vector_codec: &'static str, + /// The number of bytes remaining after decoding as many words as possible.
+ rem: usize, +} impl ToOwned for UnalignedVector { type Owned = Vec; @@ -127,8 +136,17 @@ impl fmt::Debug for UnalignedVector { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut list = f.debug_list(); + struct Number(f32); + impl fmt::Debug for Number { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:0.4}", self.0) + } + } + let vec = self.to_vec(); - list.entries(vec.iter().take(10)); + for v in vec.iter().take(10) { + list.entry(&Number(*v)); + } if vec.len() < 10 { return list.finish(); } @@ -136,7 +154,7 @@ impl fmt::Debug for UnalignedVector { // With binary quantization we may be padding with a lot of zeros if vec[10..].iter().all(|v| *v == 0.0) { list.entry(&"0.0, ..."); - } else if vec[10..].iter().all(|v| *v == 0.0) { + } else { list.entry(&"other ..."); } From 3576a881e89eec002408713eca7c90edc15ae90e Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 8 Jul 2024 16:35:12 +0200 Subject: [PATCH 07/61] rewrite the iterator of binary quantized slices --- ...ite_and_update_lot_of_random_points-2.snap | 299 +++++++++--------- src/unaligned_vector/binary_quantized.rs | 60 +++- 2 files changed, 195 insertions(+), 164 deletions(-) diff --git a/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points-2.snap b/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points-2.snap index 56765efb..da91c990 100644 --- a/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points-2.snap +++ b/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points-2.snap @@ -4,213 +4,212 @@ expression: handle --- ================== Dumping index 0 -Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5919, 0.9953, 0.7271, 0.7734, 0.5761, 0.8882, 0.8497, 0.0817, 0.3989, 0.3384, 0.1674, 0.1351, 0.7610, 0.5052, 0.5143, 0.7102, 0.4465, 0.7144, 0.3132, 0.4332, 0.9812, 0.2139, 0.8465, 0.2794, 0.7061, 0.4487, 0.9708, 0.6317, 0.9469, 0.1785] }) -Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4199, 0.2620, 0.2655, 0.8414, 0.0192, 0.3828, 0.2561, 0.2692, 0.0368, 0.4624, 0.8739, 0.5080, 0.4557, 0.8887, 0.0685, 0.6341, 0.1904, 0.2201, 0.6947, 0.8503, 0.4607, 0.6554, 0.2088, 0.0586, 0.6857, 0.9343, 0.4788, 0.9246, 0.7944, 0.0473] }) -Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2240, 0.0136, 0.1967, 0.7482, 0.6849, 0.7949, 0.0809, 0.5504, 0.6339, 0.6978, 0.6777, 0.5128, 0.2356, 0.9192, 0.6383, 0.3607, 0.2178, 0.4241, 0.2823, 0.1077, 0.3125, 0.6897, 0.5020, 0.4331, 0.8987, 0.9540, 0.8982, 0.6903, 0.1349, 0.6635] }) -Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0001, 0.7870, 0.9674, 0.4568, 0.5250, 0.2701, 0.2417, 0.8742, 0.6869, 0.8759, 0.8323, 0.5742, 0.3201, 0.7171, 0.0057, 0.6038, 0.0168, 0.5729, 0.4393, 0.7574, 0.5881, 0.2978, 0.0910, 0.8530, 0.5171, 0.8481, 0.2869, 0.5211, 0.4583, 0.3767] }) -Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7711, 0.4016, 0.1489, 0.9991, 0.5236, 0.5613, 0.4596, 0.2214, 0.5016, 0.1191, 0.4541, 0.5222, 0.5986, 0.9983, 0.2310, 0.2574, 0.9207, 0.7641, 0.0694, 0.7722, 0.5875, 0.4063, 0.3378, 0.9968, 0.9350, 0.2326, 0.9584, 0.5538, 0.3051, 0.6269] }) -Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0850, 0.7778, 0.8818, 0.3427, 0.1293, 0.7240, 0.4773, 0.2871, 0.3988, 0.4014, 0.2562, 0.7026, 0.0131, 0.3608, 0.1922, 0.1645, 0.1968, 0.7862, 0.9030, 0.2735, 0.8840, 0.6386, 0.5482, 0.3175, 0.8519, 0.5081, 0.5617, 0.5580, 
0.3213, 0.4471] }) -Item 6: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7473, 0.3347, 0.9786, 0.4693, 0.3540, 0.1280, 0.9628, 0.2076, 0.3437, 0.4589, 0.1810, 0.1655, 0.1075, 0.3601, 0.5140, 0.5744, 0.3271, 0.1997, 0.8790, 0.5547, 0.9704, 0.5718, 0.8628, 0.8789, 0.7082, 0.9686, 0.3710, 0.9644, 0.1321, 0.4467] }) -Item 7: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5699, 0.5919, 0.7788, 0.3705, 0.3807, 0.1708, 0.3678, 0.5629, 0.0612, 0.4826, 0.0217, 0.7778, 0.2831, 0.3242, 0.5338, 0.6375, 0.1274, 0.4257, 0.2305, 0.7650, 0.4528, 0.2800, 0.0278, 0.0397, 0.6639, 0.2003, 0.5412, 0.3166, 0.5944, 0.9598] }) -Item 8: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1874, 0.7676, 0.9707, 0.5148, 0.7242, 0.4714, 0.1278, 0.2181, 0.2651, 0.5674, 0.8457, 0.5267, 0.8284, 0.0233, 0.2096, 0.3571, 0.0995, 0.3933, 0.4728, 0.0075, 0.0694, 0.8901, 0.7110, 0.5995, 0.0891, 0.7751, 0.1643, 0.8042, 0.1305, 0.3012] }) -Item 9: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8134, 0.9647, 0.6640, 0.4464, 0.7439, 0.6904, 0.3159, 0.7607, 0.3483, 0.9963, 0.2933, 0.9251, 0.1057, 0.1036, 0.1371, 0.4079, 0.0195, 0.6610, 0.8298, 0.9571, 0.5529, 0.4484, 0.5659, 0.0259, 0.9343, 0.5204, 0.4442, 0.9591, 0.9487, 0.7276] }) -Item 10: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3157, 0.8096, 0.5024, 0.5421, 0.4327, 0.3696, 0.6485, 0.7972, 0.9792, 0.9554, 0.6188, 0.0626, 0.1792, 0.6594, 0.7769, 0.8802, 0.6502, 0.6366, 0.5225, 0.8591, 0.2867, 0.7058, 0.4472, 0.7954, 0.4904, 0.5646, 0.7717, 0.1219, 0.2071, 0.2306] }) -Item 11: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6560, 0.2343, 0.4079, 0.5972, 0.5766, 0.6739, 0.2985, 0.6167, 0.4834, 0.2818, 0.3920, 0.2409, 0.9702, 0.0911, 0.2453, 0.6558, 0.9055, 0.7360, 0.3131, 0.7204, 0.0299, 0.7458, 0.0238, 0.1737, 0.7046, 0.6704, 0.8351, 0.2749, 0.9873, 0.7914] }) -Item 12: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1559, 0.1898, 0.2475, 0.4582, 0.5481, 0.5846, 0.1844, 0.4898, 0.2916, 0.2669, 0.7149, 0.9492, 0.0534, 0.0481, 0.2997, 0.6405, 0.7399, 0.4172, 0.3718, 0.2076, 0.2163, 0.1118, 0.8913, 0.1185, 0.9281, 0.7586, 0.4557, 0.3610, 0.9430, 0.8552] }) -Item 13: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1726, 0.7399, 0.0271, 0.8721, 0.8725, 0.4023, 0.1558, 0.1044, 0.2096, 0.2081, 0.1872, 0.5438, 0.1299, 0.3760, 0.7304, 0.5688, 0.6662, 0.0684, 0.8029, 0.4273, 0.5500, 0.9148, 0.7332, 0.9506, 0.5606, 0.8472, 0.4429, 0.7944, 0.9688, 0.3389] }) -Item 14: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1712, 0.6340, 0.2866, 0.6282, 0.9072, 0.7589, 0.6432, 0.8676, 0.2942, 0.4375, 0.1915, 0.0701, 0.8306, 0.1394, 0.7766, 0.2529, 0.8725, 0.4990, 0.6116, 0.6328, 0.8781, 0.0654, 0.3875, 0.9613, 0.2420, 0.2797, 0.0223, 0.1828, 0.8776, 0.9457] }) -Item 15: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6031, 0.2970, 0.9485, 0.0272, 0.4336, 0.1339, 0.2209, 0.8350, 0.2566, 0.9481, 0.3899, 0.9689, 0.2817, 0.1451, 0.1504, 0.0124, 0.1305, 0.9029, 0.4726, 0.2814, 0.2904, 0.8287, 0.2799, 0.9755, 0.4346, 0.8260, 0.5369, 0.8875, 0.3421, 0.7837] }) -Item 16: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8145, 0.6759, 0.1411, 0.7878, 0.3556, 0.8440, 0.5301, 0.2627, 0.7050, 0.4145, 0.4917, 0.9546, 0.6272, 0.6493, 0.5966, 0.0112, 0.3779, 0.1356, 0.6096, 0.3088, 0.9535, 0.9812, 0.9142, 0.2969, 0.5824, 0.0956, 0.0494, 0.7254, 0.9232, 0.1656] }) -Item 17: Leaf(Leaf { header: NodeHeaderEuclidean { 
bias: 0.0 }, vector: [0.0819, 0.5362, 0.8215, 0.2263, 0.3109, 0.2738, 0.6133, 0.5147, 0.9334, 0.9877, 0.5054, 0.8811, 0.4353, 0.3735, 0.7596, 0.8341, 0.5202, 0.8083, 0.0235, 0.4160, 0.1656, 0.6664, 0.2896, 0.5461, 0.7987, 0.2628, 0.7858, 0.5026, 0.4464, 0.4663] }) -Item 18: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4594, 0.9245, 0.5151, 0.7390, 0.6121, 0.5891, 0.5795, 0.6295, 0.0595, 0.6471, 0.9697, 0.1731, 0.3990, 0.0142, 0.4745, 0.6821, 0.7069, 0.6385, 0.9143, 0.0997, 0.4233, 0.4696, 0.9810, 0.8888, 0.7751, 0.1469, 0.7990, 0.6329, 0.5171, 0.2222] }) -Item 19: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1390, 0.7531, 0.3249, 0.8754, 0.9984, 0.7362, 0.0281, 0.2016, 0.9443, 0.1989, 0.1268, 0.7461, 0.2756, 0.8266, 0.0999, 0.5222, 0.6392, 0.9377, 0.9354, 0.7583, 0.1164, 0.8645, 0.8242, 0.1620, 0.2974, 0.3209, 0.9473, 0.4661, 0.5575, 0.2633] }) -Item 20: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5102, 0.8700, 0.3910, 0.0807, 0.1067, 0.0692, 0.7037, 0.4229, 0.0066, 0.1894, 0.8055, 0.1438, 0.2038, 0.1398, 0.6604, 0.3894, 0.5771, 0.8752, 0.5691, 0.2017, 0.2038, 0.2523, 0.1656, 0.7198, 0.9908, 0.9873, 0.1991, 0.2202, 0.5113, 0.3629] }) -Item 21: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6252, 0.9240, 0.7227, 0.6810, 0.2563, 0.3321, 0.2283, 0.9226, 0.8494, 0.0356, 0.9350, 0.3206, 0.0841, 0.3868, 0.5729, 0.1906, 0.8904, 0.1226, 0.3204, 0.3544, 0.6959, 0.0374, 0.7859, 0.8741, 0.1515, 0.8625, 0.3210, 0.2074, 0.0444, 0.0310] }) -Item 22: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8077, 0.3229, 0.5031, 0.0472, 0.6957, 0.9603, 0.8790, 0.2009, 0.3837, 0.1765, 0.3607, 0.8933, 0.3599, 0.4145, 0.6767, 0.4458, 0.8279, 0.0868, 0.9844, 0.4436, 0.6281, 0.0342, 0.3575, 0.3818, 0.0302, 0.0269, 0.2284, 0.6082, 0.7576, 0.2471] }) -Item 23: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9915, 0.6886, 0.1976, 0.1725, 0.6776, 0.1356, 0.3842, 0.4424, 0.6939, 0.8016, 0.5727, 0.7027, 0.1770, 0.7853, 0.6088, 0.0002, 0.2742, 0.6954, 0.8246, 0.8889, 0.8154, 0.2628, 0.3025, 0.8792, 0.3541, 0.2896, 0.9661, 0.0181, 0.2668, 0.6278] }) -Item 24: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1651, 0.4144, 0.2816, 0.0037, 0.3038, 0.2344, 0.3321, 0.1985, 0.4704, 0.3710, 0.0685, 0.8947, 0.6470, 0.8409, 0.3697, 0.2380, 0.2569, 0.0567, 0.6305, 0.1852, 0.0517, 0.5162, 0.4308, 0.9393, 0.3359, 0.8628, 0.0185, 0.6288, 0.0726, 0.3604] }) -Item 25: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0116, 0.5323, 0.3940, 0.1382, 0.1987, 0.7287, 0.4026, 0.1442, 0.5957, 0.0340, 0.5421, 0.8017, 0.8851, 0.5251, 0.2615, 0.4368, 0.9452, 0.0516, 0.7547, 0.5079, 0.7231, 0.4768, 0.3608, 0.3594, 0.9920, 0.5132, 0.9998, 0.8522, 0.5859, 0.5981] }) -Item 26: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7192, 0.5087, 0.9102, 0.0269, 0.7119, 0.7909, 0.0259, 0.6221, 0.9168, 0.1528, 0.7924, 0.9934, 0.7933, 0.8670, 0.8508, 0.3588, 0.8401, 0.6364, 0.7732, 0.4859, 0.6940, 0.7124, 0.1809, 0.9183, 0.8297, 0.2409, 0.2959, 0.3889, 0.1961, 0.8226] }) -Item 27: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3569, 0.9070, 0.8412, 0.3201, 0.6998, 0.8674, 0.5494, 0.2322, 0.2969, 0.9787, 0.2782, 0.1101, 0.0628, 0.9354, 0.1072, 0.6459, 0.4551, 0.4685, 0.0322, 0.5495, 0.2973, 0.7192, 0.0850, 0.8548, 0.7578, 0.2394, 0.7491, 0.5172, 0.6877, 0.2777] }) -Item 28: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5587, 0.8792, 0.9279, 0.4203, 0.9903, 
0.7524, 0.0600, 0.9020, 0.8837, 0.1950, 0.9960, 0.5826, 0.0503, 0.0252, 0.7039, 0.5048, 0.6060, 0.1319, 0.2606, 0.1377, 0.7441, 0.0135, 0.0619, 0.4194, 0.5945, 0.9413, 0.2151, 0.5847, 0.3696, 0.0190] }) -Item 29: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6021, 0.0690, 0.1080, 0.2535, 0.6817, 0.7899, 0.3297, 0.8656, 0.1566, 0.3560, 0.0137, 0.0839, 0.7669, 0.8890, 0.1798, 0.3938, 0.1183, 0.2023, 0.1738, 0.5623, 0.8859, 0.2806, 0.1558, 0.2857, 0.9514, 0.8324, 0.4661, 0.6666, 0.4993, 0.5062] }) -Item 30: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9353, 0.1703, 0.1992, 0.7788, 0.4769, 0.5431, 0.4292, 0.5249, 0.7629, 0.9059, 0.9850, 0.5671, 0.3269, 0.8024, 0.0646, 0.4227, 0.5682, 0.6560, 0.0801, 0.9351, 0.7265, 0.8319, 0.4562, 0.3787, 0.7330, 0.9286, 0.1837, 0.8404, 0.2514, 0.5222] }) -Item 31: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7584, 0.3448, 0.7315, 0.5405, 0.6797, 0.3253, 0.8611, 0.0846, 0.0174, 0.0882, 0.5256, 0.9214, 0.1212, 0.8043, 0.2995, 0.8307, 0.4346, 0.9285, 0.9904, 0.9735, 0.4889, 0.0600, 0.7663, 0.0742, 0.9123, 0.6835, 0.4067, 0.1188, 0.7512, 0.4707] }) -Item 32: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3473, 0.1415, 0.1873, 0.7472, 0.3808, 0.6412, 0.1944, 0.5869, 0.0722, 0.3215, 0.2138, 0.9172, 0.9432, 0.7344, 0.6013, 0.2246, 0.7490, 0.7489, 0.9924, 0.5685, 0.3723, 0.2297, 0.1589, 0.7127, 0.7569, 0.8008, 0.2684, 0.8629, 0.8452, 0.6085] }) -Item 33: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3494, 0.5787, 0.8027, 0.9625, 0.5944, 0.6781, 0.4204, 0.5899, 0.0209, 0.9001, 0.1186, 0.5955, 0.7368, 0.2369, 0.7933, 0.2004, 0.7130, 0.4379, 0.5571, 0.4914, 0.2432, 0.2769, 0.7139, 0.3401, 0.8933, 0.5249, 0.9387, 0.5985, 0.6670, 0.3357] }) -Item 34: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4076, 0.8830, 0.7654, 0.5911, 0.3055, 0.0942, 0.7024, 0.9471, 0.9752, 0.4628, 0.9139, 0.2575, 0.9084, 0.8982, 0.1891, 0.1241, 0.8916, 0.9075, 0.5341, 0.5373, 0.3614, 0.1611, 0.7268, 0.7789, 0.5822, 0.5474, 0.7846, 0.3485, 0.7666, 0.6316] }) -Item 35: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3281, 0.1178, 0.0533, 0.4172, 0.3990, 0.0395, 0.8533, 0.1435, 0.9799, 0.4063, 0.4503, 0.5466, 0.5694, 0.0182, 0.0664, 0.3508, 0.3090, 0.3746, 0.9400, 0.6872, 0.7995, 0.4485, 0.9186, 0.1649, 0.5424, 0.5505, 0.5592, 0.7658, 0.9042, 0.6767] }) -Item 36: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6939, 0.0973, 0.2992, 0.4254, 0.0919, 0.8203, 0.3851, 0.0823, 0.4547, 0.4283, 0.7629, 0.1972, 0.4169, 0.2463, 0.9524, 0.0418, 0.5042, 0.8957, 0.6272, 0.7204, 0.4137, 0.3103, 0.8987, 0.1995, 0.9046, 0.4746, 0.5808, 0.6296, 0.3545, 0.5605] }) -Item 37: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6603, 0.3675, 0.1019, 0.2193, 0.3180, 0.0591, 0.9934, 0.8583, 0.7473, 0.3644, 0.7934, 0.7521, 0.2084, 0.5456, 0.2541, 0.8014, 0.8205, 0.1972, 0.2444, 0.4832, 0.7058, 0.8444, 0.4029, 0.8783, 0.4841, 0.3386, 0.0945, 0.1890, 0.4473, 0.1546] }) -Item 38: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7093, 0.6425, 0.0423, 0.0877, 0.7820, 0.6701, 0.4963, 0.1499, 0.8631, 0.8113, 0.4576, 0.0670, 0.8585, 0.2630, 0.2871, 0.4369, 0.8413, 0.1169, 0.4160, 0.5313, 0.4760, 0.2117, 0.7608, 0.2017, 0.5997, 0.4663, 0.2783, 0.8645, 0.1948, 0.5615] }) -Item 39: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4498, 0.4107, 0.9992, 0.3928, 0.4779, 0.4661, 0.1282, 0.8140, 0.7490, 0.5641, 0.2624, 0.5265, 0.6044, 
0.9400, 0.3332, 0.9998, 0.7337, 0.6396, 0.9172, 0.7666, 0.9220, 0.7287, 0.8280, 0.0537, 0.1497, 0.9594, 0.1406, 0.4087, 0.7885, 0.8048] }) -Item 40: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0152, 0.9782, 0.0253, 0.6194, 0.6177, 0.6446, 0.8634, 0.8343, 0.1429, 0.6369, 0.3378, 0.6536, 0.1102, 0.7227, 0.8701, 0.1821, 0.5264, 0.5552, 0.8596, 0.0010, 0.8143, 0.8791, 0.7726, 0.7088, 0.0896, 0.3240, 0.1309, 0.5004, 0.2342, 0.0984] }) -Item 41: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7821, 0.0623, 0.1168, 0.3609, 0.6040, 0.8336, 0.0911, 0.2181, 0.7485, 0.0281, 0.8549, 0.9804, 0.1108, 0.9639, 0.4405, 0.9132, 0.7031, 0.3452, 0.5902, 0.0231, 0.6480, 0.1488, 0.9591, 0.2111, 0.4024, 0.9373, 0.5868, 0.4497, 0.0518, 0.1472] }) -Item 42: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5514, 0.2225, 0.8400, 0.2598, 0.5622, 0.6231, 0.5772, 0.0082, 0.8470, 0.9596, 0.4124, 0.5401, 0.1223, 0.6089, 0.9136, 0.9120, 0.7385, 0.5413, 0.1915, 0.2687, 0.7671, 0.6234, 0.9868, 0.7270, 0.5404, 0.6695, 0.9427, 0.5441, 0.9689, 0.6273] }) -Item 43: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8608, 0.8689, 0.4918, 0.2239, 0.9291, 0.0622, 0.6843, 0.4184, 0.4703, 0.3202, 0.7687, 0.8275, 0.9704, 0.7889, 0.7334, 0.1453, 0.3702, 0.5082, 0.8797, 0.2501, 0.9118, 0.1180, 0.7303, 0.8331, 0.1301, 0.8686, 0.6935, 0.0703, 0.2890, 0.0618] }) -Item 44: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9144, 0.0223, 0.5079, 0.6873, 0.5501, 0.2577, 0.9304, 0.3154, 0.1546, 0.3749, 0.0835, 0.8041, 0.2294, 0.4978, 0.9517, 0.5000, 0.3866, 0.3378, 0.4766, 0.1355, 0.7071, 0.8291, 0.7740, 0.3245, 0.2792, 0.0711, 0.6141, 0.7858, 0.2139, 0.3282] }) -Item 45: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9838, 0.4742, 0.5307, 0.6989, 0.9323, 0.2140, 0.1371, 0.1113, 0.0322, 0.3001, 0.1340, 0.0790, 0.2782, 0.9782, 0.5140, 0.3046, 0.1680, 0.8445, 0.9321, 0.7381, 0.3928, 0.6103, 0.6585, 0.9092, 0.4978, 0.6749, 0.5345, 0.3792, 0.8163, 0.1454] }) -Item 46: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5159, 0.9516, 0.6267, 0.1007, 0.4283, 0.1452, 0.9565, 0.4723, 0.4935, 0.7395, 0.6451, 0.7015, 0.0522, 0.5192, 0.2266, 0.2402, 0.3333, 0.9273, 0.9958, 0.2017, 0.2902, 0.3911, 0.0928, 0.5717, 0.3145, 0.7503, 0.4984, 0.9545, 0.0889, 0.8065] }) -Item 47: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4506, 0.8658, 0.1164, 0.2339, 0.2266, 0.9050, 0.5849, 0.9792, 0.5951, 0.7706, 0.7796, 0.1371, 0.4991, 0.7207, 0.9812, 0.4095, 0.2353, 0.3570, 0.2602, 0.7291, 0.5657, 0.2764, 0.5556, 0.4127, 0.1003, 0.8519, 0.6812, 0.6548, 0.0776, 0.4340] }) -Item 48: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3709, 0.3974, 0.5971, 0.7814, 0.2075, 0.5392, 0.4790, 0.5432, 0.4844, 0.3367, 0.8977, 0.0726, 0.6655, 0.6888, 0.9977, 0.4786, 0.6354, 0.0349, 0.1562, 0.4070, 0.0880, 0.9745, 0.7060, 0.0318, 0.7282, 0.6670, 0.8390, 0.2992, 0.0245, 0.3949] }) -Item 49: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1322, 0.0670, 0.3404, 0.5339, 0.0229, 0.5964, 0.5497, 0.3819, 0.6553, 0.7129, 0.9637, 0.7032, 0.9449, 0.2401, 0.9760, 0.5352, 0.2430, 0.3744, 0.8652, 0.0512, 0.1682, 0.1739, 0.4144, 0.1880, 0.5934, 0.5135, 0.7059, 0.9658, 0.2671, 0.3260] }) -Item 50: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7462, 0.9877, 0.0796, 0.4441, 0.9824, 0.6855, 0.0985, 0.0618, 0.0551, 0.5251, 0.7779, 0.1040, 0.9286, 0.7941, 0.7117, 0.4402, 0.7396, 0.3132, 0.6452, 0.9110, 0.7132, 
0.4191, 0.4112, 0.5348, 0.4404, 0.8581, 0.4330, 0.8951, 0.1598, 0.8896] }) -Item 51: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3643, 0.3282, 0.2580, 0.4477, 0.5920, 0.3581, 0.3502, 0.2441, 0.1707, 0.1243, 0.0999, 0.5953, 0.2748, 0.6587, 0.7290, 0.5587, 0.7189, 0.3957, 0.6531, 0.1442, 0.7119, 0.2412, 0.8803, 0.4895, 0.3027, 0.4314, 0.0367, 0.8662, 0.6177, 0.3332] }) -Item 52: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8421, 0.8103, 0.5294, 0.6795, 0.9634, 0.3110, 0.8945, 0.1541, 0.5916, 0.1082, 0.7825, 0.1929, 0.7196, 0.3823, 0.2323, 0.3234, 0.7334, 0.4601, 0.6471, 0.6307, 0.8962, 0.2893, 0.4631, 0.1205, 0.7605, 0.8382, 0.9778, 0.0805, 0.1082, 0.8498] }) -Item 53: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5928, 0.6267, 0.1717, 0.1604, 0.6506, 0.0302, 0.8289, 0.5930, 0.9304, 0.6067, 0.5622, 0.9136, 0.8093, 0.3523, 0.6691, 0.5854, 0.3384, 0.7957, 0.4830, 0.0315, 0.7294, 0.3134, 0.4225, 0.1835, 0.5813, 0.1435, 0.7924, 0.1626, 0.4706, 0.8306] }) -Item 54: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8491, 0.4500, 0.8568, 0.1146, 0.7974, 0.5148, 0.0862, 0.6020, 0.7705, 0.6189, 0.4477, 0.1057, 0.7328, 0.6922, 0.4087, 0.5630, 0.7761, 0.5898, 0.4709, 0.1850, 0.4666, 0.4263, 0.6792, 0.7046, 0.0851, 0.2056, 0.7178, 0.4810, 0.8220, 0.2514] }) -Item 55: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6095, 0.6335, 0.4740, 0.0746, 0.3871, 0.1016, 0.6414, 0.3076, 0.5484, 0.7602, 0.6928, 0.6693, 0.5314, 0.5478, 0.8312, 0.5319, 0.0348, 0.1317, 0.2634, 0.5546, 0.5113, 0.8326, 0.7357, 0.3584, 0.4124, 0.0272, 0.6879, 0.2599, 0.7837, 0.4725] }) -Item 56: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2720, 0.7143, 0.6966, 0.9709, 0.4263, 0.7441, 0.8624, 0.1372, 0.0418, 0.7771, 0.1553, 0.3192, 0.0459, 0.4143, 0.3707, 0.6627, 0.6568, 0.8666, 0.0761, 0.8969, 0.0492, 0.2230, 0.4939, 0.5646, 0.6062, 0.4992, 0.1078, 0.5006, 0.7079, 0.6897] }) -Item 57: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1524, 0.3441, 0.2368, 0.4350, 0.5328, 0.3005, 0.7021, 0.3614, 0.6369, 0.7984, 0.1099, 0.2671, 0.9108, 0.9017, 0.3862, 0.5157, 0.4510, 0.5185, 0.4465, 0.2198, 0.5385, 0.3734, 0.2170, 0.7698, 0.4575, 0.7720, 0.0857, 0.3963, 0.8207, 0.6391] }) -Item 58: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4278, 0.5749, 0.9222, 0.8668, 0.5708, 0.7825, 0.1484, 0.1654, 0.7721, 0.1671, 0.9193, 0.1164, 0.5486, 0.1205, 0.7077, 0.9949, 0.8732, 0.9675, 0.1991, 0.5403, 0.0405, 0.1756, 0.8858, 0.4640, 0.9576, 0.3734, 0.2786, 0.0953, 0.8583, 0.4868] }) -Item 59: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6227, 0.1202, 0.9715, 0.3936, 0.3423, 0.7479, 0.6526, 0.1867, 0.5568, 0.0922, 0.4923, 0.6243, 0.1646, 0.4228, 0.6323, 0.3442, 0.2133, 0.7990, 0.4992, 0.0482, 0.6260, 0.0885, 0.9299, 0.4293, 0.7760, 0.7470, 0.9417, 0.0020, 0.4020, 0.9238] }) -Item 60: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5174, 0.1043, 0.5020, 0.7082, 0.0108, 0.7768, 0.2805, 0.5635, 0.1165, 0.3446, 0.9755, 0.1436, 0.9761, 0.9216, 0.2756, 0.9967, 0.7486, 0.6232, 0.4070, 0.1137, 0.5368, 0.9245, 0.2500, 0.0250, 0.7935, 0.0007, 0.7585, 0.4630, 0.8483, 0.5910] }) -Item 61: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1347, 0.7000, 0.0530, 0.4959, 0.8227, 0.9831, 0.5433, 0.5201, 0.7924, 0.3847, 0.4618, 0.7762, 0.3668, 0.3105, 0.9197, 0.1617, 0.8675, 0.7686, 0.5316, 0.6798, 0.9551, 0.6542, 0.3051, 0.9479, 0.5483, 0.9541, 0.1520, 0.7273, 0.3857, 
0.7027] }) -Item 62: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7594, 0.3160, 0.8848, 0.2336, 0.2080, 0.9798, 0.2609, 0.3579, 0.9043, 0.5035, 0.2169, 0.5686, 0.4564, 0.3243, 0.7777, 0.2940, 0.1526, 0.5762, 0.5091, 0.0906, 0.6799, 0.0227, 0.0664, 0.4943, 0.6480, 0.2907, 0.1529, 0.9099, 0.5285, 0.5710] }) -Item 63: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9221, 0.3578, 0.3207, 0.9945, 0.9288, 0.4608, 0.3001, 0.0296, 0.4678, 0.7422, 0.1848, 0.2948, 0.9534, 0.1223, 0.3098, 0.1023, 0.2691, 0.5410, 0.6640, 0.7102, 0.6446, 0.8912, 0.2151, 0.8837, 0.4653, 0.7481, 0.9103, 0.1118, 0.4085, 0.3791] }) -Item 64: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7278, 0.0499, 0.4536, 0.3295, 0.8839, 0.5060, 0.5773, 0.3133, 0.2521, 0.6842, 0.1135, 0.3192, 0.4543, 0.1061, 0.2675, 0.4425, 0.3531, 0.4401, 0.8975, 0.9026, 0.3267, 0.2830, 0.9081, 0.1656, 0.4371, 0.8825, 0.9425, 0.4156, 0.8326, 0.5255] }) -Item 65: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5677, 0.1247, 0.4928, 0.4097, 0.8433, 0.9238, 0.7848, 0.4437, 0.4696, 0.9886, 0.1417, 0.4364, 0.4471, 0.6284, 0.2548, 0.7256, 0.0324, 0.6394, 0.6730, 0.4224, 0.6438, 0.4449, 0.2229, 0.5860, 0.9926, 0.9454, 0.8358, 0.2026, 0.9971, 0.0270] }) -Item 66: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3202, 0.9724, 0.0590, 0.9146, 0.0865, 0.5347, 0.9236, 0.5211, 0.4621, 0.4500, 0.7957, 0.7486, 0.4893, 0.3373, 0.8893, 0.0299, 0.4020, 0.9628, 0.3753, 0.2408, 0.2653, 0.7672, 0.0966, 0.2104, 0.2312, 0.3469, 0.6364, 0.3652, 0.2086, 0.5423] }) -Item 67: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5299, 0.5421, 0.8194, 0.1018, 0.5426, 0.9350, 0.3228, 0.7979, 0.7473, 0.1118, 0.2515, 0.0225, 0.1534, 0.4736, 0.5641, 0.1919, 0.5379, 0.6815, 0.8908, 0.6717, 0.8996, 0.8896, 0.0856, 0.5125, 0.2434, 0.0755, 0.5839, 0.1712, 0.9467, 0.6559] }) -Item 68: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7403, 0.1475, 0.8585, 0.3990, 0.5766, 0.0263, 0.2665, 0.6317, 0.8026, 0.0227, 0.7350, 0.4161, 0.7097, 0.5355, 0.9565, 0.1053, 0.3177, 0.5186, 0.5486, 0.9838, 0.6668, 0.1541, 0.6342, 0.9678, 0.9183, 0.4694, 0.4923, 0.5748, 0.9564, 0.5665] }) -Item 69: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6163, 0.7762, 0.4365, 0.6713, 0.5647, 0.3449, 0.6615, 0.9430, 0.5941, 0.3563, 0.8698, 0.7265, 0.7299, 0.6801, 0.1361, 0.3680, 0.3622, 0.9737, 0.9003, 0.5171, 0.3981, 0.6434, 0.2324, 0.3246, 0.5110, 0.6495, 0.7496, 0.4635, 0.8431, 0.4580] }) -Item 70: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0981, 0.2398, 0.2595, 0.6075, 0.1605, 0.7487, 0.1070, 0.8330, 0.7908, 0.0203, 0.3900, 0.9755, 0.1293, 0.2872, 0.3445, 0.1459, 0.9796, 0.7792, 0.7915, 0.8245, 0.0562, 0.0983, 0.1049, 0.9047, 0.3248, 0.8740, 0.7597, 0.5026, 0.5782, 0.1187] }) -Item 71: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6478, 0.9371, 0.2378, 0.4279, 0.1400, 0.2146, 0.3193, 0.7330, 0.7932, 0.7295, 0.9781, 0.8327, 0.1761, 0.1474, 0.8589, 0.0800, 0.5614, 0.4009, 0.9556, 0.7030, 0.7711, 0.5020, 0.7911, 0.6455, 0.6838, 0.0809, 0.9930, 0.7730, 0.2199, 0.3138] }) -Item 72: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9586, 0.8715, 0.0659, 0.6144, 0.4995, 0.6693, 0.1415, 0.7556, 0.9350, 0.9924, 0.7515, 0.3790, 0.7403, 0.8305, 0.7240, 0.5444, 0.1211, 0.1179, 0.4767, 0.5844, 0.9410, 0.8368, 0.3227, 0.7505, 0.1099, 0.8573, 0.7832, 0.9095, 0.3476, 0.3119] }) -Item 73: Leaf(Leaf { header: NodeHeaderEuclidean { 
bias: 0.0 }, vector: [0.4640, 0.7218, 0.0154, 0.0829, 0.4829, 0.5139, 0.4344, 0.5872, 0.2770, 0.3745, 0.8848, 0.3805, 0.3193, 0.6432, 0.2470, 0.6752, 0.1428, 0.8068, 0.2241, 0.2959, 0.8348, 0.8973, 0.1213, 0.7855, 0.3006, 0.1461, 0.5417, 0.2111, 0.8150, 0.1504] }) -Item 74: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9907, 0.6582, 0.3640, 0.9777, 0.6001, 0.3023, 0.3154, 0.2637, 0.7202, 0.7507, 0.8958, 0.7089, 0.7351, 0.5297, 0.2843, 0.1364, 0.3376, 0.3935, 0.5760, 0.3888, 0.2231, 0.5688, 0.4859, 0.2078, 0.1466, 0.3177, 0.9709, 0.6116, 0.7154, 0.7503] }) -Item 75: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3639, 0.3210, 0.3718, 0.7818, 0.6264, 0.2524, 0.6018, 0.4059, 0.9744, 0.3568, 0.4892, 0.7652, 0.4566, 0.0884, 0.4999, 0.1489, 0.3169, 0.9850, 0.7573, 0.3145, 0.0845, 0.5885, 0.2073, 0.0937, 0.4518, 0.0957, 0.1403, 0.6862, 0.4749, 0.3771] }) -Item 76: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6910, 0.9733, 0.3937, 0.8154, 0.2171, 0.7616, 0.3415, 0.4227, 0.6408, 0.0355, 0.6109, 0.3701, 0.1244, 0.9890, 0.8110, 0.6992, 0.4823, 0.2457, 0.2802, 0.2739, 0.7232, 0.8222, 0.4219, 0.5532, 0.3637, 0.2164, 0.9099, 0.0570, 0.6572, 0.7077] }) -Item 77: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6003, 0.7202, 0.3803, 0.5052, 0.4006, 0.6708, 0.0438, 0.8432, 0.8772, 0.6849, 0.4162, 0.6977, 0.3395, 0.6632, 0.0256, 0.3843, 0.0968, 0.0266, 0.0174, 0.0328, 0.1925, 0.2962, 0.7774, 0.8711, 0.5001, 0.5312, 0.5863, 0.1766, 0.7026, 0.8613] }) -Item 78: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8030, 0.0396, 0.0641, 0.3958, 0.3427, 0.9163, 0.0129, 0.4500, 0.8504, 0.8711, 0.5896, 0.2410, 0.2023, 0.1514, 0.1462, 0.2485, 0.1558, 0.0196, 0.0312, 0.3130, 0.1591, 0.7236, 0.5735, 0.2308, 0.5460, 0.3421, 0.0572, 0.6052, 0.2613, 0.8674] }) -Item 79: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8093, 0.7442, 0.3738, 0.9164, 0.5923, 0.7353, 0.5379, 0.6815, 0.5925, 0.7954, 0.9246, 0.9350, 0.7121, 0.9404, 0.7657, 0.6102, 0.3958, 0.8172, 0.1868, 0.5326, 0.0352, 0.5308, 0.4925, 0.8650, 0.7519, 0.6908, 0.0251, 0.4628, 0.2902, 0.3531] }) -Item 80: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6515, 0.6402, 0.4825, 0.1251, 0.1569, 0.9048, 0.3695, 0.0092, 0.1574, 0.1528, 0.7839, 0.6874, 0.2127, 0.6863, 0.8016, 0.6897, 0.3058, 0.5211, 0.5270, 0.0955, 0.5762, 0.0805, 0.3199, 0.9090, 0.6208, 0.8156, 0.2419, 0.3181, 0.0620, 0.0985] }) -Item 81: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2370, 0.7334, 0.4755, 0.8921, 0.1448, 0.2971, 0.2116, 0.1124, 0.7297, 0.2965, 0.6643, 0.1020, 0.4147, 0.5717, 0.5803, 0.1797, 0.3495, 0.7602, 0.5506, 0.6812, 0.6435, 0.2448, 0.0042, 0.2285, 0.7938, 0.3224, 0.0413, 0.3119, 0.0898, 0.8277] }) -Item 82: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5256, 0.7737, 0.1071, 0.1049, 0.5131, 0.1703, 0.7922, 0.1135, 0.4165, 0.0898, 0.6449, 0.3926, 0.8879, 0.8974, 0.3679, 0.3870, 0.0928, 0.5963, 0.2508, 0.5844, 0.0041, 0.8819, 0.9161, 0.9307, 0.1548, 0.3934, 0.6088, 0.6955, 0.1229, 0.0494] }) -Item 83: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3071, 0.2166, 0.0566, 0.5153, 0.8628, 0.9601, 0.6390, 0.4052, 0.2759, 0.4989, 0.2220, 0.9184, 0.5489, 0.0201, 0.1469, 0.1779, 0.1827, 0.6549, 0.8811, 0.2189, 0.8447, 0.8635, 0.3256, 0.5651, 0.0724, 0.9818, 0.3070, 0.3512, 0.2026, 0.7946] }) -Item 84: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7826, 0.2989, 0.7770, 0.4571, 0.1601, 
0.9667, 0.6717, 0.1903, 0.1169, 0.8590, 0.7849, 0.0099, 0.9294, 0.7851, 0.9878, 0.6203, 0.3452, 0.2254, 0.5425, 0.6924, 0.4998, 0.1669, 0.2662, 0.0321, 0.0579, 0.4807, 0.8837, 0.6801, 0.4343, 0.3297] }) -Item 85: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4718, 0.3682, 0.4350, 0.3129, 0.1289, 0.7526, 0.8249, 0.5640, 0.9296, 0.8479, 0.1283, 0.2632, 0.3694, 0.7950, 0.0273, 0.9558, 0.6983, 0.7563, 0.1747, 0.2701, 0.7434, 0.5281, 0.8098, 0.8743, 0.5457, 0.8847, 0.5409, 0.8640, 0.9360, 0.3401] }) -Item 86: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0498, 0.3107, 0.7264, 0.3119, 0.9096, 0.1574, 0.0399, 0.6895, 0.0600, 0.7256, 0.9863, 0.2722, 0.1111, 0.3469, 0.0548, 0.3537, 0.4830, 0.3201, 0.7230, 0.6671, 0.4246, 0.9173, 0.8003, 0.2369, 0.6229, 0.1692, 0.5419, 0.5009, 0.3584, 0.5946] }) -Item 87: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2909, 0.8867, 0.3238, 0.4342, 0.3491, 0.4305, 0.8452, 0.0936, 0.1220, 0.3452, 0.1736, 0.7292, 0.1167, 0.4834, 0.1016, 0.2478, 0.2028, 0.9520, 0.2854, 0.3369, 0.4047, 0.7799, 0.3272, 0.9902, 0.0833, 0.1636, 0.7756, 0.8540, 0.4763, 0.6465] }) -Item 88: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8769, 0.4665, 0.6818, 0.3210, 0.8711, 0.3130, 0.2871, 0.8996, 0.6896, 0.5484, 0.6041, 0.5275, 0.5048, 0.4548, 0.1419, 0.3088, 0.3050, 0.1076, 0.1253, 0.7856, 0.7688, 0.6142, 0.4916, 0.9840, 0.6736, 0.3734, 0.8886, 0.9260, 0.3265, 0.1584] }) -Item 89: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6270, 0.3774, 0.7874, 0.7272, 0.4240, 0.1508, 0.0360, 0.5710, 0.2254, 0.0950, 0.4095, 0.3464, 0.8501, 0.4572, 0.3022, 0.0795, 0.0845, 0.0360, 0.3839, 0.5728, 0.5501, 0.4156, 0.9986, 0.5015, 0.4543, 0.9016, 0.3680, 0.3139, 0.7894, 0.8085] }) -Item 90: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4460, 0.4258, 0.3236, 0.3679, 0.9002, 0.2569, 0.4921, 0.9407, 0.7830, 0.2773, 0.1046, 0.4353, 0.1210, 0.6273, 0.8741, 0.4641, 0.6691, 0.8989, 0.4624, 0.6183, 0.8460, 0.0977, 0.1144, 0.8153, 0.3813, 0.3064, 0.7226, 0.7813, 0.2815, 0.8862] }) -Item 91: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8560, 0.9670, 0.3969, 0.5008, 0.1567, 0.7420, 0.0072, 0.1891, 0.9690, 0.0387, 0.9630, 0.3936, 0.1125, 0.4424, 0.0855, 0.7900, 0.8814, 0.8120, 0.2263, 0.8761, 0.2498, 0.8021, 0.0934, 0.3020, 0.8538, 0.9658, 0.8355, 0.4396, 0.6029, 0.0986] }) -Item 92: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3994, 0.3790, 0.3149, 0.3094, 0.6979, 0.4079, 0.0662, 0.5442, 0.6009, 0.6694, 0.1466, 0.6650, 0.7175, 0.1460, 0.3400, 0.1159, 0.0663, 0.4503, 0.7431, 0.2036, 0.5143, 0.0838, 0.1408, 0.2708, 0.6502, 0.7476, 0.0027, 0.2546, 0.4029, 0.6528] }) -Item 93: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5034, 0.4728, 0.9950, 0.7630, 0.0154, 0.4453, 0.6893, 0.6996, 0.0246, 0.0245, 0.1628, 0.7592, 0.4518, 0.3659, 0.5843, 0.9270, 0.5562, 0.2315, 0.2196, 0.7557, 0.7544, 0.5390, 0.9575, 0.7762, 0.6434, 0.8592, 0.7840, 0.1388, 0.2920, 0.1637] }) -Item 94: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1804, 0.0669, 0.8639, 0.6324, 0.8798, 0.3417, 0.1164, 0.5756, 0.9767, 0.7200, 0.3680, 0.3990, 0.8036, 0.7097, 0.1595, 0.6541, 0.0915, 0.0054, 0.5892, 0.1158, 0.9738, 0.8020, 0.1475, 0.8025, 0.6771, 0.6167, 0.3474, 0.9959, 0.1469, 0.2971] }) -Item 95: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8157, 0.8688, 0.2659, 0.6080, 0.5274, 0.1883, 0.7562, 0.8511, 0.6928, 0.8151, 0.3489, 0.0685, 0.7027, 
0.6863, 0.5350, 0.2973, 0.2547, 0.5162, 0.4468, 0.8645, 0.3545, 0.8380, 0.6886, 0.7425, 0.8855, 0.7851, 0.0179, 0.2890, 0.2872, 0.6186] }) -Item 96: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1112, 0.3992, 0.0741, 0.7318, 0.9446, 0.2970, 0.9245, 0.1696, 0.6283, 0.9061, 0.5514, 0.1520, 0.7314, 0.0530, 0.8088, 0.5043, 0.5434, 0.5908, 0.8721, 0.2087, 0.1389, 0.3781, 0.8195, 0.4335, 0.9500, 0.3030, 0.6022, 0.9074, 0.8735, 0.2674] }) -Item 97: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0850, 0.4087, 0.7257, 0.3157, 0.9526, 0.5290, 0.5818, 0.5460, 0.1906, 0.9422, 0.7444, 0.2185, 0.1185, 0.7708, 0.8692, 0.3749, 0.7770, 0.2513, 0.6984, 0.8243, 0.4377, 0.5163, 0.2195, 0.0276, 0.8162, 0.6553, 0.7764, 0.3520, 0.9825, 0.5628] }) -Item 98: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5324, 0.0597, 0.9226, 0.4007, 0.0213, 0.2651, 0.8515, 0.9985, 0.8066, 0.3377, 0.7841, 0.5174, 0.3105, 0.4018, 0.4617, 0.8729, 0.0545, 0.3308, 0.1648, 0.5622, 0.8622, 0.4060, 0.9234, 0.4541, 0.7880, 0.6254, 0.6776, 0.7990, 0.1873, 0.5412] }) -Item 99: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8320, 0.5156, 0.6172, 0.6617, 0.4999, 0.2464, 0.4536, 0.3265, 0.2163, 0.5406, 0.9164, 0.5001, 0.9171, 0.7590, 0.2505, 0.0364, 0.8099, 0.6270, 0.6354, 0.1587, 0.4080, 0.2992, 0.0561, 0.0112, 0.4348, 0.1503, 0.6197, 0.5171, 0.2871, 0.8704] }) +Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5919, 0.9953, 0.7271, 0.7734, 0.5761, 0.8882, 0.8497, 0.0817, 0.3989, 0.3384, "other ..."] }) +Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4199, 0.2620, 0.2655, 0.8414, 0.0192, 0.3828, 0.2561, 0.2692, 0.0368, 0.4624, "other ..."] }) +Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2240, 0.0136, 0.1967, 0.7482, 0.6849, 0.7949, 0.0809, 0.5504, 0.6339, 0.6978, "other ..."] }) +Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0001, 0.7870, 0.9674, 0.4568, 0.5250, 0.2701, 0.2417, 0.8742, 0.6869, 0.8759, "other ..."] }) +Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7711, 0.4016, 0.1489, 0.9991, 0.5236, 0.5613, 0.4596, 0.2214, 0.5016, 0.1191, "other ..."] }) +Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0850, 0.7778, 0.8818, 0.3427, 0.1293, 0.7240, 0.4773, 0.2871, 0.3988, 0.4014, "other ..."] }) +Item 6: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7473, 0.3347, 0.9786, 0.4693, 0.3540, 0.1280, 0.9628, 0.2076, 0.3437, 0.4589, "other ..."] }) +Item 7: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5699, 0.5919, 0.7788, 0.3705, 0.3807, 0.1708, 0.3678, 0.5629, 0.0612, 0.4826, "other ..."] }) +Item 8: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1874, 0.7676, 0.9707, 0.5148, 0.7242, 0.4714, 0.1278, 0.2181, 0.2651, 0.5674, "other ..."] }) +Item 9: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8134, 0.9647, 0.6640, 0.4464, 0.7439, 0.6904, 0.3159, 0.7607, 0.3483, 0.9963, "other ..."] }) +Item 10: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3157, 0.8096, 0.5024, 0.5421, 0.4327, 0.3696, 0.6485, 0.7972, 0.9792, 0.9554, "other ..."] }) +Item 11: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6560, 0.2343, 0.4079, 0.5972, 0.5766, 0.6739, 0.2985, 0.6167, 0.4834, 0.2818, "other ..."] }) +Item 12: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1559, 0.1898, 0.2475, 0.4582, 0.5481, 
0.5846, 0.1844, 0.4898, 0.2916, 0.2669, "other ..."] }) +Item 13: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1726, 0.7399, 0.0271, 0.8721, 0.8725, 0.4023, 0.1558, 0.1044, 0.2096, 0.2081, "other ..."] }) +Item 14: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1712, 0.6340, 0.2866, 0.6282, 0.9072, 0.7589, 0.6432, 0.8676, 0.2942, 0.4375, "other ..."] }) +Item 15: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6031, 0.2970, 0.9485, 0.0272, 0.4336, 0.1339, 0.2209, 0.8350, 0.2566, 0.9481, "other ..."] }) +Item 16: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8145, 0.6759, 0.1411, 0.7878, 0.3556, 0.8440, 0.5301, 0.2627, 0.7050, 0.4145, "other ..."] }) +Item 17: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0819, 0.5362, 0.8215, 0.2263, 0.3109, 0.2738, 0.6133, 0.5147, 0.9334, 0.9877, "other ..."] }) +Item 18: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4594, 0.9245, 0.5151, 0.7390, 0.6121, 0.5891, 0.5795, 0.6295, 0.0595, 0.6471, "other ..."] }) +Item 19: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1390, 0.7531, 0.3249, 0.8754, 0.9984, 0.7362, 0.0281, 0.2016, 0.9443, 0.1989, "other ..."] }) +Item 20: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5102, 0.8700, 0.3910, 0.0807, 0.1067, 0.0692, 0.7037, 0.4229, 0.0066, 0.1894, "other ..."] }) +Item 21: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6252, 0.9240, 0.7227, 0.6810, 0.2563, 0.3321, 0.2283, 0.9226, 0.8494, 0.0356, "other ..."] }) +Item 22: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8077, 0.3229, 0.5031, 0.0472, 0.6957, 0.9603, 0.8790, 0.2009, 0.3837, 0.1765, "other ..."] }) +Item 23: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9915, 0.6886, 0.1976, 0.1725, 0.6776, 0.1356, 0.3842, 0.4424, 0.6939, 0.8016, "other ..."] }) +Item 24: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1651, 0.4144, 0.2816, 0.0037, 0.3038, 0.2344, 0.3321, 0.1985, 0.4704, 0.3710, "other ..."] }) +Item 25: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0116, 0.5323, 0.3940, 0.1382, 0.1987, 0.7287, 0.4026, 0.1442, 0.5957, 0.0340, "other ..."] }) +Item 26: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7192, 0.5087, 0.9102, 0.0269, 0.7119, 0.7909, 0.0259, 0.6221, 0.9168, 0.1528, "other ..."] }) +Item 27: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3569, 0.9070, 0.8412, 0.3201, 0.6998, 0.8674, 0.5494, 0.2322, 0.2969, 0.9787, "other ..."] }) +Item 28: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5587, 0.8792, 0.9279, 0.4203, 0.9903, 0.7524, 0.0600, 0.9020, 0.8837, 0.1950, "other ..."] }) +Item 29: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6021, 0.0690, 0.1080, 0.2535, 0.6817, 0.7899, 0.3297, 0.8656, 0.1566, 0.3560, "other ..."] }) +Item 30: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9353, 0.1703, 0.1992, 0.7788, 0.4769, 0.5431, 0.4292, 0.5249, 0.7629, 0.9059, "other ..."] }) +Item 31: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7584, 0.3448, 0.7315, 0.5405, 0.6797, 0.3253, 0.8611, 0.0846, 0.0174, 0.0882, "other ..."] }) +Item 32: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3473, 0.1415, 0.1873, 0.7472, 0.3808, 0.6412, 0.1944, 0.5869, 0.0722, 0.3215, "other ..."] }) +Item 33: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3494, 0.5787, 0.8027, 
0.9625, 0.5944, 0.6781, 0.4204, 0.5899, 0.0209, 0.9001, "other ..."] }) +Item 34: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4076, 0.8830, 0.7654, 0.5911, 0.3055, 0.0942, 0.7024, 0.9471, 0.9752, 0.4628, "other ..."] }) +Item 35: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3281, 0.1178, 0.0533, 0.4172, 0.3990, 0.0395, 0.8533, 0.1435, 0.9799, 0.4063, "other ..."] }) +Item 36: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6939, 0.0973, 0.2992, 0.4254, 0.0919, 0.8203, 0.3851, 0.0823, 0.4547, 0.4283, "other ..."] }) +Item 37: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6603, 0.3675, 0.1019, 0.2193, 0.3180, 0.0591, 0.9934, 0.8583, 0.7473, 0.3644, "other ..."] }) +Item 38: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7093, 0.6425, 0.0423, 0.0877, 0.7820, 0.6701, 0.4963, 0.1499, 0.8631, 0.8113, "other ..."] }) +Item 39: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4498, 0.4107, 0.9992, 0.3928, 0.4779, 0.4661, 0.1282, 0.8140, 0.7490, 0.5641, "other ..."] }) +Item 40: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0152, 0.9782, 0.0253, 0.6194, 0.6177, 0.6446, 0.8634, 0.8343, 0.1429, 0.6369, "other ..."] }) +Item 41: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7821, 0.0623, 0.1168, 0.3609, 0.6040, 0.8336, 0.0911, 0.2181, 0.7485, 0.0281, "other ..."] }) +Item 42: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5514, 0.2225, 0.8400, 0.2598, 0.5622, 0.6231, 0.5772, 0.0082, 0.8470, 0.9596, "other ..."] }) +Item 43: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8608, 0.8689, 0.4918, 0.2239, 0.9291, 0.0622, 0.6843, 0.4184, 0.4703, 0.3202, "other ..."] }) +Item 44: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9144, 0.0223, 0.5079, 0.6873, 0.5501, 0.2577, 0.9304, 0.3154, 0.1546, 0.3749, "other ..."] }) +Item 45: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9838, 0.4742, 0.5307, 0.6989, 0.9323, 0.2140, 0.1371, 0.1113, 0.0322, 0.3001, "other ..."] }) +Item 46: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5159, 0.9516, 0.6267, 0.1007, 0.4283, 0.1452, 0.9565, 0.4723, 0.4935, 0.7395, "other ..."] }) +Item 47: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4506, 0.8658, 0.1164, 0.2339, 0.2266, 0.9050, 0.5849, 0.9792, 0.5951, 0.7706, "other ..."] }) +Item 48: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3709, 0.3974, 0.5971, 0.7814, 0.2075, 0.5392, 0.4790, 0.5432, 0.4844, 0.3367, "other ..."] }) +Item 49: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1322, 0.0670, 0.3404, 0.5339, 0.0229, 0.5964, 0.5497, 0.3819, 0.6553, 0.7129, "other ..."] }) +Item 50: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7462, 0.9877, 0.0796, 0.4441, 0.9824, 0.6855, 0.0985, 0.0618, 0.0551, 0.5251, "other ..."] }) +Item 51: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3643, 0.3282, 0.2580, 0.4477, 0.5920, 0.3581, 0.3502, 0.2441, 0.1707, 0.1243, "other ..."] }) +Item 52: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8421, 0.8103, 0.5294, 0.6795, 0.9634, 0.3110, 0.8945, 0.1541, 0.5916, 0.1082, "other ..."] }) +Item 53: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5928, 0.6267, 0.1717, 0.1604, 0.6506, 0.0302, 0.8289, 0.5930, 0.9304, 0.6067, "other ..."] }) +Item 54: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8491, 
0.4500, 0.8568, 0.1146, 0.7974, 0.5148, 0.0862, 0.6020, 0.7705, 0.6189, "other ..."] }) +Item 55: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6095, 0.6335, 0.4740, 0.0746, 0.3871, 0.1016, 0.6414, 0.3076, 0.5484, 0.7602, "other ..."] }) +Item 56: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2720, 0.7143, 0.6966, 0.9709, 0.4263, 0.7441, 0.8624, 0.1372, 0.0418, 0.7771, "other ..."] }) +Item 57: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1524, 0.3441, 0.2368, 0.4350, 0.5328, 0.3005, 0.7021, 0.3614, 0.6369, 0.7984, "other ..."] }) +Item 58: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4278, 0.5749, 0.9222, 0.8668, 0.5708, 0.7825, 0.1484, 0.1654, 0.7721, 0.1671, "other ..."] }) +Item 59: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6227, 0.1202, 0.9715, 0.3936, 0.3423, 0.7479, 0.6526, 0.1867, 0.5568, 0.0922, "other ..."] }) +Item 60: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5174, 0.1043, 0.5020, 0.7082, 0.0108, 0.7768, 0.2805, 0.5635, 0.1165, 0.3446, "other ..."] }) +Item 61: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1347, 0.7000, 0.0530, 0.4959, 0.8227, 0.9831, 0.5433, 0.5201, 0.7924, 0.3847, "other ..."] }) +Item 62: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7594, 0.3160, 0.8848, 0.2336, 0.2080, 0.9798, 0.2609, 0.3579, 0.9043, 0.5035, "other ..."] }) +Item 63: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9221, 0.3578, 0.3207, 0.9945, 0.9288, 0.4608, 0.3001, 0.0296, 0.4678, 0.7422, "other ..."] }) +Item 64: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7278, 0.0499, 0.4536, 0.3295, 0.8839, 0.5060, 0.5773, 0.3133, 0.2521, 0.6842, "other ..."] }) +Item 65: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5677, 0.1247, 0.4928, 0.4097, 0.8433, 0.9238, 0.7848, 0.4437, 0.4696, 0.9886, "other ..."] }) +Item 66: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3202, 0.9724, 0.0590, 0.9146, 0.0865, 0.5347, 0.9236, 0.5211, 0.4621, 0.4500, "other ..."] }) +Item 67: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5299, 0.5421, 0.8194, 0.1018, 0.5426, 0.9350, 0.3228, 0.7979, 0.7473, 0.1118, "other ..."] }) +Item 68: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7403, 0.1475, 0.8585, 0.3990, 0.5766, 0.0263, 0.2665, 0.6317, 0.8026, 0.0227, "other ..."] }) +Item 69: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6163, 0.7762, 0.4365, 0.6713, 0.5647, 0.3449, 0.6615, 0.9430, 0.5941, 0.3563, "other ..."] }) +Item 70: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0981, 0.2398, 0.2595, 0.6075, 0.1605, 0.7487, 0.1070, 0.8330, 0.7908, 0.0203, "other ..."] }) +Item 71: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6478, 0.9371, 0.2378, 0.4279, 0.1400, 0.2146, 0.3193, 0.7330, 0.7932, 0.7295, "other ..."] }) +Item 72: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9586, 0.8715, 0.0659, 0.6144, 0.4995, 0.6693, 0.1415, 0.7556, 0.9350, 0.9924, "other ..."] }) +Item 73: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4640, 0.7218, 0.0154, 0.0829, 0.4829, 0.5139, 0.4344, 0.5872, 0.2770, 0.3745, "other ..."] }) +Item 74: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.9907, 0.6582, 0.3640, 0.9777, 0.6001, 0.3023, 0.3154, 0.2637, 0.7202, 0.7507, "other ..."] }) +Item 75: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, 
vector: [0.3639, 0.3210, 0.3718, 0.7818, 0.6264, 0.2524, 0.6018, 0.4059, 0.9744, 0.3568, "other ..."] }) +Item 76: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6910, 0.9733, 0.3937, 0.8154, 0.2171, 0.7616, 0.3415, 0.4227, 0.6408, 0.0355, "other ..."] }) +Item 77: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6003, 0.7202, 0.3803, 0.5052, 0.4006, 0.6708, 0.0438, 0.8432, 0.8772, 0.6849, "other ..."] }) +Item 78: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8030, 0.0396, 0.0641, 0.3958, 0.3427, 0.9163, 0.0129, 0.4500, 0.8504, 0.8711, "other ..."] }) +Item 79: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8093, 0.7442, 0.3738, 0.9164, 0.5923, 0.7353, 0.5379, 0.6815, 0.5925, 0.7954, "other ..."] }) +Item 80: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6515, 0.6402, 0.4825, 0.1251, 0.1569, 0.9048, 0.3695, 0.0092, 0.1574, 0.1528, "other ..."] }) +Item 81: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2370, 0.7334, 0.4755, 0.8921, 0.1448, 0.2971, 0.2116, 0.1124, 0.7297, 0.2965, "other ..."] }) +Item 82: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5256, 0.7737, 0.1071, 0.1049, 0.5131, 0.1703, 0.7922, 0.1135, 0.4165, 0.0898, "other ..."] }) +Item 83: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3071, 0.2166, 0.0566, 0.5153, 0.8628, 0.9601, 0.6390, 0.4052, 0.2759, 0.4989, "other ..."] }) +Item 84: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.7826, 0.2989, 0.7770, 0.4571, 0.1601, 0.9667, 0.6717, 0.1903, 0.1169, 0.8590, "other ..."] }) +Item 85: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4718, 0.3682, 0.4350, 0.3129, 0.1289, 0.7526, 0.8249, 0.5640, 0.9296, 0.8479, "other ..."] }) +Item 86: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0498, 0.3107, 0.7264, 0.3119, 0.9096, 0.1574, 0.0399, 0.6895, 0.0600, 0.7256, "other ..."] }) +Item 87: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.2909, 0.8867, 0.3238, 0.4342, 0.3491, 0.4305, 0.8452, 0.0936, 0.1220, 0.3452, "other ..."] }) +Item 88: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8769, 0.4665, 0.6818, 0.3210, 0.8711, 0.3130, 0.2871, 0.8996, 0.6896, 0.5484, "other ..."] }) +Item 89: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.6270, 0.3774, 0.7874, 0.7272, 0.4240, 0.1508, 0.0360, 0.5710, 0.2254, 0.0950, "other ..."] }) +Item 90: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.4460, 0.4258, 0.3236, 0.3679, 0.9002, 0.2569, 0.4921, 0.9407, 0.7830, 0.2773, "other ..."] }) +Item 91: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8560, 0.9670, 0.3969, 0.5008, 0.1567, 0.7420, 0.0072, 0.1891, 0.9690, 0.0387, "other ..."] }) +Item 92: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.3994, 0.3790, 0.3149, 0.3094, 0.6979, 0.4079, 0.0662, 0.5442, 0.6009, 0.6694, "other ..."] }) +Item 93: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5034, 0.4728, 0.9950, 0.7630, 0.0154, 0.4453, 0.6893, 0.6996, 0.0246, 0.0245, "other ..."] }) +Item 94: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1804, 0.0669, 0.8639, 0.6324, 0.8798, 0.3417, 0.1164, 0.5756, 0.9767, 0.7200, "other ..."] }) +Item 95: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8157, 0.8688, 0.2659, 0.6080, 0.5274, 0.1883, 0.7562, 0.8511, 0.6928, 0.8151, "other ..."] }) +Item 96: Leaf(Leaf { header: NodeHeaderEuclidean 
{ bias: 0.0 }, vector: [0.1112, 0.3992, 0.0741, 0.7318, 0.9446, 0.2970, 0.9245, 0.1696, 0.6283, 0.9061, "other ..."] }) +Item 97: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0850, 0.4087, 0.7257, 0.3157, 0.9526, 0.5290, 0.5818, 0.5460, 0.1906, 0.9422, "other ..."] }) +Item 98: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5324, 0.0597, 0.9226, 0.4007, 0.0213, 0.2651, 0.8515, 0.9985, 0.8066, 0.3377, "other ..."] }) +Item 99: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8320, 0.5156, 0.6172, 0.6617, 0.4999, 0.2464, 0.4536, 0.3265, 0.2163, 0.5406, "other ..."] }) Tree 0: Descendants(Descendants { descendants: [6, 14, 18, 21, 22, 23, 26, 28, 40, 42, 43, 44, 47, 51, 54, 59, 61, 62, 68, 73, 80, 82, 83, 87, 90] }) Tree 1: Descendants(Descendants { descendants: [8, 10, 15, 17, 20, 24, 34, 37, 46, 49, 53, 55, 57, 66, 71, 75, 77, 79, 92, 95, 98] }) -Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(1), normal: [-0.2169, 0.0505, 0.0138, 0.1637, -0.1566, -0.2702, 0.1215, 0.0399, 0.3132, 0.3827, 0.1109, 0.2686, 0.0968, -0.0643, -0.0972, -0.0898, 0.0456, 0.0027, -0.2022, 0.0048, -0.4755, 0.1153, -0.1116, -0.1966, 0.2755, -0.0994, -0.0549, -0.0779, 0.1455, 0.0050] }) +Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(1), normal: [-0.2169, 0.0505, 0.0138, 0.1637, -0.1566, -0.2702, 0.1215, 0.0399, 0.3132, 0.3827, "other ..."] }) Tree 3: Descendants(Descendants { descendants: [9, 12, 29, 31, 32, 35, 36, 64, 89, 93, 96] }) Tree 4: Descendants(Descendants { descendants: [3, 5, 7, 11, 25, 30, 39, 48, 52, 58, 60, 67, 69, 70, 81, 86, 91, 97, 99] }) Tree 5: Descendants(Descendants { descendants: [0, 1, 2, 4, 13, 16, 19, 27, 33, 38, 41, 45, 50, 56, 63, 65, 72, 74, 76, 78, 84, 85, 88, 94] }) -Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Tree(5), normal: [0.0550, -0.0317, -0.1815, 0.1748, 0.2592, 0.1276, 0.0648, -0.3539, -0.1585, 0.0911, -0.1816, -0.0939, 0.0021, 0.0709, 0.0202, 0.0530, -0.1488, -0.2081, -0.0009, -0.2521, -0.0654, 0.0246, 0.4066, 0.3606, -0.2069, 0.1391, 0.0840, 0.1356, 0.1600, -0.3060] }) -Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: Tree(3), right: Tree(6), normal: [-0.0346, 0.4691, 0.1765, -0.0148, 0.0259, 0.1307, -0.1240, -0.1665, 0.0875, 0.1291, 0.2901, -0.1181, 0.1028, 0.1873, -0.1748, 0.1071, 0.3026, 0.1918, -0.2636, -0.1360, -0.0094, 0.1939, -0.2513, 0.0142, -0.2946, -0.0155, -0.0352, -0.2004, 0.1297, -0.1179] }) -Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Tree(7), normal: [-0.0833, -0.0190, -0.0905, 0.1619, -0.0567, 0.2069, -0.2559, -0.1137, 0.0169, -0.0864, -0.0911, -0.1466, 0.0229, 0.1060, -0.4102, 0.1031, 0.2331, -0.1867, -0.0179, 0.4030, -0.0541, 0.1071, -0.1108, -0.3500, 0.2832, 0.1696, 0.0784, 0.1984, 0.1857, -0.1249] }) +Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Tree(5), normal: [0.0550, -0.0317, -0.1815, 0.1748, 0.2592, 0.1276, 0.0648, -0.3539, -0.1585, 0.0911, "other ..."] }) +Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: Tree(3), right: Tree(6), normal: [-0.0346, 0.4691, 0.1765, -0.0148, 0.0259, 0.1307, -0.1240, -0.1665, 0.0875, 0.1291, "other ..."] }) +Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Tree(7), normal: [-0.0833, -0.0190, -0.0905, 0.1619, -0.0567, 0.2069, -0.2559, -0.1137, 0.0169, -0.0864, "other ..."] }) Tree 11: Descendants(Descendants { descendants: [0, 1, 3, 4, 7, 9, 10, 11, 13, 27, 29, 30, 31, 39, 45, 50, 56, 57, 60, 64, 65, 67, 73, 84, 85, 88, 91, 97] }) Tree 14: 
Descendants(Descendants { descendants: [25, 32, 33, 49, 81, 99] }) -Tree 15: SplitPlaneNormal(SplitPlaneNormal { left: Tree(103), right: Tree(14), normal: [-0.3533, -0.1026, 0.1114, 0.1360, -0.1732, 0.1633, -0.0302, -0.1458, -0.0030, -0.2101, -0.1642, 0.0048, 0.1560, 0.1240, -0.1897, 0.0059, 0.2732, -0.0767, -0.0073, -0.0686, -0.1380, -0.2031, -0.1076, -0.3940, 0.3151, -0.1967, -0.0063, 0.3692, 0.0094, 0.1459] }) -Tree 16: SplitPlaneNormal(SplitPlaneNormal { left: Tree(100), right: Tree(15), normal: [0.2420, -0.1007, 0.3792, 0.0349, -0.0702, 0.0947, -0.3028, -0.1386, -0.2889, 0.0402, 0.0512, -0.0020, 0.2535, -0.0405, -0.0259, -0.4431, -0.1803, 0.1423, 0.2680, -0.0370, 0.1828, -0.0471, 0.0297, 0.0426, 0.1825, 0.0618, 0.1394, -0.1949, -0.2250, -0.0376] }) -Tree 17: SplitPlaneNormal(SplitPlaneNormal { left: Tree(11), right: Tree(16), normal: [-0.0014, -0.0866, -0.0081, 0.0327, -0.1021, -0.1147, 0.0955, 0.0205, 0.2358, -0.1408, 0.2006, 0.3335, 0.2843, -0.1757, 0.2793, -0.2980, -0.0359, 0.0865, 0.2789, -0.3740, 0.0533, -0.0015, 0.3312, -0.0091, -0.0798, -0.1063, -0.0473, -0.0169, -0.3053, 0.0218] }) +Tree 15: SplitPlaneNormal(SplitPlaneNormal { left: Tree(103), right: Tree(14), normal: [-0.3533, -0.1026, 0.1114, 0.1360, -0.1732, 0.1633, -0.0302, -0.1458, -0.0030, -0.2101, "other ..."] }) +Tree 16: SplitPlaneNormal(SplitPlaneNormal { left: Tree(100), right: Tree(15), normal: [0.2420, -0.1007, 0.3792, 0.0349, -0.0702, 0.0947, -0.3028, -0.1386, -0.2889, 0.0402, "other ..."] }) +Tree 17: SplitPlaneNormal(SplitPlaneNormal { left: Tree(11), right: Tree(16), normal: [-0.0014, -0.0866, -0.0081, 0.0327, -0.1021, -0.1147, 0.0955, 0.0205, 0.2358, -0.1408, "other ..."] }) Tree 18: Descendants(Descendants { descendants: [4, 7, 9, 11, 25, 27, 29, 33, 48, 52, 56, 58, 60, 67, 76, 78, 81, 91, 97] }) Tree 19: Descendants(Descendants { descendants: [0, 1, 6, 12, 13, 14, 18, 19, 31, 35, 38, 39, 42, 45, 50, 51, 54, 64, 85, 86, 89, 93, 94, 96] }) -Tree 20: SplitPlaneNormal(SplitPlaneNormal { left: Tree(18), right: Tree(19), normal: [-0.0557, -0.0514, -0.1661, -0.1599, 0.0364, -0.1539, 0.0511, -0.1720, -0.0989, -0.0885, 0.0527, -0.0864, 0.0909, -0.1323, -0.0939, 0.1246, 0.0144, 0.1074, 0.1608, -0.2549, 0.2505, 0.0488, 0.5129, 0.1660, -0.4090, 0.2014, -0.1974, 0.1653, 0.2648, -0.0579] }) +Tree 20: SplitPlaneNormal(SplitPlaneNormal { left: Tree(18), right: Tree(19), normal: [-0.0557, -0.0514, -0.1661, -0.1599, 0.0364, -0.1539, 0.0511, -0.1720, -0.0989, -0.0885, "other ..."] }) Tree 21: Descendants(Descendants { descendants: [2, 3, 5, 8, 15, 16, 17, 20, 24, 32, 36, 49, 59, 62, 63, 65, 66, 69, 70, 75, 79, 80, 83, 84, 87, 92] }) -Tree 23: SplitPlaneNormal(SplitPlaneNormal { left: Tree(21), right: Tree(106), normal: [0.1147, 0.1494, -0.3442, -0.2764, 0.1638, -0.3154, 0.0552, 0.1184, 0.1290, 0.0611, 0.1131, -0.1640, -0.0971, 0.1107, 0.0928, 0.1311, 0.2739, -0.1771, -0.1730, 0.0449, 0.3326, -0.1653, 0.1866, 0.1290, -0.3017, -0.1810, -0.0166, -0.0532, -0.0940, 0.2350] }) -Tree 24: SplitPlaneNormal(SplitPlaneNormal { left: Tree(20), right: Tree(23), normal: [-0.0617, -0.0186, -0.1350, -0.3128, -0.0542, 0.0524, 0.2907, 0.1259, 0.0908, 0.2518, 0.1803, 0.3667, 0.0478, 0.1034, 0.1925, -0.2745, -0.3940, 0.2451, 0.0596, -0.1108, 0.0388, -0.1178, -0.1064, 0.1941, -0.0789, 0.0339, 0.0391, -0.2001, -0.2129, -0.1413] }) +Tree 23: SplitPlaneNormal(SplitPlaneNormal { left: Tree(21), right: Tree(106), normal: [0.1147, 0.1494, -0.3442, -0.2764, 0.1638, -0.3154, 0.0552, 0.1184, 0.1290, 0.0611, "other ..."] }) +Tree 24: 
SplitPlaneNormal(SplitPlaneNormal { left: Tree(20), right: Tree(23), normal: [-0.0617, -0.0186, -0.1350, -0.3128, -0.0542, 0.0524, 0.2907, 0.1259, 0.0908, 0.2518, "other ..."] }) Tree 25: Descendants(Descendants { descendants: [] }) Tree 26: Descendants(Descendants { descendants: [5, 16, 18, 22, 36, 43, 44, 49, 59, 83, 84, 96] }) -Tree 28: SplitPlaneNormal(SplitPlaneNormal { left: Tree(26), right: Tree(111), normal: [0.0098, 0.2629, -0.1633, -0.1082, -0.0680, -0.1540, 0.0015, 0.1634, 0.2796, 0.1046, 0.0476, -0.0620, -0.1644, 0.2042, -0.1229, 0.2581, 0.1689, -0.0211, -0.5080, 0.2612, -0.1435, 0.1440, -0.1411, 0.2912, -0.1177, 0.0276, -0.1104, 0.2365, -0.0692, 0.0538] }) +Tree 28: SplitPlaneNormal(SplitPlaneNormal { left: Tree(26), right: Tree(111), normal: [0.0098, 0.2629, -0.1633, -0.1082, -0.0680, -0.1540, 0.0015, 0.1634, 0.2796, 0.1046, "other ..."] }) Tree 29: Descendants(Descendants { descendants: [7, 9, 11, 12, 31, 32, 33, 35, 39, 52, 58, 69, 74, 81, 86, 89, 92, 97, 99] }) -Tree 30: SplitPlaneNormal(SplitPlaneNormal { left: Tree(28), right: Tree(29), normal: [-0.1005, -0.0667, 0.1734, 0.2457, 0.1330, 0.0145, -0.1141, -0.0260, -0.1916, -0.1144, -0.1136, -0.1094, 0.0732, 0.0961, -0.3974, -0.0322, 0.0921, -0.0225, 0.0918, 0.1908, -0.0333, -0.2674, -0.0643, -0.5829, 0.1107, -0.0245, -0.0154, 0.0397, 0.2926, 0.2113] }) -Tree 31: SplitPlaneNormal(SplitPlaneNormal { left: Tree(25), right: Tree(30), normal: [0.0357, 0.2985, -0.2477, -0.0879, 0.2653, 0.2303, 0.1487, 0.0825, -0.0937, 0.1187, 0.2846, -0.2087, -0.2004, 0.2058, 0.1070, 0.1817, -0.1476, 0.0179, 0.0776, 0.1242, 0.0699, 0.0648, -0.0274, 0.2480, -0.4002, 0.2701, -0.0571, -0.2140, 0.0239, -0.1267] }) +Tree 30: SplitPlaneNormal(SplitPlaneNormal { left: Tree(28), right: Tree(29), normal: [-0.1005, -0.0667, 0.1734, 0.2457, 0.1330, 0.0145, -0.1141, -0.0260, -0.1916, -0.1144, "other ..."] }) +Tree 31: SplitPlaneNormal(SplitPlaneNormal { left: Tree(25), right: Tree(30), normal: [0.0357, 0.2985, -0.2477, -0.0879, 0.2653, 0.2303, 0.1487, 0.0825, -0.0937, 0.1187, "other ..."] }) Tree 34: Descendants(Descendants { descendants: [0, 1, 2, 4, 13, 19, 24, 25, 27, 29, 41, 42, 45, 51, 56, 57, 60, 63, 64, 65, 77, 85, 91, 93, 94] }) -Tree 35: SplitPlaneNormal(SplitPlaneNormal { left: Tree(31), right: Tree(34), normal: [-0.1861, -0.2141, -0.1520, 0.0974, 0.0900, 0.2616, -0.0688, -0.2960, -0.0378, -0.0949, -0.3277, -0.0323, -0.0636, 0.2488, -0.4443, 0.3214, 0.0959, -0.3218, -0.0695, 0.0767, 0.0436, 0.1616, 0.0567, 0.1653, 0.1340, 0.0084, 0.0783, -0.0016, 0.1253, -0.1028] }) +Tree 35: SplitPlaneNormal(SplitPlaneNormal { left: Tree(31), right: Tree(34), normal: [-0.1861, -0.2141, -0.1520, 0.0974, 0.0900, 0.2616, -0.0688, -0.2960, -0.0378, -0.0949, "other ..."] }) Tree 36: Descendants(Descendants { descendants: [8, 15, 18, 21, 36, 42, 53, 59, 71, 87, 89, 93, 98] }) Tree 38: Descendants(Descendants { descendants: [4, 11, 23, 33, 48, 52, 54, 63, 64, 66, 69, 74, 75, 88, 90, 99] }) -Tree 39: SplitPlaneNormal(SplitPlaneNormal { left: Tree(114), right: Tree(38), normal: [0.2761, -0.2159, -0.3421, 0.2165, 0.3175, -0.0799, 0.0880, 0.0680, 0.0497, -0.0448, 0.0407, -0.1623, 0.2173, -0.3136, 0.0590, -0.2003, -0.0082, -0.1270, 0.0564, -0.2095, -0.0657, 0.1016, -0.0702, 0.2284, -0.2728, -0.0290, 0.3925, -0.0802, -0.0253, -0.0275] }) -Tree 40: SplitPlaneNormal(SplitPlaneNormal { left: Tree(36), right: Tree(39), normal: [-0.0582, 0.0824, 0.2122, 0.1920, 0.2041, -0.0451, -0.0324, 0.0349, 0.0748, 0.2791, -0.0863, -0.0199, -0.0004, 0.2405, -0.1246, 0.1038, 0.1006, -0.0305, 
0.0380, 0.3559, -0.2894, 0.0328, -0.4506, -0.3225, 0.2490, -0.0197, -0.1629, -0.2224, 0.1068, -0.0759] }) +Tree 39: SplitPlaneNormal(SplitPlaneNormal { left: Tree(114), right: Tree(38), normal: [0.2761, -0.2159, -0.3421, 0.2165, 0.3175, -0.0799, 0.0880, 0.0680, 0.0497, -0.0448, "other ..."] }) +Tree 40: SplitPlaneNormal(SplitPlaneNormal { left: Tree(36), right: Tree(39), normal: [-0.0582, 0.0824, 0.2122, 0.1920, 0.2041, -0.0451, -0.0324, 0.0349, 0.0748, 0.2791, "other ..."] }) Tree 41: Descendants(Descendants { descendants: [1, 2, 3, 6, 22, 29, 40, 41, 43, 44, 45, 47, 49, 50, 65, 72, 73, 77, 80, 82, 83, 85, 92, 94] }) Tree 42: Descendants(Descendants { descendants: [13, 14, 16, 24, 30, 32, 35, 37, 38, 51, 57, 61, 78, 79, 95, 96] }) -Tree 43: SplitPlaneNormal(SplitPlaneNormal { left: Tree(41), right: Tree(42), normal: [-0.2362, 0.0074, -0.1011, 0.1902, -0.2410, -0.2179, 0.1000, -0.2451, 0.0583, 0.2037, -0.0601, 0.2616, 0.0565, -0.0938, -0.0145, -0.1146, 0.4269, -0.0358, -0.0730, 0.0392, -0.2603, 0.1109, 0.0687, 0.0439, 0.1691, -0.0846, -0.4510, -0.0385, 0.2366, 0.0783] }) -Tree 44: SplitPlaneNormal(SplitPlaneNormal { left: Tree(40), right: Tree(43), normal: [-0.0723, -0.1644, -0.4771, -0.0662, 0.1894, 0.0474, 0.2151, 0.0954, -0.0123, 0.1914, -0.1252, 0.1579, 0.0484, 0.1324, 0.1851, 0.1471, -0.2672, -0.1088, 0.0051, -0.0882, 0.0127, -0.0930, 0.0175, 0.2037, -0.1296, 0.3565, -0.3824, 0.0678, 0.0265, -0.2280] }) +Tree 43: SplitPlaneNormal(SplitPlaneNormal { left: Tree(41), right: Tree(42), normal: [-0.2362, 0.0074, -0.1011, 0.1902, -0.2410, -0.2179, 0.1000, -0.2451, 0.0583, 0.2037, "other ..."] }) +Tree 44: SplitPlaneNormal(SplitPlaneNormal { left: Tree(40), right: Tree(43), normal: [-0.0723, -0.1644, -0.4771, -0.0662, 0.1894, 0.0474, 0.2151, 0.0954, -0.0123, 0.1914, "other ..."] }) Tree 45: Descendants(Descendants { descendants: [23, 43, 46, 49, 53, 55, 66, 75, 82] }) Tree 46: Descendants(Descendants { descendants: [0, 6, 8, 18, 21, 22, 28, 31, 39, 41, 42, 44, 45, 47, 50, 51, 52, 54, 58, 59, 72, 76, 77, 80, 85, 88, 91, 93, 98] }) Tree 47: Descendants(Descendants { descendants: [10, 13, 19, 27, 37, 40, 67, 71, 73, 86, 87, 90] }) -Tree 48: SplitPlaneNormal(SplitPlaneNormal { left: Tree(46), right: Tree(47), normal: [-0.0763, 0.0885, -0.2724, 0.0145, 0.0947, -0.2347, 0.1067, -0.0488, 0.0689, 0.0441, -0.0698, -0.0018, -0.1918, 0.0338, 0.0339, -0.1357, 0.0752, 0.0238, 0.0309, 0.2366, -0.2525, 0.4409, -0.3780, 0.1873, 0.0832, -0.4678, -0.0565, 0.1858, -0.0473, -0.0123] }) +Tree 48: SplitPlaneNormal(SplitPlaneNormal { left: Tree(46), right: Tree(47), normal: [-0.0763, 0.0885, -0.2724, 0.0145, 0.0947, -0.2347, 0.1067, -0.0488, 0.0689, 0.0441, "other ..."] }) Tree 49: Descendants(Descendants { descendants: [] }) -Tree 50: SplitPlaneNormal(SplitPlaneNormal { left: Tree(48), right: Tree(49), normal: [-0.0493, 0.0077, 0.1071, 0.2488, -0.0877, -0.1022, -0.1081, -0.0104, 0.0486, -0.0089, 0.0647, 0.0797, -0.0989, 0.1190, -0.1396, 0.0312, 0.0572, -0.6574, -0.0868, 0.0124, -0.2229, 0.1733, -0.2335, 0.1060, 0.2380, 0.1268, -0.0459, -0.2752, -0.1312, -0.2836] }) +Tree 50: SplitPlaneNormal(SplitPlaneNormal { left: Tree(48), right: Tree(49), normal: [-0.0493, 0.0077, 0.1071, 0.2488, -0.0877, -0.1022, -0.1081, -0.0104, 0.0486, -0.0089, "other ..."] }) Tree 51: Descendants(Descendants { descendants: [1, 14, 29, 30, 35, 38, 57, 63, 64, 65, 78, 83, 89, 94, 95, 97] }) -Tree 53: SplitPlaneNormal(SplitPlaneNormal { left: Tree(51), right: Tree(117), normal: [-0.2008, 0.1922, 0.3079, -0.0453, -0.3244, 0.1055, -0.0466, 
-0.1035, 0.1446, -0.0438, 0.1305, 0.3916, -0.0534, -0.0555, 0.1314, -0.1056, 0.2684, 0.3731, -0.1052, -0.2472, -0.1977, -0.0885, 0.0235, 0.0173, 0.0686, -0.2534, 0.1249, 0.0815, -0.1741, 0.1279] }) -Tree 54: SplitPlaneNormal(SplitPlaneNormal { left: Tree(50), right: Tree(53), normal: [-0.2175, -0.1158, -0.0781, 0.0622, -0.2080, 0.0082, 0.1641, 0.0493, -0.0815, 0.2456, -0.1406, 0.2167, 0.4664, -0.1600, -0.0316, -0.3137, -0.0300, 0.0707, -0.0475, 0.1332, -0.0198, 0.0259, -0.3368, -0.2182, 0.3876, 0.0855, -0.0989, 0.1142, 0.0094, 0.1087] }) -Tree 55: SplitPlaneNormal(SplitPlaneNormal { left: Tree(45), right: Tree(54), normal: [-0.0297, 0.0635, -0.0090, 0.0742, 0.0765, 0.2930, -0.2378, -0.0111, -0.1069, -0.1084, -0.2281, -0.1307, -0.1940, 0.0743, -0.2224, 0.2621, 0.4020, -0.1358, -0.1421, 0.2556, 0.1564, 0.0275, 0.0829, -0.2441, 0.1231, 0.2096, 0.0415, 0.3321, 0.2420, 0.0255] }) +Tree 53: SplitPlaneNormal(SplitPlaneNormal { left: Tree(51), right: Tree(117), normal: [-0.2008, 0.1922, 0.3079, -0.0453, -0.3244, 0.1055, -0.0466, -0.1035, 0.1446, -0.0438, "other ..."] }) +Tree 54: SplitPlaneNormal(SplitPlaneNormal { left: Tree(50), right: Tree(53), normal: [-0.2175, -0.1158, -0.0781, 0.0622, -0.2080, 0.0082, 0.1641, 0.0493, -0.0815, 0.2456, "other ..."] }) +Tree 55: SplitPlaneNormal(SplitPlaneNormal { left: Tree(45), right: Tree(54), normal: [-0.0297, 0.0635, -0.0090, 0.0742, 0.0765, 0.2930, -0.2378, -0.0111, -0.1069, -0.1084, "other ..."] }) Tree 58: Descendants(Descendants { descendants: [1, 3, 4, 7, 11, 14, 27, 29, 30, 33, 39, 50, 52, 56, 57, 67, 68, 81, 86, 88, 93, 94, 95, 97, 99] }) Tree 59: Descendants(Descendants { descendants: [5, 9, 15, 23, 31, 32, 45, 63, 64, 65, 69, 77, 83, 89, 92] }) -Tree 60: SplitPlaneNormal(SplitPlaneNormal { left: Tree(58), right: Tree(59), normal: [0.1786, -0.1600, -0.2775, -0.0898, 0.1079, 0.1067, -0.0025, 0.0785, -0.0136, -0.0227, -0.0216, 0.1138, -0.0032, -0.1261, -0.1610, -0.1209, -0.4143, 0.2001, 0.4793, -0.2008, -0.0190, -0.2578, 0.2425, 0.1568, -0.0856, 0.2075, -0.2392, -0.0009, -0.1147, 0.0704] }) +Tree 60: SplitPlaneNormal(SplitPlaneNormal { left: Tree(58), right: Tree(59), normal: [0.1786, -0.1600, -0.2775, -0.0898, 0.1079, 0.1067, -0.0025, 0.0785, -0.0136, -0.0227, "other ..."] }) Tree 61: Descendants(Descendants { descendants: [12, 16, 17, 19, 24, 25, 26, 34, 35, 36, 37, 41, 49, 53, 55, 60, 62, 70, 71, 74, 75, 78, 79, 91, 96, 98] }) -Tree 63: SplitPlaneNormal(SplitPlaneNormal { left: Tree(61), right: Tree(120), normal: [0.0237, 0.2316, 0.0207, 0.0743, 0.2916, 0.2200, 0.0594, 0.0390, -0.4108, -0.0552, -0.0027, -0.4418, -0.0800, -0.0528, 0.1247, 0.0186, -0.1231, -0.0172, -0.1034, -0.0108, 0.1670, -0.1303, 0.0481, 0.2372, -0.4116, 0.2883, 0.0878, -0.0047, -0.0383, 0.1339] }) -Tree 64: SplitPlaneNormal(SplitPlaneNormal { left: Tree(60), right: Tree(63), normal: [-0.0179, -0.0168, -0.2421, -0.1799, -0.1356, 0.0168, 0.1303, -0.2701, 0.3890, -0.1482, 0.2931, 0.1127, -0.0574, -0.0419, 0.2841, 0.1814, 0.0900, 0.0942, 0.0058, -0.3365, -0.0987, 0.0550, 0.2670, 0.1378, -0.2335, -0.1600, 0.0364, 0.2521, -0.1054, -0.1411] }) +Tree 63: SplitPlaneNormal(SplitPlaneNormal { left: Tree(61), right: Tree(120), normal: [0.0237, 0.2316, 0.0207, 0.0743, 0.2916, 0.2200, 0.0594, 0.0390, -0.4108, -0.0552, "other ..."] }) +Tree 64: SplitPlaneNormal(SplitPlaneNormal { left: Tree(60), right: Tree(63), normal: [-0.0179, -0.0168, -0.2421, -0.1799, -0.1356, 0.0168, 0.1303, -0.2701, 0.3890, -0.1482, "other ..."] }) Tree 65: Descendants(Descendants { descendants: [3, 7, 9, 10, 17, 25, 
30, 37, 38, 39, 55, 56, 60, 65, 67, 77, 78, 86, 88, 94, 97] }) Tree 66: Descendants(Descendants { descendants: [1, 4, 11, 13, 19, 27, 42, 54, 58, 73, 76, 85, 87, 91] }) -Tree 67: SplitPlaneNormal(SplitPlaneNormal { left: Tree(65), right: Tree(66), normal: [0.0741, 0.1397, -0.1747, 0.1955, 0.0976, -0.0066, -0.2642, -0.4876, -0.0256, -0.1423, -0.0097, -0.1884, 0.1172, -0.0446, -0.0250, 0.1777, 0.0816, 0.2413, -0.2184, -0.1328, -0.0861, 0.1612, 0.1595, 0.1337, -0.2581, 0.2318, 0.1348, 0.1098, 0.0504, -0.3532] }) +Tree 67: SplitPlaneNormal(SplitPlaneNormal { left: Tree(65), right: Tree(66), normal: [0.0741, 0.1397, -0.1747, 0.1955, 0.0976, -0.0066, -0.2642, -0.4876, -0.0256, -0.1423, "other ..."] }) Tree 68: Descendants(Descendants { descendants: [12, 32, 35, 75, 99] }) Tree 70: Descendants(Descendants { descendants: [0, 5, 6, 18, 22, 31, 33, 44, 45, 51, 64, 68, 89, 93] }) -Tree 71: SplitPlaneNormal(SplitPlaneNormal { left: Tree(123), right: Tree(70), normal: [0.0908, -0.0903, 0.3735, 0.0708, -0.0676, -0.1918, -0.1890, -0.2189, -0.3541, -0.3300, -0.2211, 0.0110, -0.1644, -0.0635, -0.1158, 0.1109, 0.0872, -0.1639, 0.1730, 0.0744, 0.1254, 0.0237, 0.4010, -0.0193, 0.1347, -0.0681, 0.1437, -0.1188, 0.2499, -0.0817] }) +Tree 71: SplitPlaneNormal(SplitPlaneNormal { left: Tree(123), right: Tree(70), normal: [0.0908, -0.0903, 0.3735, 0.0708, -0.0676, -0.1918, -0.1890, -0.2189, -0.3541, -0.3300, "other ..."] }) Tree 72: Descendants(Descendants { descendants: [21, 23, 41, 43, 46, 49, 53, 59, 71, 80] }) -Tree 73: SplitPlaneNormal(SplitPlaneNormal { left: Tree(71), right: Tree(72), normal: [-0.0215, 0.0861, 0.0260, -0.2042, -0.1901, -0.1088, 0.0154, -0.2001, 0.3676, -0.0737, 0.4196, 0.0262, -0.2664, 0.1847, 0.0711, -0.0195, 0.0464, 0.0459, 0.1556, -0.2079, -0.0035, -0.3500, 0.1718, -0.1083, -0.0923, -0.2715, 0.1083, -0.0153, -0.2891, -0.1430] }) -Tree 74: SplitPlaneNormal(SplitPlaneNormal { left: Tree(68), right: Tree(73), normal: [0.1338, 0.3609, -0.1010, -0.1138, 0.0836, 0.0482, -0.0207, 0.0315, 0.1349, 0.1157, 0.2265, -0.2023, -0.2901, 0.1725, 0.2272, 0.2300, -0.0068, -0.0465, 0.0117, 0.1397, 0.0488, -0.0288, 0.1493, 0.4854, -0.2355, 0.0637, -0.0046, -0.2136, -0.2705, -0.1199] }) -Tree 75: SplitPlaneNormal(SplitPlaneNormal { left: Tree(67), right: Tree(74), normal: [0.0874, -0.1312, 0.0401, 0.1433, -0.0151, -0.0312, 0.0888, 0.0681, -0.2158, -0.0702, 0.0673, 0.1867, 0.3739, -0.1279, 0.3181, -0.3485, -0.1212, 0.2294, 0.2856, -0.2235, 0.0543, -0.2036, 0.1905, -0.0273, -0.0616, 0.2454, -0.1052, -0.0761, -0.3185, -0.0837] }) +Tree 73: SplitPlaneNormal(SplitPlaneNormal { left: Tree(71), right: Tree(72), normal: [-0.0215, 0.0861, 0.0260, -0.2042, -0.1901, -0.1088, 0.0154, -0.2001, 0.3676, -0.0737, "other ..."] }) +Tree 74: SplitPlaneNormal(SplitPlaneNormal { left: Tree(68), right: Tree(73), normal: [0.1338, 0.3609, -0.1010, -0.1138, 0.0836, 0.0482, -0.0207, 0.0315, 0.1349, 0.1157, "other ..."] }) +Tree 75: SplitPlaneNormal(SplitPlaneNormal { left: Tree(67), right: Tree(74), normal: [0.0874, -0.1312, 0.0401, 0.1433, -0.0151, -0.0312, 0.0888, 0.0681, -0.2158, -0.0702, "other ..."] }) Tree 80: Descendants(Descendants { descendants: [0, 1, 4, 7, 11, 12, 17, 19, 25, 31, 32, 33, 35, 36, 48, 49, 56, 58, 60, 63, 66, 70, 74, 75, 81, 91, 93, 96, 99] }) Tree 81: Descendants(Descendants { descendants: [27, 85] }) Tree 83: Descendants(Descendants { descendants: [3, 5, 6, 8, 9, 13, 15, 18, 20, 24, 40, 45, 46, 57, 61, 65, 73, 77, 79, 82, 83, 87, 89, 92, 95] }) -Tree 84: SplitPlaneNormal(SplitPlaneNormal { left: Tree(126), 
right: Tree(83), normal: [-0.0973, 0.2024, -0.0486, 0.1536, -0.0050, 0.0761, -0.0099, -0.1343, -0.3098, 0.2276, -0.1129, 0.0524, -0.0186, -0.2463, -0.1155, -0.1463, -0.4634, 0.0699, 0.0128, 0.1095, -0.1902, 0.3119, -0.0996, 0.4041, 0.0501, 0.2183, -0.1728, -0.0823, 0.0981, -0.0756] }) -Tree 85: SplitPlaneNormal(SplitPlaneNormal { left: Tree(81), right: Tree(84), normal: [0.1433, -0.2044, -0.0819, -0.1305, 0.0272, 0.1805, 0.0805, 0.1128, 0.0279, -0.0258, 0.1917, 0.2602, -0.0370, -0.0627, 0.4363, -0.2168, -0.1736, 0.1961, 0.3870, -0.0136, 0.1538, -0.1195, 0.0285, -0.2140, 0.0699, 0.0224, 0.3888, -0.1619, -0.1619, 0.0292] }) -Tree 86: SplitPlaneNormal(SplitPlaneNormal { left: Tree(80), right: Tree(85), normal: [0.0240, 0.1785, 0.0265, -0.3440, 0.1702, 0.0470, 0.0107, 0.2175, -0.0317, 0.1600, 0.2534, -0.2271, -0.2660, 0.0689, -0.0707, -0.0678, -0.1642, 0.0867, 0.0469, -0.1977, 0.2930, 0.0458, 0.2275, 0.3742, -0.3834, 0.1165, -0.1414, -0.0012, -0.0766, -0.0228] }) +Tree 84: SplitPlaneNormal(SplitPlaneNormal { left: Tree(126), right: Tree(83), normal: [-0.0973, 0.2024, -0.0486, 0.1536, -0.0050, 0.0761, -0.0099, -0.1343, -0.3098, 0.2276, "other ..."] }) +Tree 85: SplitPlaneNormal(SplitPlaneNormal { left: Tree(81), right: Tree(84), normal: [0.1433, -0.2044, -0.0819, -0.1305, 0.0272, 0.1805, 0.0805, 0.1128, 0.0279, -0.0258, "other ..."] }) +Tree 86: SplitPlaneNormal(SplitPlaneNormal { left: Tree(80), right: Tree(85), normal: [0.0240, 0.1785, 0.0265, -0.3440, 0.1702, 0.0470, 0.0107, 0.2175, -0.0317, 0.1600, "other ..."] }) Tree 87: Descendants(Descendants { descendants: [0, 4, 13, 21, 27, 28, 29, 37, 38, 40, 41, 47, 50, 72, 73, 76, 80, 82, 85, 91] }) Tree 88: Descendants(Descendants { descendants: [1, 3, 7, 10, 17, 30, 56, 67, 77, 78, 86, 88] }) Tree 89: Descendants(Descendants { descendants: [8, 20, 24, 46, 49, 61, 62, 66, 70, 75, 79, 81, 92] }) Tree 90: Descendants(Descendants { descendants: [6, 14, 18, 31, 33, 39, 42, 45, 48, 51, 57, 58, 63, 64, 89, 93, 95, 97] }) Tree 91: Descendants(Descendants { descendants: [2, 5, 9, 15, 16, 22, 23, 26, 32, 34, 35, 36, 43, 44, 52, 53, 54, 55, 68, 69, 71, 74, 83, 84, 87, 90, 94, 96, 98, 99] }) -Tree 92: SplitPlaneNormal(SplitPlaneNormal { left: Tree(90), right: Tree(91), normal: [0.1432, -0.2381, -0.0722, -0.2146, -0.0888, -0.0131, 0.0621, 0.2812, 0.2690, -0.0300, 0.1311, 0.3498, 0.1850, -0.0395, -0.0264, -0.2005, -0.1037, 0.0984, 0.1482, -0.2048, 0.1539, -0.1091, -0.2066, -0.1567, 0.2054, -0.3106, 0.1361, 0.2352, -0.2780, 0.1194] }) +Tree 92: SplitPlaneNormal(SplitPlaneNormal { left: Tree(90), right: Tree(91), normal: [0.1432, -0.2381, -0.0722, -0.2146, -0.0888, -0.0131, 0.0621, 0.2812, 0.2690, -0.0300, "other ..."] }) Tree 93: Descendants(Descendants { descendants: [11, 12, 19, 25, 59, 60, 65] }) -Tree 94: SplitPlaneNormal(SplitPlaneNormal { left: Tree(92), right: Tree(93), normal: [-0.3120, -0.2834, -0.0907, 0.0227, 0.0090, 0.3812, 0.1933, -0.4350, -0.0939, -0.1180, -0.2013, 0.0197, -0.0472, -0.0336, -0.1988, 0.0397, 0.1914, 0.1412, -0.1298, -0.1525, -0.1154, -0.0438, -0.1335, -0.2600, 0.1097, 0.1125, 0.1885, 0.0315, 0.2691, 0.0511] }) -Tree 95: SplitPlaneNormal(SplitPlaneNormal { left: Tree(89), right: Tree(94), normal: [-0.0200, -0.2200, 0.1948, -0.1418, 0.1112, 0.0083, -0.0510, -0.0801, -0.3187, -0.1000, -0.1367, -0.0389, -0.0556, 0.2224, -0.2718, 0.1765, 0.1163, -0.2332, 0.0342, 0.2299, 0.2840, 0.0873, 0.3227, -0.1082, 0.2161, -0.1491, 0.3666, -0.0066, 0.2141, 0.0844] }) -Tree 96: SplitPlaneNormal(SplitPlaneNormal { left: Tree(88), right: Tree(95), 
normal: [0.0684, -0.2434, -0.1543, 0.0337, 0.1784, 0.1295, 0.3061, -0.0175, -0.0679, -0.1419, -0.0180, 0.2335, 0.3782, -0.1127, 0.2746, -0.2657, -0.0579, 0.1808, 0.2623, -0.3329, 0.1801, -0.2106, 0.1692, -0.1191, 0.0152, 0.1214, 0.0252, -0.0236, -0.1220, -0.1448] }) -Tree 97: SplitPlaneNormal(SplitPlaneNormal { left: Tree(87), right: Tree(96), normal: [-0.0316, -0.1036, 0.3252, -0.0985, -0.1286, -0.4867, -0.0286, 0.1159, -0.0177, 0.2205, 0.0177, 0.3192, 0.1521, -0.1484, 0.1918, -0.2794, -0.1141, 0.2598, 0.1453, 0.1133, -0.1149, -0.0455, 0.0697, -0.2537, 0.1797, -0.0423, -0.0470, 0.0886, 0.0868, 0.2083] }) +Tree 94: SplitPlaneNormal(SplitPlaneNormal { left: Tree(92), right: Tree(93), normal: [-0.3120, -0.2834, -0.0907, 0.0227, 0.0090, 0.3812, 0.1933, -0.4350, -0.0939, -0.1180, "other ..."] }) +Tree 95: SplitPlaneNormal(SplitPlaneNormal { left: Tree(89), right: Tree(94), normal: [-0.0200, -0.2200, 0.1948, -0.1418, 0.1112, 0.0083, -0.0510, -0.0801, -0.3187, -0.1000, "other ..."] }) +Tree 96: SplitPlaneNormal(SplitPlaneNormal { left: Tree(88), right: Tree(95), normal: [0.0684, -0.2434, -0.1543, 0.0337, 0.1784, 0.1295, 0.3061, -0.0175, -0.0679, -0.1419, "other ..."] }) +Tree 97: SplitPlaneNormal(SplitPlaneNormal { left: Tree(87), right: Tree(96), normal: [-0.0316, -0.1036, 0.3252, -0.0985, -0.1286, -0.4867, -0.0286, 0.1159, -0.0177, 0.2205, "other ..."] }) Tree 98: Descendants(Descendants { descendants: [12, 17, 19, 28, 35, 38, 41, 42, 53, 55, 58, 75, 77, 78, 96, 98] }) Tree 99: Descendants(Descendants { descendants: [14, 22, 37, 40, 47, 51, 61, 66, 70, 72, 76, 79, 82, 87, 90, 95] }) -Tree 100: SplitPlaneNormal(SplitPlaneNormal { left: Tree(98), right: Tree(99), normal: [-0.0078, 0.2489, -0.2359, 0.0425, -0.0624, 0.0527, 0.2042, 0.2301, -0.3498, -0.0920, -0.1275, -0.0441, -0.0225, 0.3046, 0.2041, -0.1816, 0.0726, 0.0811, 0.0479, 0.0584, 0.1373, 0.1513, -0.1683, 0.4209, -0.2927, 0.0339, -0.2174, 0.0260, -0.2159, -0.1273] }) +Tree 100: SplitPlaneNormal(SplitPlaneNormal { left: Tree(98), right: Tree(99), normal: [-0.0078, 0.2489, -0.2359, 0.0425, -0.0624, 0.0527, 0.2042, 0.2301, -0.3498, -0.0920, "other ..."] }) Tree 101: Descendants(Descendants { descendants: [8, 15, 24, 83, 92, 94] }) Tree 102: Descendants(Descendants { descendants: [2, 5, 6, 16, 18, 20, 21, 23, 26, 34, 36, 43, 44, 46, 48, 52, 54, 59, 62, 63, 68, 69, 71, 74, 80, 86, 89, 93] }) -Tree 103: SplitPlaneNormal(SplitPlaneNormal { left: Tree(101), right: Tree(102), normal: [0.0940, 0.1325, 0.0627, 0.0098, -0.2045, -0.0698, 0.1022, 0.0678, -0.0750, -0.3038, 0.2822, -0.2471, -0.0600, 0.0224, 0.2191, 0.0428, 0.2723, 0.0783, -0.0772, 0.2503, 0.0242, -0.3317, 0.3299, 0.0653, 0.2141, 0.1082, 0.2680, -0.3318, 0.0456, -0.0326] }) +Tree 103: SplitPlaneNormal(SplitPlaneNormal { left: Tree(101), right: Tree(102), normal: [0.0940, 0.1325, 0.0627, 0.0098, -0.2045, -0.0698, 0.1022, 0.0678, -0.0750, -0.3038, "other ..."] }) Tree 104: Descendants(Descendants { descendants: [10, 21, 28, 30, 37, 41, 47, 71, 72, 77, 88, 98] }) Tree 105: Descendants(Descendants { descendants: [22, 23, 26, 34, 40, 43, 44, 46, 53, 55, 57, 61, 68, 73, 74, 82, 90, 95, 99] }) -Tree 106: SplitPlaneNormal(SplitPlaneNormal { left: Tree(104), right: Tree(105), normal: [-0.1170, 0.1446, 0.0602, -0.0688, 0.2407, -0.1008, 0.1718, -0.1779, -0.1381, 0.0348, -0.3006, 0.0654, 0.2713, 0.1607, 0.0822, -0.2730, 0.1304, 0.3466, 0.3719, -0.1597, -0.0863, -0.1268, -0.2079, 0.0406, -0.0698, -0.1338, -0.0359, -0.1620, 0.0672, 0.3284] }) +Tree 106: SplitPlaneNormal(SplitPlaneNormal { left: 
Tree(104), right: Tree(105), normal: [-0.1170, 0.1446, 0.0602, -0.0688, 0.2407, -0.1008, 0.1718, -0.1779, -0.1381, 0.0348, "other ..."] }) Tree 107: Descendants(Descendants { descendants: [14, 20, 26, 40, 61, 67, 80, 87] }) Tree 108: Descendants(Descendants { descendants: [3, 6, 8, 10, 21, 23, 28, 30, 34, 37, 38, 47, 48, 50, 54, 55, 66, 68, 71, 72, 73, 76, 78, 79, 82, 88, 95, 98] }) -Tree 109: SplitPlaneNormal(SplitPlaneNormal { left: Tree(107), right: Tree(108), normal: [0.2514, -0.1588, -0.0499, 0.1317, -0.2218, -0.1911, 0.0804, 0.1173, 0.2414, 0.2416, 0.1915, -0.0751, 0.0220, 0.1557, -0.0375, 0.2249, -0.1552, -0.3207, -0.2665, 0.1628, -0.1629, 0.2448, 0.3071, -0.2017, 0.1247, 0.0747, 0.1337, 0.0982, -0.1820, -0.1500] }) +Tree 109: SplitPlaneNormal(SplitPlaneNormal { left: Tree(107), right: Tree(108), normal: [0.2514, -0.1588, -0.0499, 0.1317, -0.2218, -0.1911, 0.0804, 0.1173, 0.2414, 0.2416, "other ..."] }) Tree 110: Descendants(Descendants { descendants: [15, 17, 46, 53, 62, 70, 75, 90] }) -Tree 111: SplitPlaneNormal(SplitPlaneNormal { left: Tree(109), right: Tree(110), normal: [-0.2031, -0.2079, 0.3738, -0.0426, -0.0746, -0.1377, 0.0392, 0.0958, 0.1932, -0.0802, -0.1856, 0.1741, -0.1386, -0.3751, -0.1041, -0.0863, -0.0096, 0.3212, 0.1088, -0.2078, -0.1331, -0.2478, -0.2288, -0.1457, 0.0749, -0.1457, 0.0298, 0.1675, 0.1699, 0.2378] }) +Tree 111: SplitPlaneNormal(SplitPlaneNormal { left: Tree(109), right: Tree(110), normal: [-0.2031, -0.2079, 0.3738, -0.0426, -0.0746, -0.1377, 0.0392, 0.0958, 0.1932, -0.0802, "other ..."] }) Tree 112: Descendants(Descendants { descendants: [0, 5, 7, 9, 10, 12, 17, 19, 20, 25, 27, 31, 34, 39, 46, 55, 56, 58, 60, 67, 68, 76, 81, 84, 86, 91, 97] }) Tree 113: Descendants(Descendants { descendants: [26, 28, 62, 70] }) -Tree 114: SplitPlaneNormal(SplitPlaneNormal { left: Tree(112), right: Tree(113), normal: [-0.1107, 0.0316, 0.0135, -0.1941, -0.1442, 0.1133, -0.1829, 0.0071, 0.2601, -0.1618, 0.0527, 0.2233, -0.1284, -0.3001, 0.0956, -0.2167, 0.0226, 0.0846, 0.0552, -0.3635, 0.0092, -0.1312, -0.3513, 0.1966, 0.1593, 0.1285, -0.2511, 0.1445, -0.3528, -0.0106] }) +Tree 114: SplitPlaneNormal(SplitPlaneNormal { left: Tree(112), right: Tree(113), normal: [-0.1107, 0.0316, 0.0135, -0.1941, -0.1442, 0.1133, -0.1829, 0.0071, 0.2601, -0.1618, "other ..."] }) Tree 115: Descendants(Descendants { descendants: [2, 3, 4, 7, 11, 12, 16, 17, 20, 25, 32, 33, 34, 36, 48, 56, 60, 68, 69, 70, 74, 79, 81, 84, 96, 99] }) Tree 116: Descendants(Descendants { descendants: [5, 9, 15, 24, 26, 61, 62, 92] }) -Tree 117: SplitPlaneNormal(SplitPlaneNormal { left: Tree(115), right: Tree(116), normal: [-0.0406, 0.1263, 0.0654, -0.2637, -0.0650, 0.0877, -0.2022, 0.0139, -0.0035, -0.1877, -0.3014, 0.3774, -0.2076, -0.0453, -0.1721, -0.1977, -0.1765, 0.0122, 0.2644, 0.0717, 0.3604, -0.0858, -0.2241, 0.0946, -0.0122, 0.0321, -0.3840, 0.0867, -0.0692, 0.0973] }) +Tree 117: SplitPlaneNormal(SplitPlaneNormal { left: Tree(115), right: Tree(116), normal: [-0.0406, 0.1263, 0.0654, -0.2637, -0.0650, 0.0877, -0.2022, 0.0139, -0.0035, -0.1877, "other ..."] }) Tree 118: Descendants(Descendants { descendants: [2, 10, 13, 20, 21, 28, 40, 46, 47, 48, 61, 66, 72, 73, 76, 80, 87, 90] }) Tree 119: Descendants(Descendants { descendants: [0, 6, 8, 18, 22, 38, 42, 43, 44, 51, 54, 58, 59, 82, 84, 85] }) -Tree 120: SplitPlaneNormal(SplitPlaneNormal { left: Tree(118), right: Tree(119), normal: [0.2462, -0.2241, 0.3383, -0.2016, 0.0430, -0.0574, 0.1455, -0.2949, -0.0625, 0.0844, -0.1007, -0.1562, 0.3254, -0.1062, 
-0.2551, 0.1835, 0.0819, -0.0581, 0.1166, -0.0475, -0.0563, -0.2019, 0.4488, -0.0376, 0.0097, -0.0984, -0.0238, 0.1005, 0.1962, -0.1662] }) +Tree 120: SplitPlaneNormal(SplitPlaneNormal { left: Tree(118), right: Tree(119), normal: [0.2462, -0.2241, 0.3383, -0.2016, 0.0430, -0.0574, 0.1455, -0.2949, -0.0625, 0.0844, "other ..."] }) Tree 121: Descendants(Descendants { descendants: [8, 20, 34, 36, 47, 48, 50, 52, 63, 66, 69, 72, 74, 79, 81, 82, 84, 95, 96, 98] }) Tree 122: Descendants(Descendants { descendants: [2, 14, 15, 16, 24, 26, 28, 29, 40, 57, 61, 62, 70, 83, 90, 92] }) -Tree 123: SplitPlaneNormal(SplitPlaneNormal { left: Tree(121), right: Tree(122), normal: [-0.1843, -0.0871, -0.0603, -0.1272, 0.2035, 0.1519, -0.1754, 0.2246, 0.0560, 0.0408, -0.3485, 0.4195, -0.1783, -0.0260, -0.0409, -0.0284, 0.0867, -0.0408, 0.0948, -0.2281, 0.1962, 0.0020, -0.3135, 0.3187, -0.0816, 0.1485, -0.3187, 0.0170, 0.1290, -0.0041] }) +Tree 123: SplitPlaneNormal(SplitPlaneNormal { left: Tree(121), right: Tree(122), normal: [-0.1843, -0.0871, -0.0603, -0.1272, 0.2035, 0.1519, -0.1754, 0.2246, 0.0560, 0.0408, "other ..."] }) Tree 124: Descendants(Descendants { descendants: [14, 16, 22, 38, 41, 42, 43, 44, 50, 51, 53, 59, 62, 64, 80, 84] }) Tree 125: Descendants(Descendants { descendants: [2, 10, 21, 23, 26, 28, 29, 30, 34, 37, 39, 47, 52, 54, 55, 67, 68, 69, 71, 72, 76, 78, 86, 88, 90, 94, 97, 98] }) -Tree 126: SplitPlaneNormal(SplitPlaneNormal { left: Tree(124), right: Tree(125), normal: [-0.2156, 0.0925, 0.0242, 0.1246, -0.0758, -0.0672, -0.1816, 0.4853, 0.1643, 0.1418, 0.2752, -0.1825, -0.2671, 0.0674, -0.1950, -0.0743, -0.1493, -0.0368, -0.1929, 0.2540, -0.0441, 0.3135, -0.2219, 0.1059, 0.2115, 0.0749, 0.1629, -0.1050, -0.0211, 0.0051] }) +Tree 126: SplitPlaneNormal(SplitPlaneNormal { left: Tree(124), right: Tree(125), normal: [-0.2156, 0.0925, 0.0242, 0.1246, -0.0758, -0.0672, -0.1816, 0.4853, 0.1643, 0.1418, "other ..."] }) Root: Metadata { dimensions: 30, items: RoaringBitmap<100 values between 0 and 99>, roots: [8, 17, 24, 35, 44, 55, 64, 75, 86, 97], distance: "euclidean" } - diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 43faf4e1..8299f222 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -1,6 +1,7 @@ use std::{ borrow::Cow, mem::{size_of, transmute}, + slice::ChunksExact, }; use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; @@ -43,23 +44,54 @@ impl UnalignedVectorCodec for BinaryQuantized { } fn iter(vec: &UnalignedVector) -> impl Iterator + '_ { - vec.vector - .chunks_exact(size_of::()) - .map(|bytes| QuantizedWord::from_ne_bytes(bytes.try_into().unwrap())) - .flat_map(|mut word| { - let mut ret = vec![0.0; QUANTIZED_WORD_SIZE]; - for index in 0..QUANTIZED_WORD_SIZE { - let bit = word & 1; - word >>= 1; - if bit == 1 { - ret[index] = 1.0; - } - } - ret - }) + BinaryQuantizedIterator { + current_element: 0, + // Force the pulling of the first word + current_iteration: QUANTIZED_WORD_SIZE, + iter: vec.vector.chunks_exact(size_of::()), + } } fn len(vec: &UnalignedVector) -> usize { vec.vector.len() / size_of::() } } + +pub struct BinaryQuantizedIterator<'a> { + current_element: usize, + current_iteration: usize, + iter: ChunksExact<'a, u8>, +} + +impl Iterator for BinaryQuantizedIterator<'_> { + type Item = f32; + + fn next(&mut self) -> Option { + if self.current_iteration >= QUANTIZED_WORD_SIZE { + let bytes = self.iter.next()?; + self.current_element = 
QuantizedWord::from_ne_bytes(bytes.try_into().unwrap()); + self.current_iteration = 0; + } + + let bit = self.current_element & 1; + self.current_element >>= 1; + self.current_iteration += 1; + + Some(bit as f32) + } + + fn size_hint(&self) -> (usize, Option) { + let (low, high) = self.iter.size_hint(); + let rem = QUANTIZED_WORD_SIZE - self.current_iteration; + + (low + rem, high.map(|h| h + rem)) + } +} + +impl ExactSizeIterator for BinaryQuantizedIterator<'_> { + fn len(&self) -> usize { + let (lower, upper) = self.size_hint(); + debug_assert_eq!(upper, Some(lower)); + lower + } +} From facc8575222d3f5da5b9a94288e44896911e701f Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 8 Jul 2024 16:51:19 +0200 Subject: [PATCH 08/61] fix the size of the iterator --- src/tests/node.rs | 19 +++++++++++++++++++ src/unaligned_vector/binary_quantized.rs | 10 +++++----- src/unaligned_vector/f32.rs | 2 +- src/unaligned_vector/mod.rs | 9 ++++++--- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/tests/node.rs b/src/tests/node.rs index c2f61637..42d44998 100644 --- a/src/tests/node.rs +++ b/src/tests/node.rs @@ -17,6 +17,25 @@ fn unaligned_f32_vec() { assert_eq!(bytes, unaligned_owned_from_bytes.as_bytes()); } +#[test] +fn unaligned_binary_quantized_iter_size() { + let original: Vec = vec![-1.0, 2.0, -3.0, 4.0, 5.0]; + let unaligned = UnalignedVector::::from_slice(&original); + assert_snapshot!(unaligned.len(), @"64"); + let mut iter = unaligned.iter(); + assert_snapshot!(iter.len(), @"64"); + iter.next().unwrap(); + assert_snapshot!(iter.len(), @"63"); + iter.by_ref().take(10).for_each(drop); + assert_snapshot!(iter.len(), @"53"); + iter.by_ref().take(52).for_each(drop); + assert_snapshot!(iter.len(), @"1"); + iter.next().unwrap(); + assert_snapshot!(iter.len(), @"0"); + iter.next(); + assert_snapshot!(iter.len(), @"0"); +} + #[test] fn unaligned_binary_quantized_smol() { let original: Vec = vec![-1.0, 2.0, -3.0, 4.0, 5.0]; diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 8299f222..b99c9998 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -29,9 +29,9 @@ impl UnalignedVectorCodec for BinaryQuantized { let mut output: Vec = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { let mut word: QuantizedWord = 0; - for bit in chunk.iter().rev() { + for scalar in chunk.iter().rev() { word <<= 1; - word += bit.is_sign_positive() as QuantizedWord; + word += scalar.is_sign_positive() as QuantizedWord; } output.extend_from_slice(&word.to_ne_bytes()); } @@ -43,7 +43,7 @@ impl UnalignedVectorCodec for BinaryQuantized { Cow::Owned(Self::from_slice(&vec).into_owned()) } - fn iter(vec: &UnalignedVector) -> impl Iterator + '_ { + fn iter(vec: &UnalignedVector) -> impl ExactSizeIterator + '_ { BinaryQuantizedIterator { current_element: 0, // Force the pulling of the first word @@ -53,7 +53,7 @@ impl UnalignedVectorCodec for BinaryQuantized { } fn len(vec: &UnalignedVector) -> usize { - vec.vector.len() / size_of::() + (vec.vector.len() / size_of::()) * QUANTIZED_WORD_SIZE } } @@ -84,7 +84,7 @@ impl Iterator for BinaryQuantizedIterator<'_> { let (low, high) = self.iter.size_hint(); let rem = QUANTIZED_WORD_SIZE - self.current_iteration; - (low + rem, high.map(|h| h + rem)) + (low * QUANTIZED_WORD_SIZE + rem, high.map(|h| h * QUANTIZED_WORD_SIZE + rem)) } } diff --git a/src/unaligned_vector/f32.rs b/src/unaligned_vector/f32.rs index 3cbb633f..63bc6fd1 
100644 --- a/src/unaligned_vector/f32.rs +++ b/src/unaligned_vector/f32.rs @@ -35,7 +35,7 @@ impl UnalignedVectorCodec for f32 { /// Returns an iterator of f32 that are read from the slice. /// The f32 are copied in memory and are therefore, aligned. - fn iter(vec: &UnalignedVector) -> impl Iterator + '_ { + fn iter(vec: &UnalignedVector) -> impl ExactSizeIterator + '_ { vec.vector.chunks_exact(size_of::()).map(NativeEndian::read_f32) } diff --git a/src/unaligned_vector/mod.rs b/src/unaligned_vector/mod.rs index bdc72f0d..4d052ebe 100644 --- a/src/unaligned_vector/mod.rs +++ b/src/unaligned_vector/mod.rs @@ -28,7 +28,7 @@ pub trait UnalignedVectorCodec: std::borrow::ToOwned + Sized { /// Returns an iterator of f32 that are read from the vector. /// The f32 are copied in memory and are therefore, aligned. - fn iter(vec: &UnalignedVector) -> impl Iterator + '_; + fn iter(vec: &UnalignedVector) -> impl ExactSizeIterator + '_; /// Returns the len of the vector in terms of elements. fn len(vec: &UnalignedVector) -> usize; @@ -71,13 +71,16 @@ impl UnalignedVector { /// Returns an iterator of f32 that are read from the vector. /// The f32 are copied in memory and are therefore, aligned. - pub fn iter(&self) -> impl Iterator + '_ { + pub fn iter(&self) -> impl ExactSizeIterator + '_ { Codec::iter(self) } /// Returns an allocated and aligned `Vec`. pub fn to_vec(&self) -> Vec { - self.iter().collect() + let iter = self.iter(); + let mut ret = Vec::with_capacity(iter.len()); + ret.extend(iter); + ret } /// Returns the len of the vector in terms of elements. From 15a13ffaafc1d85e7a662051f50d60996f27f5a3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 8 Jul 2024 22:19:23 +0200 Subject: [PATCH 09/61] fix spaces --- src/spaces/simple_avx.rs | 10 ++++++++-- src/spaces/simple_sse.rs | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/spaces/simple_avx.rs b/src/spaces/simple_avx.rs index f52c2427..182b04d1 100644 --- a/src/spaces/simple_avx.rs +++ b/src/spaces/simple_avx.rs @@ -14,7 +14,10 @@ unsafe fn hsum256_ps_avx(x: __m256) -> f32 { #[target_feature(enable = "avx")] #[target_feature(enable = "fma")] -pub(crate) unsafe fn euclid_similarity_avx(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { +pub(crate) unsafe fn euclid_similarity_avx( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { // It is safe to load unaligned floats from a pointer. // @@ -63,7 +66,10 @@ pub(crate) unsafe fn euclid_similarity_avx(v1: &UnalignedF32Slice, v2: &Unaligne #[target_feature(enable = "avx")] #[target_feature(enable = "fma")] -pub(crate) unsafe fn dot_similarity_avx(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { +pub(crate) unsafe fn dot_similarity_avx( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { // It is safe to load unaligned floats from a pointer. // diff --git a/src/spaces/simple_sse.rs b/src/spaces/simple_sse.rs index c42c4f7b..11d7533a 100644 --- a/src/spaces/simple_sse.rs +++ b/src/spaces/simple_sse.rs @@ -14,7 +14,10 @@ unsafe fn hsum128_ps_sse(x: __m128) -> f32 { } #[target_feature(enable = "sse")] -pub(crate) unsafe fn euclid_similarity_sse(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { +pub(crate) unsafe fn euclid_similarity_sse( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { // It is safe to load unaligned floats from a pointer. 
// @@ -58,7 +61,10 @@ pub(crate) unsafe fn euclid_similarity_sse(v1: &UnalignedF32Slice, v2: &Unaligne } #[target_feature(enable = "sse")] -pub(crate) unsafe fn dot_similarity_sse(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { +pub(crate) unsafe fn dot_similarity_sse( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { // It is safe to load unaligned floats from a pointer. // From e4847d2810a5c457aabbd10b9171ed23dcc1d22a Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Jul 2024 10:29:54 +0200 Subject: [PATCH 10/61] remove the now useless craft_owned_unaligned_vector_from_f32 function from the distance trait --- src/distance/angular.rs | 2 +- src/distance/dot_product.rs | 2 +- src/distance/euclidean.rs | 2 +- src/distance/manhattan.rs | 2 +- src/distance/mod.rs | 10 ++-------- src/writer.rs | 2 +- 6 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/distance/angular.rs b/src/distance/angular.rs index adc76086..f2805672 100644 --- a/src/distance/angular.rs +++ b/src/distance/angular.rs @@ -71,7 +71,7 @@ impl Distance for Angular { let [node_p, node_q] = two_means(rng, children, true)?; let vector: Vec = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); - let unaligned_vector = Self::craft_owned_unaligned_vector_from_f32(vector); + let unaligned_vector = UnalignedVector::from_vec(vector); let mut normal = Leaf { header: NodeHeaderAngular { norm: 0.0 }, vector: unaligned_vector }; Self::normalize(&mut normal); diff --git a/src/distance/dot_product.rs b/src/distance/dot_product.rs index 2f6c1b73..137f6eb1 100644 --- a/src/distance/dot_product.rs +++ b/src/distance/dot_product.rs @@ -96,7 +96,7 @@ impl Distance for DotProduct { node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let mut normal = Leaf:: { header: NodeHeaderDotProduct { norm: 0.0, extra_dim: 0.0 }, - vector: Self::craft_owned_unaligned_vector_from_f32(vector), + vector: UnalignedVector::from_vec(vector), }; normal.header.extra_dim = node_p.header.extra_dim - node_q.header.extra_dim; Self::normalize(&mut normal); diff --git a/src/distance/euclidean.rs b/src/distance/euclidean.rs index a4a935d2..989270f3 100644 --- a/src/distance/euclidean.rs +++ b/src/distance/euclidean.rs @@ -56,7 +56,7 @@ impl Distance for Euclidean { node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let mut normal = Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, - vector: Self::craft_owned_unaligned_vector_from_f32(vector), + vector: UnalignedVector::from_vec(vector), }; Self::normalize(&mut normal); diff --git a/src/distance/manhattan.rs b/src/distance/manhattan.rs index 7036eedf..850e8508 100644 --- a/src/distance/manhattan.rs +++ b/src/distance/manhattan.rs @@ -59,7 +59,7 @@ impl Distance for Manhattan { node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let mut normal = Leaf { header: NodeHeaderManhattan { bias: 0.0 }, - vector: Self::craft_owned_unaligned_vector_from_f32(vector), + vector: UnalignedVector::from_vec(vector), }; Self::normalize(&mut normal); diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 78033e8a..25363595 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -34,12 +34,6 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { fn name() -> &'static str; - fn craft_owned_unaligned_vector_from_f32( - vector: Vec, - ) -> Cow<'static, UnalignedVector> { - UnalignedVector::from_vec(vector) - } - fn new_header(vector: &UnalignedVector) -> Self::Header; /// Returns a 
non-normalized distance. @@ -71,7 +65,7 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { let norm = Self::norm(node); if norm > 0.0 { let vec: Vec<_> = node.vector.iter().map(|x| x / norm).collect(); - node.vector = Self::craft_owned_unaligned_vector_from_f32(vec); + node.vector = UnalignedVector::from_vec(vec); } } @@ -84,7 +78,7 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { .zip(new_node.vector.iter()) .map(|(x, n)| (x * c + n / norm) / (c + 1.0)) .collect(); - mean.vector = Self::craft_owned_unaligned_vector_from_f32(vec); + mean.vector = UnalignedVector::from_vec(vec); } fn create_split<'a, R: Rng>( diff --git a/src/writer.rs b/src/writer.rs index 8e054e06..e4d2af29 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -57,7 +57,7 @@ impl Writer { match node { Node::Leaf(Leaf { header: _, vector }) => { let vector = vector.to_vec(); - let vector = ND::craft_owned_unaligned_vector_from_f32(vector); + let vector = UnalignedVector::from_vec(vector); let new_leaf = Node::Leaf(Leaf { header: ND::new_header(&vector), vector }); unsafe { // safety: We do not keep a reference to the current value, we own it. From ddb2304c69c54f1d6ed79ea85ab79e3ad3ca14b0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Jul 2024 10:37:03 +0200 Subject: [PATCH 11/61] get rid of the drain --- src/reader.rs | 8 ++------ src/writer.rs | 7 ++----- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/reader.rs b/src/reader.rs index c4f73c6e..cba75d59 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -131,12 +131,8 @@ impl<'t, D: Distance> Reader<'t, D> { /// Returns the vector for item `i` that was previously added. pub fn item_vector(&self, rtxn: &'t RoTxn, item: ItemId) -> Result>> { - Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| { - let mut vec = leaf.vector.to_vec(); - // Depending on the distance we may have additional elements in the vec that needs to be removed. - vec.drain(self.dimensions()..); - vec - })) + Ok(item_leaf(self.database, self.index, rtxn, item)? + .map(|leaf| leaf.vector.iter().take(self.dimensions).collect())) } /// Returns `true` if the index is empty. diff --git a/src/writer.rs b/src/writer.rs index e4d2af29..dd7715e8 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -87,11 +87,8 @@ impl Writer { /// Returns an `Option`al vector previous stored in this database. pub fn item_vector(&self, rtxn: &RoTxn, item: ItemId) -> Result>> { - Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| { - let mut vec = leaf.vector.to_vec(); - vec.drain(self.dimensions..); - vec - })) + Ok(item_leaf(self.database, self.index, rtxn, item)? + .map(|leaf| leaf.vector.iter().take(self.dimensions).collect())) } /// Returns `true` if the index is empty. 
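
The "get rid of the drain" patch above swaps the drain-based truncation in `item_vector` for an iterator that stops after `self.dimensions` elements; the truncation exists because some distances may store more scalars than the user-facing dimension count. The following standalone sketch is not arroy code: it uses a plain `Vec<f32>` with made-up values and a hypothetical `dims` to illustrate that the old and new forms return the same prefix.

// Minimal sketch, assuming a stored vector that carries one extra scalar past `dims`.
fn main() {
    let stored = vec![0.1_f32, 0.2, 0.3, 0.4, 0.5]; // pretend: 4 user dimensions + 1 extra scalar
    let dims = 4;

    // Old approach: copy the whole stored vector, then drain the tail off.
    let mut old = stored.clone();
    old.drain(dims..);

    // New approach: stop the iterator after `dims` elements and collect directly.
    let new: Vec<f32> = stored.iter().copied().take(dims).collect();

    assert_eq!(old, new); // both hold [0.1, 0.2, 0.3, 0.4]
}

The iterator form also avoids materializing scalars that are immediately thrown away, which matters once codecs such as the binary-quantized one decode on the fly.
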
From 903ec0b9421182f4b7753624ab6bd1de9c6f8e18 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Jul 2024 10:38:29 +0200 Subject: [PATCH 12/61] fix the spaces tests --- src/spaces/simple_avx.rs | 4 ++-- src/spaces/simple_sse.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spaces/simple_avx.rs b/src/spaces/simple_avx.rs index 182b04d1..32bed468 100644 --- a/src/spaces/simple_avx.rs +++ b/src/spaces/simple_avx.rs @@ -132,8 +132,8 @@ mod tests { 56., 57., 58., 59., 60., 61., ]; - let v1 = UnalignedF32Slice::from_slice(&v1[..]); - let v2 = UnalignedF32Slice::from_slice(&v2[..]); + let v1 = UnalignedVector::from_slice(&v1[..]); + let v2 = UnalignedVector::from_slice(&v2[..]); let euclid_simd = unsafe { euclid_similarity_avx(v1, v2) }; let euclid = euclidean_distance_non_optimized(v1, v2); diff --git a/src/spaces/simple_sse.rs b/src/spaces/simple_sse.rs index 11d7533a..d8925da1 100644 --- a/src/spaces/simple_sse.rs +++ b/src/spaces/simple_sse.rs @@ -130,8 +130,8 @@ mod tests { 56., 57., 58., 59., 60., 61., ]; - let v1 = UnalignedF32Slice::from_slice(&v1[..]); - let v2 = UnalignedF32Slice::from_slice(&v2[..]); + let v1 = UnalignedVector::from_slice(&v1[..]); + let v2 = UnalignedVector::from_slice(&v2[..]); let euclid_simd = unsafe { euclid_similarity_sse(v1, v2) }; let euclid = euclidean_distance_non_optimized(v1, v2); From ae6534508530f74e75c03f9e6ffce3bfa2f43a97 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Jul 2024 10:45:47 +0200 Subject: [PATCH 13/61] fix the spaces again --- src/spaces/simple_avx.rs | 8 ++++---- src/spaces/simple_sse.rs | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/spaces/simple_avx.rs b/src/spaces/simple_avx.rs index 32bed468..d5dd78cc 100644 --- a/src/spaces/simple_avx.rs +++ b/src/spaces/simple_avx.rs @@ -135,12 +135,12 @@ mod tests { let v1 = UnalignedVector::from_slice(&v1[..]); let v2 = UnalignedVector::from_slice(&v2[..]); - let euclid_simd = unsafe { euclid_similarity_avx(v1, v2) }; - let euclid = euclidean_distance_non_optimized(v1, v2); + let euclid_simd = unsafe { euclid_similarity_avx(&v1, &v2) }; + let euclid = euclidean_distance_non_optimized(&v1, &v2); assert_eq!(euclid_simd, euclid); - let dot_simd = unsafe { dot_similarity_avx(v1, v2) }; - let dot = dot_product_non_optimized(v1, v2); + let dot_simd = unsafe { dot_similarity_avx(&v1, &v2) }; + let dot = dot_product_non_optimized(&v1, &v2); assert_eq!(dot_simd, dot); // let cosine_simd = unsafe { cosine_preprocess_avx(v1.clone()) }; diff --git a/src/spaces/simple_sse.rs b/src/spaces/simple_sse.rs index d8925da1..f679c24a 100644 --- a/src/spaces/simple_sse.rs +++ b/src/spaces/simple_sse.rs @@ -133,12 +133,12 @@ mod tests { let v1 = UnalignedVector::from_slice(&v1[..]); let v2 = UnalignedVector::from_slice(&v2[..]); - let euclid_simd = unsafe { euclid_similarity_sse(v1, v2) }; - let euclid = euclidean_distance_non_optimized(v1, v2); + let euclid_simd = unsafe { euclid_similarity_sse(&v1, &v2) }; + let euclid = euclidean_distance_non_optimized(&v1, &v2); assert_eq!(euclid_simd, euclid); - let dot_simd = unsafe { dot_similarity_sse(v1, v2) }; - let dot = dot_product_non_optimized(v1, v2); + let dot_simd = unsafe { dot_similarity_sse(&v1, &v2) }; + let dot = dot_product_non_optimized(&v1, &v2); assert_eq!(dot_simd, dot); // let cosine_simd = unsafe { cosine_preprocess_sse(v1.clone()) }; From 9ab541ecb65c6430888a14b9abeb3ade70b601cc Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Jul 2024 10:53:52 +0200 Subject: [PATCH 14/61] add a comment explaining 
the relevancy issue we may encounter --- src/distance/binary_quantized_euclidean.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index 890f0da0..99a8bec6 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ -75,5 +75,7 @@ impl Distance for BinaryQuantizedEuclidean { } fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. + // This may or may not impact relevancy since the 1s will be added to every vector. u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 } From 7907da9150c948abc1a53046b15d66f56b77d1ec Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Jul 2024 10:55:57 +0200 Subject: [PATCH 15/61] rename the vector format to vector codec --- src/distance/angular.rs | 12 ++++++------ src/distance/binary_quantized_euclidean.rs | 12 ++++++------ src/distance/dot_product.rs | 12 ++++++------ src/distance/euclidean.rs | 12 ++++++------ src/distance/manhattan.rs | 12 ++++++------ src/distance/mod.rs | 14 +++++++------- src/node.rs | 8 ++++---- 7 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/distance/angular.rs b/src/distance/angular.rs index f2805672..6e32ed10 100644 --- a/src/distance/angular.rs +++ b/src/distance/angular.rs @@ -25,13 +25,13 @@ pub struct NodeHeaderAngular { impl Distance for Angular { type Header = NodeHeaderAngular; - type VectorFormat = f32; + type VectorCodec = f32; fn name() -> &'static str { "angular" } - fn new_header(vector: &UnalignedVector) -> Self::Header { + fn new_header(vector: &UnalignedVector) -> Self::Header { NodeHeaderAngular { norm: Self::norm_no_header(vector) } } @@ -56,7 +56,7 @@ impl Distance for Angular { d } - fn norm_no_header(v: &UnalignedVector) -> f32 { + fn norm_no_header(v: &UnalignedVector) -> f32 { dot_product(v, v).sqrt() } @@ -67,7 +67,7 @@ impl Distance for Angular { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result>> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, true)?; let vector: Vec = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); @@ -79,8 +79,8 @@ impl Distance for Angular { } fn margin_no_header( - p: &UnalignedVector, - q: &UnalignedVector, + p: &UnalignedVector, + q: &UnalignedVector, ) -> f32 { dot_product(p, q) } diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index 99a8bec6..f2e6d5f9 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ -26,13 +26,13 @@ pub struct NodeHeaderBinaryQuantizedEuclidean { impl Distance for BinaryQuantizedEuclidean { type Header = NodeHeaderBinaryQuantizedEuclidean; - type VectorFormat = unaligned_vector::BinaryQuantized; + type VectorCodec = unaligned_vector::BinaryQuantized; fn name() -> &'static str { "binary quantized euclidean" } - fn new_header(_vector: &UnalignedVector) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 } } @@ -40,7 +40,7 @@ impl Distance for BinaryQuantizedEuclidean { dot_product(&p.vector, &q.vector) } - fn norm_no_header(v: &UnalignedVector) -> f32 { + fn norm_no_header(v: &UnalignedVector) -> f32 { dot_product(v, v).sqrt() } @@ -49,7 +49,7 @@ 
impl Distance for BinaryQuantizedEuclidean { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result>> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, false)?; let vector: Vec = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); @@ -67,8 +67,8 @@ impl Distance for BinaryQuantizedEuclidean { } fn margin_no_header( - p: &UnalignedVector, - q: &UnalignedVector, + p: &UnalignedVector, + q: &UnalignedVector, ) -> f32 { dot_product(p, q) } diff --git a/src/distance/dot_product.rs b/src/distance/dot_product.rs index 137f6eb1..daaee9b4 100644 --- a/src/distance/dot_product.rs +++ b/src/distance/dot_product.rs @@ -30,13 +30,13 @@ pub struct NodeHeaderDotProduct { impl Distance for DotProduct { type Header = NodeHeaderDotProduct; - type VectorFormat = f32; + type VectorCodec = f32; fn name() -> &'static str { "dot-product" } - fn new_header(_vector: &UnalignedVector) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { // We compute the norm when we preprocess the vector, before generating the tree nodes. NodeHeaderDotProduct { extra_dim: 0.0, norm: 0.0 } } @@ -66,7 +66,7 @@ impl Distance for DotProduct { (dot + leaf.header.extra_dim * leaf.header.extra_dim).sqrt() } - fn norm_no_header(v: &UnalignedVector) -> f32 { + fn norm_no_header(v: &UnalignedVector) -> f32 { dot_product(v, v).sqrt() } @@ -90,7 +90,7 @@ impl Distance for DotProduct { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result>> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, true)?; let vector: Vec = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); @@ -109,8 +109,8 @@ impl Distance for DotProduct { } fn margin_no_header( - p: &UnalignedVector, - q: &UnalignedVector, + p: &UnalignedVector, + q: &UnalignedVector, ) -> f32 { dot_product(p, q) } diff --git a/src/distance/euclidean.rs b/src/distance/euclidean.rs index 989270f3..ae503e83 100644 --- a/src/distance/euclidean.rs +++ b/src/distance/euclidean.rs @@ -27,13 +27,13 @@ pub struct NodeHeaderEuclidean { impl Distance for Euclidean { type Header = NodeHeaderEuclidean; - type VectorFormat = f32; + type VectorCodec = f32; fn name() -> &'static str { "euclidean" } - fn new_header(_vector: &UnalignedVector) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderEuclidean { bias: 0.0 } } @@ -41,7 +41,7 @@ impl Distance for Euclidean { euclidean_distance(&p.vector, &q.vector) } - fn norm_no_header(v: &UnalignedVector) -> f32 { + fn norm_no_header(v: &UnalignedVector) -> f32 { dot_product(v, v).sqrt() } @@ -50,7 +50,7 @@ impl Distance for Euclidean { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result>> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, false)?; let vector: Vec<_> = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); @@ -76,8 +76,8 @@ impl Distance for Euclidean { } fn margin_no_header( - p: &UnalignedVector, - q: &UnalignedVector, + p: &UnalignedVector, + q: &UnalignedVector, ) -> f32 { dot_product(p, q) } diff --git a/src/distance/manhattan.rs b/src/distance/manhattan.rs index 850e8508..4f1d957c 100644 --- a/src/distance/manhattan.rs +++ b/src/distance/manhattan.rs @@ -26,13 +26,13 @@ pub struct NodeHeaderManhattan { impl Distance for Manhattan { type Header = NodeHeaderManhattan; - type VectorFormat = f32; + type VectorCodec = 
f32; fn name() -> &'static str { "manhattan" } - fn new_header(_vector: &UnalignedVector) -> Self::Header { + fn new_header(_vector: &UnalignedVector) -> Self::Header { NodeHeaderManhattan { bias: 0.0 } } @@ -44,7 +44,7 @@ impl Distance for Manhattan { d.max(0.0) } - fn norm_no_header(v: &UnalignedVector) -> f32 { + fn norm_no_header(v: &UnalignedVector) -> f32 { dot_product(v, v).sqrt() } @@ -53,7 +53,7 @@ impl Distance for Manhattan { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result>> { + ) -> heed::Result>> { let [node_p, node_q] = two_means(rng, children, false)?; let vector: Vec<_> = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); @@ -79,8 +79,8 @@ impl Distance for Manhattan { } fn margin_no_header( - p: &UnalignedVector, - q: &UnalignedVector, + p: &UnalignedVector, + q: &UnalignedVector, ) -> f32 { dot_product(p, q) } diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 25363595..ed05f869 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -30,11 +30,11 @@ mod manhattan; pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { /// A header structure with informations related to the type Header: Pod + Zeroable + fmt::Debug; - type VectorFormat: UnalignedVectorCodec; + type VectorCodec: UnalignedVectorCodec; fn name() -> &'static str; - fn new_header(vector: &UnalignedVector) -> Self::Header; + fn new_header(vector: &UnalignedVector) -> Self::Header; /// Returns a non-normalized distance. fn built_distance(p: &Leaf, q: &Leaf) -> f32; @@ -59,7 +59,7 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { Self::norm_no_header(&leaf.vector) } - fn norm_no_header(v: &UnalignedVector) -> f32; + fn norm_no_header(v: &UnalignedVector) -> f32; fn normalize(node: &mut Leaf) { let norm = Self::norm(node); @@ -84,19 +84,19 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { fn create_split<'a, R: Rng>( children: &'a ImmutableSubsetLeafs, rng: &mut R, - ) -> heed::Result>>; + ) -> heed::Result>>; fn margin(p: &Leaf, q: &Leaf) -> f32 { Self::margin_no_header(&p.vector, &q.vector) } fn margin_no_header( - p: &UnalignedVector, - q: &UnalignedVector, + p: &UnalignedVector, + q: &UnalignedVector, ) -> f32; fn side( - normal_plane: &UnalignedVector, + normal_plane: &UnalignedVector, node: &Leaf, rng: &mut R, ) -> Side { diff --git a/src/node.rs b/src/node.rs index 197dcd56..b2bc1cc8 100644 --- a/src/node.rs +++ b/src/node.rs @@ -38,7 +38,7 @@ pub struct Leaf<'a, D: Distance> { /// The header of this leaf. pub header: D::Header, /// The vector of this leaf. - pub vector: Cow<'a, UnalignedVector>, + pub vector: Cow<'a, UnalignedVector>, } impl fmt::Debug for Leaf<'_, D> { @@ -115,7 +115,7 @@ impl fmt::Debug for ItemIds<'_> { pub struct SplitPlaneNormal<'a, D: Distance> { pub left: NodeId, pub right: NodeId, - pub normal: Cow<'a, UnalignedVector>, + pub normal: Cow<'a, UnalignedVector>, } impl fmt::Debug for SplitPlaneNormal<'_, D> { @@ -171,7 +171,7 @@ impl<'a, D: Distance> BytesDecode<'a> for NodeCodec { [LEAF_TAG, bytes @ ..] 
=> { let (header_bytes, remaining) = bytes.split_at(size_of::()); let header = pod_read_unaligned(header_bytes); - let vector = UnalignedVector::::from_bytes(remaining)?; + let vector = UnalignedVector::::from_bytes(remaining)?; Ok(Node::Leaf(Leaf { header, vector })) } @@ -179,7 +179,7 @@ impl<'a, D: Distance> BytesDecode<'a> for NodeCodec { let (left, bytes) = NodeId::from_bytes(bytes); let (right, bytes) = NodeId::from_bytes(bytes); Ok(Node::SplitPlaneNormal(SplitPlaneNormal { - normal: UnalignedVector::::from_bytes(bytes)?, + normal: UnalignedVector::::from_bytes(bytes)?, left, right, })) From 47fd2bdd7f59efa84ddd9ea5dcb1b10c32b4849c Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 10 Jul 2024 16:00:10 +0200 Subject: [PATCH 16/61] fix the normalized distance for the binary quantized euclidean distance --- src/distance/angular.rs | 2 +- src/distance/binary_quantized_euclidean.rs | 19 +++++++++++++++++-- src/distance/dot_product.rs | 2 +- src/distance/manhattan.rs | 2 +- src/distance/mod.rs | 2 +- src/reader.rs | 2 +- 6 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/distance/angular.rs b/src/distance/angular.rs index 6e32ed10..2e2520f6 100644 --- a/src/distance/angular.rs +++ b/src/distance/angular.rs @@ -52,7 +52,7 @@ impl Distance for Angular { } } - fn normalized_distance(d: f32) -> f32 { + fn normalized_distance(d: f32, _dimensions: usize) -> f32 { d } diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index f2e6d5f9..278a77ce 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ -37,11 +37,17 @@ impl Distance for BinaryQuantizedEuclidean { } fn built_distance(p: &Leaf, q: &Leaf) -> f32 { - dot_product(&p.vector, &q.vector) + squared_euclidean_distance(&p.vector, &q.vector) + } + + /// Normalizes the distance returned by the distance method. + fn normalized_distance(d: f32, dimensions: usize) -> f32 { + d / dimensions as f32 } fn norm_no_header(v: &UnalignedVector) -> f32 { - dot_product(v, v).sqrt() + let ones = v.as_bytes().iter().map(|b| b.count_ones()).sum::() as f32; + ones.sqrt() } fn init(_node: &mut Leaf) {} @@ -75,6 +81,15 @@ impl Distance for BinaryQuantizedEuclidean { } fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. + // This may or may not impact relevancy since the 1s will be added to every vector. + u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 +} + +fn squared_euclidean_distance( + u: &UnalignedVector, + v: &UnalignedVector, +) -> f32 { // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. // This may or may not impact relevancy since the 1s will be added to every vector. 
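    // Each xor-ed bit that differs marks a dimension where the two quantized vectors
    // disagree, so the popcount below is the Hamming distance between their bit patterns,
    // e.g. 0b0110 ^ 0b0101 = 0b0011 => 2 differing dimensions. On the quantized values this
    // matches the squared Euclidean distance up to a constant factor.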
u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 diff --git a/src/distance/dot_product.rs b/src/distance/dot_product.rs index daaee9b4..6a948c98 100644 --- a/src/distance/dot_product.rs +++ b/src/distance/dot_product.rs @@ -70,7 +70,7 @@ impl Distance for DotProduct { dot_product(v, v).sqrt() } - fn normalized_distance(d: f32) -> f32 { + fn normalized_distance(d: f32, _dimension: usize) -> f32 { -d } diff --git a/src/distance/manhattan.rs b/src/distance/manhattan.rs index 4f1d957c..ae4ba4d5 100644 --- a/src/distance/manhattan.rs +++ b/src/distance/manhattan.rs @@ -40,7 +40,7 @@ impl Distance for Manhattan { p.vector.iter().zip(q.vector.iter()).map(|(p, q)| (p - q).abs()).sum() } - fn normalized_distance(d: f32) -> f32 { + fn normalized_distance(d: f32, _dimension: usize) -> f32 { d.max(0.0) } diff --git a/src/distance/mod.rs b/src/distance/mod.rs index ed05f869..237ba246 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -44,7 +44,7 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { } /// Normalizes the distance returned by the distance method. - fn normalized_distance(d: f32) -> f32 { + fn normalized_distance(d: f32, _dimensions: usize) -> f32 { d.sqrt() } diff --git a/src/reader.rs b/src/reader.rs index cba75d59..f9e2ba05 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -276,7 +276,7 @@ impl<'t, D: Distance> Reader<'t, D> { if output.len() == capacity { break; } - output.push((item, D::normalized_distance(dist))); + output.push((item, D::normalized_distance(dist, self.dimensions))); } Ok(output) From 4c853b85b12e8191bd38889f644fc16460aaceb6 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 10 Jul 2024 16:01:04 +0200 Subject: [PATCH 17/61] add a first test of relevancy that uses autogenerated vectors --- examples/relevancy.rs | 139 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 examples/relevancy.rs diff --git a/examples/relevancy.rs b/examples/relevancy.rs new file mode 100644 index 00000000..633c5d58 --- /dev/null +++ b/examples/relevancy.rs @@ -0,0 +1,139 @@ +use rand::seq::SliceRandom; + +use arroy::distances::{Angular, BinaryQuantizedEuclidean, DotProduct, Euclidean, Manhattan}; +use arroy::internals::{self, Leaf, NodeCodec, UnalignedVector}; +use arroy::{Database, Distance, ItemId, Result, Writer}; +use heed::{EnvOpenOptions, RwTxn}; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; + +const TWENTY_HUNDRED_MIB: usize = 2 * 1024 * 1024 * 1024; + +const NUMBER_VECTORS: usize = 4_000; +// The openAI dimensions +const VECTOR_DIMENSIONS: usize = 256; +// const VECTOR_DIMENSIONS: usize = 512; +// const VECTOR_DIMENSIONS: usize = 1024; +// const VECTOR_DIMENSIONS: usize = 1536; +// const VECTOR_DIMENSIONS: usize = 3072; + +fn main() { + for (distance_name, func) in &[ + (Angular::name(), &measure_distance:: as &dyn Fn(usize)), + (Euclidean::name(), &measure_distance:: as &dyn Fn(usize)), + (Manhattan::name(), &measure_distance:: as &dyn Fn(usize)), + (DotProduct::name(), &measure_distance:: as &dyn Fn(usize)), + ( + BinaryQuantizedEuclidean::name(), + &measure_distance:: as &dyn Fn(usize), + ), + ] { + println!("{distance_name}"); + for number_fetched in [1, 10, 50, 100] { + (func)(number_fetched); + } + println!(); + } +} + +fn measure_distance(number_fetched: usize) { + let dir = tempfile::tempdir().unwrap(); + let env = + unsafe { EnvOpenOptions::new().map_size(TWENTY_HUNDRED_MIB).open(dir.path()) }.unwrap(); + + let mut rng = StdRng::seed_from_u64(13); + let 
points = generate_points(&mut rng, NUMBER_VECTORS, VECTOR_DIMENSIONS); + let mut wtxn = env.write_txn().unwrap(); + + let database = env + .create_database::>(&mut wtxn, None) + .unwrap(); + load_into_arroy(&mut rng, &mut wtxn, database, VECTOR_DIMENSIONS, &points).unwrap(); + + let reader = arroy::Reader::open(&wtxn, 0, database).unwrap(); + + let querying = points.choose(&mut rng).unwrap(); + + let relevant = partial_sort_by::( + points.iter().map(|(i, v)| (*i, v.as_slice())), + &querying.1, + number_fetched, + ); + + let arroy = reader.nns_by_item(&wtxn, querying.0, number_fetched, None, None).unwrap().unwrap(); + + let mut correctly_retrieved = 0; + for ret in arroy { + if relevant.iter().any(|(id, _, _)| *id == ret.0) { + correctly_retrieved += 1; + } + } + + println!("recall@{number_fetched}: {}", correctly_retrieved as f32 / relevant.len() as f32); +} + +fn partial_sort_by<'a, D: Distance>( + mut vectors: impl Iterator, + sort_by: &[f32], + elements: usize, +) -> Vec<(ItemId, &'a [f32], f32)> { + let mut ret = Vec::with_capacity(elements); + ret.extend(vectors.by_ref().take(elements).map(|(i, v)| (i, v, distance::(sort_by, v)))); + ret.sort_by(|(_, _, left), (_, _, right)| left.total_cmp(right)); + + if ret.is_empty() { + return ret; + } + + for (item_id, vector) in vectors { + let distance = distance::(sort_by, vector); + if distance < ret.last().unwrap().2 { + match ret.binary_search_by(|(_, _, d)| d.total_cmp(&distance)) { + Ok(i) | Err(i) => { + ret.pop(); + ret.insert(i, (item_id, vector, distance)) + } + } + } + } + + ret +} + +fn distance(left: &[f32], right: &[f32]) -> f32 { + let left = UnalignedVector::from_slice(left); + let left = Leaf { header: D::new_header(&left), vector: left }; + let right = UnalignedVector::from_slice(right); + let right = Leaf { header: D::new_header(&right), vector: right }; + + D::built_distance(&left, &right) +} + +fn load_into_arroy( + rng: &mut StdRng, + wtxn: &mut RwTxn, + database: Database, + dimensions: usize, + points: &[(ItemId, Vec)], +) -> Result<()> { + let writer = Writer::::new(database, 0, dimensions); + for (i, vector) in points.iter() { + writer.add_item(wtxn, *i, &vector[..])?; + } + writer.build(wtxn, rng, None)?; + + Ok(()) +} + +fn generate_points(mut rng: R, count: usize, dimensions: usize) -> Vec<(ItemId, Vec)> { + let mut points = Vec::with_capacity(count); + for item_id in 0..count { + let mut vector = vec![0.0; dimensions]; + for scalar in &mut vector { + *scalar = rng.gen_range(-1.0..1.0); + } + // rng.try_fill(&mut vector[..]).unwrap(); + points.push((item_id.try_into().unwrap(), vector)); + } + points +} From 7f004e82a3c8c42d6d4c43efd0d06656ede941d3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 10 Jul 2024 16:26:11 +0200 Subject: [PATCH 18/61] add an implementation for the binary quantized manhattan distance --- examples/relevancy.rs | 14 +++- src/distance/binary_quantized_manhattan.rs | 93 ++++++++++++++++++++++ src/distance/mod.rs | 4 + src/lib.rs | 7 +- 4 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 src/distance/binary_quantized_manhattan.rs diff --git a/examples/relevancy.rs b/examples/relevancy.rs index 633c5d58..1558a6a5 100644 --- a/examples/relevancy.rs +++ b/examples/relevancy.rs @@ -1,6 +1,8 @@ use rand::seq::SliceRandom; -use arroy::distances::{Angular, BinaryQuantizedEuclidean, DotProduct, Euclidean, Manhattan}; +use arroy::distances::{ + Angular, BinaryQuantizedEuclidean, BinaryQuantizedManhattan, DotProduct, Euclidean, Manhattan, +}; use arroy::internals::{self, Leaf, NodeCodec, 
UnalignedVector}; use arroy::{Database, Distance, ItemId, Result, Writer}; use heed::{EnvOpenOptions, RwTxn}; @@ -9,12 +11,12 @@ use rand::{Rng, SeedableRng}; const TWENTY_HUNDRED_MIB: usize = 2 * 1024 * 1024 * 1024; -const NUMBER_VECTORS: usize = 4_000; +const NUMBER_VECTORS: usize = 10_000; // The openAI dimensions -const VECTOR_DIMENSIONS: usize = 256; +// const VECTOR_DIMENSIONS: usize = 256; // const VECTOR_DIMENSIONS: usize = 512; // const VECTOR_DIMENSIONS: usize = 1024; -// const VECTOR_DIMENSIONS: usize = 1536; +const VECTOR_DIMENSIONS: usize = 1536; // const VECTOR_DIMENSIONS: usize = 3072; fn main() { @@ -27,6 +29,10 @@ fn main() { BinaryQuantizedEuclidean::name(), &measure_distance:: as &dyn Fn(usize), ), + ( + BinaryQuantizedManhattan::name(), + &measure_distance:: as &dyn Fn(usize), + ), ] { println!("{distance_name}"); for number_fetched in [1, 10, 50, 100] { diff --git a/src/distance/binary_quantized_manhattan.rs b/src/distance/binary_quantized_manhattan.rs new file mode 100644 index 00000000..55d3fd89 --- /dev/null +++ b/src/distance/binary_quantized_manhattan.rs @@ -0,0 +1,93 @@ +use std::borrow::Cow; + +use bytemuck::{Pod, Zeroable}; +use rand::Rng; + +use super::two_means; +use crate::distance::Distance; +use crate::node::Leaf; +use crate::parallel::ImmutableSubsetLeafs; +use crate::unaligned_vector::{self, BinaryQuantized, UnalignedVector}; + +/// A taxicab geometry or a Manhattan geometry is a geometry whose usual distance function +/// or metric of Euclidean geometry is replaced by a new metric in which the distance between +/// two points is the sum of the absolute differences of their Cartesian coordinates. +#[derive(Debug, Clone)] +pub enum BinaryQuantizedManhattan {} + +/// The header of BinaryQuantizedEuclidean leaf nodes. +#[repr(C)] +#[derive(Pod, Zeroable, Debug, Clone, Copy)] +pub struct NodeHeaderBinaryQuantizedManhattan { + /// An extra constant term to determine the offset of the plane + bias: f32, +} + +impl Distance for BinaryQuantizedManhattan { + type Header = NodeHeaderBinaryQuantizedManhattan; + type VectorCodec = unaligned_vector::BinaryQuantized; + + fn name() -> &'static str { + "binary quantized manhattan" + } + + fn new_header(_vector: &UnalignedVector) -> Self::Header { + NodeHeaderBinaryQuantizedManhattan { bias: 0.0 } + } + + fn built_distance(p: &Leaf, q: &Leaf) -> f32 { + manhattan_distance(&p.vector, &q.vector) + } + + /// Normalizes the distance returned by the distance method. 
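+    /// Here the raw distance is a bit count in `[0, dimensions]` (plus any padding bits),
+    /// so dividing by the number of dimensions brings it back to roughly `[0, 1]`.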
+ fn normalized_distance(d: f32, dimensions: usize) -> f32 { + d.max(0.0) / dimensions as f32 + } + + fn norm_no_header(v: &UnalignedVector) -> f32 { + let ones = v.as_bytes().iter().map(|b| b.count_ones()).sum::() as f32; + ones.sqrt() + } + + fn init(_node: &mut Leaf) {} + + fn create_split<'a, R: Rng>( + children: &'a ImmutableSubsetLeafs, + rng: &mut R, + ) -> heed::Result>> { + let [node_p, node_q] = two_means(rng, children, false)?; + let vector: Vec = + node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); + let mut normal = Leaf { + header: NodeHeaderBinaryQuantizedManhattan { bias: 0.0 }, + vector: UnalignedVector::from_slice(&vector), + }; + Self::normalize(&mut normal); + + Ok(Cow::Owned(normal.vector.into_owned())) + } + + fn margin(p: &Leaf, q: &Leaf) -> f32 { + p.header.bias + dot_product(&p.vector, &q.vector) + } + + fn margin_no_header( + p: &UnalignedVector, + q: &UnalignedVector, + ) -> f32 { + dot_product(p, q) + } +} + +fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. + // This may or may not impact relevancy since the 1s will be added to every vector. + u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 +} + +fn manhattan_distance( + u: &UnalignedVector, + v: &UnalignedVector, +) -> f32 { + u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 +} diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 237ba246..a8d8df57 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -5,6 +5,9 @@ pub use angular::{Angular, NodeHeaderAngular}; pub use binary_quantized_euclidean::{ BinaryQuantizedEuclidean, NodeHeaderBinaryQuantizedEuclidean, }; +pub use binary_quantized_manhattan::{ + BinaryQuantizedManhattan, NodeHeaderBinaryQuantizedManhattan, +}; use bytemuck::{Pod, Zeroable}; pub use dot_product::{DotProduct, NodeHeaderDotProduct}; pub use euclidean::{Euclidean, NodeHeaderEuclidean}; @@ -20,6 +23,7 @@ use crate::NodeCodec; mod angular; mod binary_quantized_euclidean; +mod binary_quantized_manhattan; mod dot_product; mod euclidean; mod manhattan; diff --git a/src/lib.rs b/src/lib.rs index 0c0e873b..c9a1bb49 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,8 +105,8 @@ pub mod internals { use rand::Rng; pub use crate::distance::{ - NodeHeaderAngular, NodeHeaderBinaryQuantizedEuclidean, NodeHeaderDotProduct, - NodeHeaderEuclidean, NodeHeaderManhattan, + NodeHeaderAngular, NodeHeaderBinaryQuantizedEuclidean, NodeHeaderBinaryQuantizedManhattan, + NodeHeaderDotProduct, NodeHeaderEuclidean, NodeHeaderManhattan, }; pub use crate::key::KeyCodec; pub use crate::node::{Leaf, NodeCodec}; @@ -136,7 +136,8 @@ pub mod internals { /// The set of distances implementing the [`Distance`] and supported by arroy. 
pub mod distances { pub use crate::distance::{ - Angular, BinaryQuantizedEuclidean, DotProduct, Euclidean, Manhattan, + Angular, BinaryQuantizedEuclidean, BinaryQuantizedManhattan, DotProduct, Euclidean, + Manhattan, }; } From b6ea5609eee2d8be0c99f5780dd9a91b53573f2a Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 10 Jul 2024 17:11:09 +0200 Subject: [PATCH 19/61] improve lisibility of the relevancy benchmark --- examples/relevancy.rs | 88 +++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 19 deletions(-) diff --git a/examples/relevancy.rs b/examples/relevancy.rs index 1558a6a5..43fd981a 100644 --- a/examples/relevancy.rs +++ b/examples/relevancy.rs @@ -1,3 +1,5 @@ +use std::fmt; + use rand::seq::SliceRandom; use arroy::distances::{ @@ -11,50 +13,97 @@ use rand::{Rng, SeedableRng}; const TWENTY_HUNDRED_MIB: usize = 2 * 1024 * 1024 * 1024; -const NUMBER_VECTORS: usize = 10_000; -// The openAI dimensions -// const VECTOR_DIMENSIONS: usize = 256; -// const VECTOR_DIMENSIONS: usize = 512; -// const VECTOR_DIMENSIONS: usize = 1024; -const VECTOR_DIMENSIONS: usize = 1536; -// const VECTOR_DIMENSIONS: usize = 3072; +const NUMBER_VECTORS: usize = 4_000; fn main() { + let dimensions_tested = [256, 512, 1024, 1536, 3072]; + let recall_tested = [1, 10, 50, 100]; + + println!("Testing the following dimensions: @{dimensions_tested:?}"); + println!("Testing the following recall: @{recall_tested:?}"); + println!("Starting..."); + println!(); + for (distance_name, func) in &[ - (Angular::name(), &measure_distance:: as &dyn Fn(usize)), - (Euclidean::name(), &measure_distance:: as &dyn Fn(usize)), - (Manhattan::name(), &measure_distance:: as &dyn Fn(usize)), - (DotProduct::name(), &measure_distance:: as &dyn Fn(usize)), + (Angular::name(), &measure_distance:: as &dyn Fn(usize, usize) -> f32), + ( + Euclidean::name(), + &measure_distance:: as &dyn Fn(usize, usize) -> f32, + ), + ( + Manhattan::name(), + &measure_distance:: as &dyn Fn(usize, usize) -> f32, + ), + ( + DotProduct::name(), + &measure_distance:: as &dyn Fn(usize, usize) -> f32, + ), ( BinaryQuantizedEuclidean::name(), - &measure_distance:: as &dyn Fn(usize), + &measure_distance:: + as &dyn Fn(usize, usize) -> f32, ), ( BinaryQuantizedManhattan::name(), - &measure_distance:: as &dyn Fn(usize), + &measure_distance:: + as &dyn Fn(usize, usize) -> f32, ), ] { + let now = std::time::Instant::now(); println!("{distance_name}"); - for number_fetched in [1, 10, 50, 100] { - (func)(number_fetched); + // The openAI dimensions + for dimensions in [256, 512, 1024, 1536, 3072] { + let mut recall = Vec::new(); + for number_fetched in recall_tested { + let rec = (func)(number_fetched, dimensions); + recall.push(Recall(rec)); + } + println!("For {dimensions:4} dim, recall: {recall:3?}"); } + println!("Took {:?}", now.elapsed()); println!(); } } -fn measure_distance(number_fetched: usize) { +struct Recall(f32); + +impl fmt::Debug for Recall { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.0 { + // red + f32::NEG_INFINITY..=0.25 => write!(f, "\x1b[1;31m")?, + // yellow + 0.25..=0.5 => write!(f, "\x1b[1;33m")?, + // green + 0.5..=0.75 => write!(f, "\x1b[1;32m")?, + // blue + 0.75..=0.90 => write!(f, "\x1b[1;34m")?, + // cyan + 0.90..=0.999 => write!(f, "\x1b[1;36m")?, + // underlined cyan + 0.999..=f32::INFINITY => write!(f, "\x1b[1;4;36m")?, + _ => (), + } + write!(f, "{:.2}\x1b[0m", self.0) + } +} + +fn measure_distance( + number_fetched: usize, + dimensions: usize, +) -> f32 { let dir = 
tempfile::tempdir().unwrap(); let env = unsafe { EnvOpenOptions::new().map_size(TWENTY_HUNDRED_MIB).open(dir.path()) }.unwrap(); let mut rng = StdRng::seed_from_u64(13); - let points = generate_points(&mut rng, NUMBER_VECTORS, VECTOR_DIMENSIONS); + let points = generate_points(&mut rng, NUMBER_VECTORS, dimensions); let mut wtxn = env.write_txn().unwrap(); let database = env .create_database::>(&mut wtxn, None) .unwrap(); - load_into_arroy(&mut rng, &mut wtxn, database, VECTOR_DIMENSIONS, &points).unwrap(); + load_into_arroy(&mut rng, &mut wtxn, database, dimensions, &points).unwrap(); let reader = arroy::Reader::open(&wtxn, 0, database).unwrap(); @@ -75,7 +124,8 @@ fn measure_distance(number_f } } - println!("recall@{number_fetched}: {}", correctly_retrieved as f32 / relevant.len() as f32); + // println!("recall@{number_fetched}: {}", correctly_retrieved as f32 / relevant.len() as f32); + correctly_retrieved as f32 / relevant.len() as f32 } fn partial_sort_by<'a, D: Distance>( From 0e8fba237a8d3a4fe2cd0cd16351aeec60592f87 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 10 Jul 2024 17:28:24 +0200 Subject: [PATCH 20/61] implements a basic oversampling --- examples/relevancy.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/examples/relevancy.rs b/examples/relevancy.rs index 43fd981a..5e7edf85 100644 --- a/examples/relevancy.rs +++ b/examples/relevancy.rs @@ -12,15 +12,18 @@ use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; const TWENTY_HUNDRED_MIB: usize = 2 * 1024 * 1024 * 1024; - const NUMBER_VECTORS: usize = 4_000; +const OVERSAMPLING: usize = 3; fn main() { let dimensions_tested = [256, 512, 1024, 1536, 3072]; let recall_tested = [1, 10, 50, 100]; + let color: Vec<_> = (0..=100).step_by(10).map(|i| Recall(i as f32 / 100.0)).collect(); - println!("Testing the following dimensions: @{dimensions_tested:?}"); + println!("Testing the following dimensions: {dimensions_tested:?}"); println!("Testing the following recall: @{recall_tested:?}"); + println!("Oversampling of: x{OVERSAMPLING}"); + println!("With color code: {color:?}"); println!("Starting..."); println!(); @@ -115,7 +118,11 @@ fn measure_distance( number_fetched, ); - let arroy = reader.nns_by_item(&wtxn, querying.0, number_fetched, None, None).unwrap().unwrap(); + let mut arroy = reader + .nns_by_item(&wtxn, querying.0, number_fetched * OVERSAMPLING, None, None) + .unwrap() + .unwrap(); + arroy.truncate(number_fetched); let mut correctly_retrieved = 0; for ret in arroy { From 9f664489d045139b8d2be442cb4895e3b956e350 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 5 Aug 2024 15:45:17 +0200 Subject: [PATCH 21/61] compute two_means on non binary quantized distances --- examples/relevancy.rs | 63 +++++++++++--------- src/distance/binary_quantized_euclidean.rs | 46 +++++++++++++-- src/distance/binary_quantized_manhattan.rs | 45 ++++++++++++-- src/distance/euclidean.rs | 3 +- src/distance/mod.rs | 69 +++++++++++++++++++++- src/lib.rs | 9 +-- src/unaligned_vector/binary_quantized.rs | 7 ++- 7 files changed, 194 insertions(+), 48 deletions(-) diff --git a/examples/relevancy.rs b/examples/relevancy.rs index 5e7edf85..cc464d4c 100644 --- a/examples/relevancy.rs +++ b/examples/relevancy.rs @@ -3,7 +3,8 @@ use std::fmt; use rand::seq::SliceRandom; use arroy::distances::{ - Angular, BinaryQuantizedEuclidean, BinaryQuantizedManhattan, DotProduct, Euclidean, Manhattan, + Angular, BinaryQuantizedAngular, BinaryQuantizedEuclidean, BinaryQuantizedManhattan, + DotProduct, Euclidean, Manhattan, }; use 
arroy::internals::{self, Leaf, NodeCodec, UnalignedVector}; use arroy::{Database, Distance, ItemId, Result, Writer}; @@ -28,28 +29,32 @@ fn main() { println!(); for (distance_name, func) in &[ + ( + BinaryQuantizedAngular::name(), + &measure_distance:: as &dyn Fn(usize, usize) -> f32, + ), (Angular::name(), &measure_distance:: as &dyn Fn(usize, usize) -> f32), ( - Euclidean::name(), - &measure_distance:: as &dyn Fn(usize, usize) -> f32, + BinaryQuantizedManhattan::name(), + &measure_distance:: + as &dyn Fn(usize, usize) -> f32, ), ( Manhattan::name(), &measure_distance:: as &dyn Fn(usize, usize) -> f32, ), - ( - DotProduct::name(), - &measure_distance:: as &dyn Fn(usize, usize) -> f32, - ), ( BinaryQuantizedEuclidean::name(), &measure_distance:: as &dyn Fn(usize, usize) -> f32, ), ( - BinaryQuantizedManhattan::name(), - &measure_distance:: - as &dyn Fn(usize, usize) -> f32, + Euclidean::name(), + &measure_distance:: as &dyn Fn(usize, usize) -> f32, + ), + ( + DotProduct::name(), + &measure_distance:: as &dyn Fn(usize, usize) -> f32, ), ] { let now = std::time::Instant::now(); @@ -110,29 +115,31 @@ fn measure_distance( let reader = arroy::Reader::open(&wtxn, 0, database).unwrap(); - let querying = points.choose(&mut rng).unwrap(); - - let relevant = partial_sort_by::( - points.iter().map(|(i, v)| (*i, v.as_slice())), - &querying.1, - number_fetched, - ); - - let mut arroy = reader - .nns_by_item(&wtxn, querying.0, number_fetched * OVERSAMPLING, None, None) - .unwrap() - .unwrap(); - arroy.truncate(number_fetched); - let mut correctly_retrieved = 0; - for ret in arroy { - if relevant.iter().any(|(id, _, _)| *id == ret.0) { - correctly_retrieved += 1; + for _ in 0..100 { + let querying = points.choose(&mut rng).unwrap(); + + let relevant = partial_sort_by::( + points.iter().map(|(i, v)| (*i, v.as_slice())), + &querying.1, + number_fetched, + ); + + let mut arroy = reader + .nns_by_item(&wtxn, querying.0, number_fetched * OVERSAMPLING, None, None) + .unwrap() + .unwrap(); + arroy.truncate(number_fetched); + + for ret in arroy { + if relevant.iter().any(|(id, _, _)| *id == ret.0) { + correctly_retrieved += 1; + } } } // println!("recall@{number_fetched}: {}", correctly_retrieved as f32 / relevant.len() as f32); - correctly_retrieved as f32 / relevant.len() as f32 + correctly_retrieved as f32 / (number_fetched as f32 * 100.0) } fn partial_sort_by<'a, D: Distance>( diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index 278a77ce..724fb0ed 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ -3,7 +3,7 @@ use std::borrow::Cow; use bytemuck::{Pod, Zeroable}; use rand::Rng; -use super::two_means; +use super::{two_means_binary_quantized as two_means, Euclidean}; use crate::distance::Distance; use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; @@ -46,8 +46,7 @@ impl Distance for BinaryQuantizedEuclidean { } fn norm_no_header(v: &UnalignedVector) -> f32 { - let ones = v.as_bytes().iter().map(|b| b.count_ones()).sum::() as f32; - ones.sqrt() + dot_product(v, v).sqrt() } fn init(_node: &mut Leaf) {} @@ -56,7 +55,7 @@ impl Distance for BinaryQuantizedEuclidean { children: &'a ImmutableSubsetLeafs, rng: &mut R, ) -> heed::Result>> { - let [node_p, node_q] = two_means(rng, children, false)?; + let [node_p, node_q] = two_means::(rng, children, false)?; let vector: Vec = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let mut normal = Leaf { @@ -80,10 +79,35 @@ 
impl Distance for BinaryQuantizedEuclidean { } } +fn bits(mut word: u8) -> [f32; 8] { + let mut ret = [0.0; 8]; + for i in 0..8 { + let bit = word & 1; + word >>= 1; + if bit == 0 { + ret[i] = -1.0; + } else { + ret[i] = 1.0; + } + } + + ret +} + fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. // This may or may not impact relevancy since the 1s will be added to every vector. - u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 + // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 + + u.as_bytes() + .iter() + .zip(v.as_bytes()) + .flat_map(|(u, v)| { + let u = bits(*u); + let v = bits(*v); + u.into_iter().zip(v).map(|(u, v)| u * v) + }) + .sum::() } fn squared_euclidean_distance( @@ -92,5 +116,15 @@ fn squared_euclidean_distance( ) -> f32 { // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. // This may or may not impact relevancy since the 1s will be added to every vector. - u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 + // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 + + u.as_bytes() + .iter() + .zip(v.as_bytes()) + .flat_map(|(u, v)| { + let u = bits(*u); + let v = bits(*v); + u.into_iter().zip(v).map(|(u, v)| (u - v) * (u - v)) + }) + .sum::() } diff --git a/src/distance/binary_quantized_manhattan.rs b/src/distance/binary_quantized_manhattan.rs index 55d3fd89..3a724f10 100644 --- a/src/distance/binary_quantized_manhattan.rs +++ b/src/distance/binary_quantized_manhattan.rs @@ -3,7 +3,7 @@ use std::borrow::Cow; use bytemuck::{Pod, Zeroable}; use rand::Rng; -use super::two_means; +use super::{two_means_binary_quantized as two_means, Manhattan}; use crate::distance::Distance; use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; @@ -45,7 +45,7 @@ impl Distance for BinaryQuantizedManhattan { } fn norm_no_header(v: &UnalignedVector) -> f32 { - let ones = v.as_bytes().iter().map(|b| b.count_ones()).sum::() as f32; + let ones = v.as_bytes().iter().flat_map(|b| bits(*b)).sum::(); ones.sqrt() } @@ -55,7 +55,7 @@ impl Distance for BinaryQuantizedManhattan { children: &'a ImmutableSubsetLeafs, rng: &mut R, ) -> heed::Result>> { - let [node_p, node_q] = two_means(rng, children, false)?; + let [node_p, node_q] = two_means::(rng, children, false)?; let vector: Vec = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); let mut normal = Leaf { @@ -79,15 +79,50 @@ impl Distance for BinaryQuantizedManhattan { } } +fn bits(mut word: u8) -> [f32; 8] { + let mut ret = [0.0; 8]; + for i in 0..8 { + let bit = word & 1; + word >>= 1; + if bit == 0 { + ret[i] = -1.0; + } else { + ret[i] = 1.0; + } + } + + ret +} + fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. // This may or may not impact relevancy since the 1s will be added to every vector. 
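    // `bits` expands every packed byte into eight `{-1.0, +1.0}` coordinates (a 0 bit becomes
    // -1.0, a 1 bit becomes +1.0), so the sum of pairwise products below is a real dot product
    // over the quantized values: (number of matching bits) - (number of differing bits).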
- u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 + // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 + + u.as_bytes() + .iter() + .zip(v.as_bytes()) + .flat_map(|(u, v)| { + let u = bits(*u); + let v = bits(*v); + u.into_iter().zip(v).map(|(u, v)| u * v) + }) + .sum::() } fn manhattan_distance( u: &UnalignedVector, v: &UnalignedVector, ) -> f32 { - u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 + // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 + + u.as_bytes() + .iter() + .zip(v.as_bytes()) + .flat_map(|(u, v)| { + let u = bits(*u); + let v = bits(*v); + u.into_iter().zip(v).map(|(u, v)| (u - v).abs()) + }) + .sum::() } diff --git a/src/distance/euclidean.rs b/src/distance/euclidean.rs index ae503e83..1c0b2f54 100644 --- a/src/distance/euclidean.rs +++ b/src/distance/euclidean.rs @@ -54,11 +54,10 @@ impl Distance for Euclidean { let [node_p, node_q] = two_means(rng, children, false)?; let vector: Vec<_> = node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); - let mut normal = Leaf { + let mut normal: Leaf<'static, Self> = Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: UnalignedVector::from_vec(vector), }; - Self::normalize(&mut normal); normal.header.bias = normal .vector diff --git a/src/distance/mod.rs b/src/distance/mod.rs index a8d8df57..8871e0a4 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::fmt; pub use angular::{Angular, NodeHeaderAngular}; +pub use binary_quantized_angular::{BinaryQuantizedAngular, NodeHeaderBinaryQuantizedAngular}; pub use binary_quantized_euclidean::{ BinaryQuantizedEuclidean, NodeHeaderBinaryQuantizedEuclidean, }; @@ -22,12 +23,18 @@ use crate::unaligned_vector::{UnalignedVector, UnalignedVectorCodec}; use crate::NodeCodec; mod angular; +mod binary_quantized_angular; mod binary_quantized_euclidean; mod binary_quantized_manhattan; mod dot_product; mod euclidean; mod manhattan; +fn new_leaf(vec: Vec) -> Leaf<'static, D> { + let vector = UnalignedVector::from_vec(vec); + Leaf { header: D::new_header(&vector), vector } +} + /// A trait used by arroy to compute the distances, /// compute the split planes, and normalize user vectors. #[allow(missing_docs)] @@ -137,8 +144,7 @@ fn two_means( const ITERATION_STEPS: usize = 200; let [leaf_p, leaf_q] = leafs.choose_two(rng)?.unwrap(); - let mut leaf_p = leaf_p.into_owned(); - let mut leaf_q = leaf_q.into_owned(); + let (mut leaf_p, mut leaf_q) = (leaf_p.into_owned(), leaf_q.into_owned()); if cosine { D::normalize(&mut leaf_p); @@ -171,3 +177,62 @@ fn two_means( Ok([leaf_p, leaf_q]) } + +pub fn two_means_binary_quantized( + rng: &mut R, + leafs: &ImmutableSubsetLeafs, + cosine: bool, +) -> heed::Result<[Leaf<'static, D>; 2]> { + // This algorithm is a huge heuristic. Empirically it works really well, but I + // can't motivate it well. The basic idea is to keep two centroids and assign + // points to either one of them. We weight each centroid by the number of points + // assigned to it, so to balance it. 
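+    // Unlike `two_means`, the centroids here are tracked with the non-quantized sister
+    // distance `NonBqDist`, so the running means are not forced back onto quantized
+    // values after every single update.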
+ + const ITERATION_STEPS: usize = 200; + + let [leaf_p, leaf_q] = leafs.choose_two(rng)?.unwrap(); + let mut leaf_p: Leaf<'static, NonBqDist> = new_leaf(leaf_p.vector.iter().collect()); + let mut leaf_q: Leaf<'static, NonBqDist> = new_leaf(leaf_q.vector.iter().collect()); + + if cosine { + NonBqDist::normalize(&mut leaf_p); + NonBqDist::normalize(&mut leaf_q); + } + + NonBqDist::init(&mut leaf_p); + NonBqDist::init(&mut leaf_q); + + let mut ic = 1.0; + let mut jc = 1.0; + for _ in 0..ITERATION_STEPS { + let node_k = leafs.choose(rng)?.unwrap(); + let node_k: Leaf<'static, NonBqDist> = new_leaf(node_k.vector.iter().collect()); + let di = ic * NonBqDist::non_built_distance(&leaf_p, &node_k); + let dj = jc * NonBqDist::non_built_distance(&leaf_q, &node_k); + let norm = if cosine { NonBqDist::norm(&node_k) } else { 1.0 }; + if norm.is_nan() || norm <= 0.0 { + continue; + } + if di < dj { + // update_mean(&mut leaf_p, node_k.vector.iter(), norm, ic); + Distance::update_mean(&mut leaf_p, &node_k, norm, ic); + Distance::init(&mut leaf_p); + ic += 1.0; + } else if dj < di { + // update_mean(&mut leaf_q, node_k.vector.iter(), norm, jc); + Distance::update_mean(&mut leaf_q, &node_k, norm, jc); + Distance::init(&mut leaf_q); + jc += 1.0; + } + } + + let leaf_p = new_leaf(leaf_p.vector.iter().collect()); + let leaf_q = new_leaf(leaf_q.vector.iter().collect()); + Ok([leaf_p, leaf_q]) +} + +fn update_mean(mean: &mut Vec, new_node: impl Iterator, norm: f32, c: f32) { + let vec: Vec<_> = + mean.iter().zip(new_node).map(|(x, n)| (x * c + n / norm) / (c + 1.0)).collect(); + *mean = vec; +} diff --git a/src/lib.rs b/src/lib.rs index c9a1bb49..b4271f68 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,8 +105,9 @@ pub mod internals { use rand::Rng; pub use crate::distance::{ - NodeHeaderAngular, NodeHeaderBinaryQuantizedEuclidean, NodeHeaderBinaryQuantizedManhattan, - NodeHeaderDotProduct, NodeHeaderEuclidean, NodeHeaderManhattan, + NodeHeaderAngular, NodeHeaderBinaryQuantizedAngular, NodeHeaderBinaryQuantizedEuclidean, + NodeHeaderBinaryQuantizedManhattan, NodeHeaderDotProduct, NodeHeaderEuclidean, + NodeHeaderManhattan, }; pub use crate::key::KeyCodec; pub use crate::node::{Leaf, NodeCodec}; @@ -136,8 +137,8 @@ pub mod internals { /// The set of distances implementing the [`Distance`] and supported by arroy. 
pub mod distances { pub use crate::distance::{ - Angular, BinaryQuantizedEuclidean, BinaryQuantizedManhattan, DotProduct, Euclidean, - Manhattan, + Angular, BinaryQuantizedAngular, BinaryQuantizedEuclidean, BinaryQuantizedManhattan, + DotProduct, Euclidean, Manhattan, }; } diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index b99c9998..a8eb607d 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -77,7 +77,12 @@ impl Iterator for BinaryQuantizedIterator<'_> { self.current_element >>= 1; self.current_iteration += 1; - Some(bit as f32) + if bit == 0 { + Some(-1.0) + } else { + Some(1.0) + } + // Some(bit as f32) } fn size_hint(&self) -> (usize, Option) { From 732767a590e8c7142052aa2348c84d7418dc08ac Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 6 Aug 2024 10:28:50 +0200 Subject: [PATCH 22/61] Make two_means return non binary quantized distances --- src/distance/mod.rs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 8871e0a4..7191d60b 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -182,7 +182,7 @@ pub fn two_means_binary_quantized( rng: &mut R, leafs: &ImmutableSubsetLeafs, cosine: bool, -) -> heed::Result<[Leaf<'static, D>; 2]> { +) -> heed::Result<[Leaf<'static, NonBqDist>; 2]> { // This algorithm is a huge heuristic. Empirically it works really well, but I // can't motivate it well. The basic idea is to keep two centroids and assign // points to either one of them. We weight each centroid by the number of points @@ -214,25 +214,15 @@ pub fn two_means_binary_quantized( continue; } if di < dj { - // update_mean(&mut leaf_p, node_k.vector.iter(), norm, ic); Distance::update_mean(&mut leaf_p, &node_k, norm, ic); Distance::init(&mut leaf_p); ic += 1.0; } else if dj < di { - // update_mean(&mut leaf_q, node_k.vector.iter(), norm, jc); Distance::update_mean(&mut leaf_q, &node_k, norm, jc); Distance::init(&mut leaf_q); jc += 1.0; } } - let leaf_p = new_leaf(leaf_p.vector.iter().collect()); - let leaf_q = new_leaf(leaf_q.vector.iter().collect()); Ok([leaf_p, leaf_q]) } - -fn update_mean(mean: &mut Vec, new_node: impl Iterator, norm: f32, c: f32) { - let vec: Vec<_> = - mean.iter().zip(new_node).map(|(x, n)| (x * c + n / norm) / (c + 1.0)).collect(); - *mean = vec; -} From 54626c124a1776cbcdba779b267b584ac33e9db9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 7 Aug 2024 16:25:24 +0200 Subject: [PATCH 23/61] add the angular distance --- src/distance/binary_quantized_angular.rs | 141 +++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 src/distance/binary_quantized_angular.rs diff --git a/src/distance/binary_quantized_angular.rs b/src/distance/binary_quantized_angular.rs new file mode 100644 index 00000000..a7b1ab30 --- /dev/null +++ b/src/distance/binary_quantized_angular.rs @@ -0,0 +1,141 @@ +use std::borrow::Cow; + +use bytemuck::{Pod, Zeroable}; +use rand::Rng; + +use super::{two_means_binary_quantized as two_means, Angular, NodeHeaderAngular}; +use crate::distance::Distance; +use crate::node::Leaf; +use crate::parallel::ImmutableSubsetLeafs; +use crate::unaligned_vector::{self, BinaryQuantized, UnalignedVector}; + +/// The Cosine similarity is a measure of similarity between two +/// non-zero vectors defined in an inner product space. Cosine similarity +/// is the cosine of the angle between the vectors. 
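+///
+/// In this binary quantized variant every coordinate is stored as a single sign bit and
+/// decoded back to -1.0 or +1.0, so the cosine is measured between the quantized vectors
+/// rather than the original f32 ones.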
+#[derive(Debug, Clone)] +pub enum BinaryQuantizedAngular {} + +/// The header of BinaryQuantizedAngular leaf nodes. +#[repr(C)] +#[derive(Pod, Zeroable, Debug, Clone, Copy)] +pub struct NodeHeaderBinaryQuantizedAngular { + norm: f32, +} + +impl Distance for BinaryQuantizedAngular { + type Header = NodeHeaderBinaryQuantizedAngular; + type VectorCodec = unaligned_vector::BinaryQuantized; + + fn name() -> &'static str { + "binary quantized angular" + } + + fn new_header(vector: &UnalignedVector) -> Self::Header { + NodeHeaderBinaryQuantizedAngular { norm: Self::norm_no_header(vector) } + } + + fn built_distance(p: &Leaf, q: &Leaf) -> f32 { + let pn = p.header.norm; + let qn = q.header.norm; + let pq = dot_product(&p.vector, &q.vector); + let pnqn = pn * qn; + if pnqn != 0.0 { + let cos = pq / pnqn; + // cos is [-1; 1] + // cos = 0. -> 0.5 + // cos = -1. -> 1.0 + // cos = 1. -> 0.0 + (1.0 - cos) / 2.0 + } else { + 0.0 + } + } + + /// Normalizes the distance returned by the distance method. + fn normalized_distance(d: f32, _dimensions: usize) -> f32 { + d + } + + fn norm_no_header(v: &UnalignedVector) -> f32 { + dot_product(v, v).sqrt() + } + + fn init(node: &mut Leaf) { + node.header.norm = dot_product(&node.vector, &node.vector).sqrt(); + } + + fn create_split<'a, R: Rng>( + children: &'a ImmutableSubsetLeafs, + rng: &mut R, + ) -> heed::Result>> { + let [node_p, node_q] = two_means::(rng, children, true)?; + let vector: Vec = + node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect(); + let unaligned_vector = UnalignedVector::from_vec(vector); + let mut normal = Leaf { + header: NodeHeaderBinaryQuantizedAngular { norm: 0.0 }, + vector: unaligned_vector, + }; + Self::normalize(&mut normal); + + Ok(normal.vector) + } + + fn margin_no_header( + p: &UnalignedVector, + q: &UnalignedVector, + ) -> f32 { + dot_product(p, q) + } +} + +fn bits(mut word: u8) -> [f32; 8] { + let mut ret = [0.0; 8]; + for i in 0..8 { + let bit = word & 1; + word >>= 1; + if bit == 0 { + ret[i] = -1.0; + // ret[i] = 0.0; + } else { + ret[i] = 1.0; + } + } + + ret +} + +fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. + // This may or may not impact relevancy since the 1s will be added to every vector. + // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 + + u.as_bytes() + .iter() + .zip(v.as_bytes()) + .flat_map(|(u, v)| { + let u = bits(*u); + let v = bits(*v); + u.into_iter().zip(v).map(|(u, v)| u * v) + }) + .sum::() +} + +fn squared_euclidean_distance( + u: &UnalignedVector, + v: &UnalignedVector, +) -> f32 { + // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. + // This may or may not impact relevancy since the 1s will be added to every vector. 
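+    // With the {-1.0, +1.0} decoding, any padding bit decodes to -1.0 in both vectors and
+    // therefore adds +1 to this dot product and +1 to both squared norms.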
+ // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 + + u.as_bytes() + .iter() + .zip(v.as_bytes()) + .flat_map(|(u, v)| { + let u = bits(*u); + let v = bits(*v); + u.into_iter().zip(v).map(|(u, v)| (u - v) * (u - v)) + }) + .sum::() +} From 6290296cde1056fb2c37ce1c82534072ab8d83c8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 4 Sep 2024 17:07:25 +0200 Subject: [PATCH 24/61] implement the oversampling --- examples/compare_with_hnsw.rs | 3 ++- examples/relevancy.rs | 2 +- examples/search_movies.rs | 4 ++-- src/distance/binary_quantized_angular.rs | 4 +++- src/distance/binary_quantized_euclidean.rs | 2 ++ src/distance/binary_quantized_manhattan.rs | 2 ++ src/distance/mod.rs | 2 ++ src/reader.rs | 13 +++++++++++-- src/tests/reader.rs | 22 ++++++++++++---------- src/tests/writer.rs | 4 ++-- 10 files changed, 39 insertions(+), 19 deletions(-) diff --git a/examples/compare_with_hnsw.rs b/examples/compare_with_hnsw.rs index b792cacf..b97ccbe8 100644 --- a/examples/compare_with_hnsw.rs +++ b/examples/compare_with_hnsw.rs @@ -45,7 +45,8 @@ fn main() -> Result<()> { let search_k = if is_precise { NonZeroUsize::new(NUMBER_FETCHED * reader.n_trees() * 20) } else { None }; - let arroy_results = reader.nns_by_item(&rtxn, 0, NUMBER_FETCHED, search_k, None)?.unwrap(); + let arroy_results = + reader.nns_by_item(&rtxn, 0, NUMBER_FETCHED, search_k, None, None)?.unwrap(); eprintln!("took {:.02?} to find into arroy", before.elapsed()); let first = Point(reader.item_vector(&rtxn, 0)?.unwrap()); diff --git a/examples/relevancy.rs b/examples/relevancy.rs index cc464d4c..294791e1 100644 --- a/examples/relevancy.rs +++ b/examples/relevancy.rs @@ -126,7 +126,7 @@ fn measure_distance( ); let mut arroy = reader - .nns_by_item(&wtxn, querying.0, number_fetched * OVERSAMPLING, None, None) + .nns_by_item(&wtxn, querying.0, number_fetched * OVERSAMPLING, None, None, None) .unwrap() .unwrap(); arroy.truncate(number_fetched); diff --git a/examples/search_movies.rs b/examples/search_movies.rs index 6b43ec9d..cc817836 100644 --- a/examples/search_movies.rs +++ b/examples/search_movies.rs @@ -27,7 +27,7 @@ fn main() { #[rustfmt::skip] let query: Vec = vec![-0.016822422, -0.021621706, 0.00019239318, 0.054372, 0.034900583, -0.011089119, 0.042128928, 0.02026509, 0.0019038923, -0.0014809829, -0.033832666, -0.029640282, -0.054234847, 0.018665258, -0.055895746, 0.0753006, 0.0061977296, 0.032228395, 0.022795584, -0.058140032, 0.026109613, -0.029421866, 0.04866331, 0.020661665, 0.017602501, 0.020653117, 0.046483666, 0.042843442, -0.045545023, -0.0017392042, 0.012052985, -0.0058927303, 0.032480225, 0.009872672, 0.024758337, -0.013354463, -0.044432696, -0.03226193, -0.059227727, 0.0078192735, 0.013650394, 0.0031477972, 0.005877705, 0.0068786396, 0.002517114, -0.011458909, 0.008640344, 0.044904687, -0.0047290456, -0.012748338, -0.048921518, 0.07827129, 0.005205742, -0.021857478, -0.02370976, 0.041743826, -0.016076453, -0.011403813, -0.025544455, -0.0046601044, -0.021723151, 0.007303265, -0.0136509575, 0.0073000537, -0.005085544, 0.04384241, -0.018964743, 0.03818674, -0.09198379, -0.032043297, -0.0067259674, 0.019887544, 0.005341308, 0.0050093965, 0.054900724, -0.020799952, 0.020495495, 0.01472667, 0.019677797, 0.037550557, -0.010920308, 0.03371257, 0.0020930816, 0.03709999, -0.036922902, -0.049608115, 0.0154750785, 0.007696657, -0.058294553, 0.045302838, -0.023393214, -0.060448237, -0.005798211, 0.053323198, 0.04070376, -0.0028753958, 0.051668108, -0.0069777397, 0.033418525, 0.016234992, 
-0.033323497, -0.0074829464, -0.008664235, -0.05547656, -0.11400871, -0.03518515, -0.0056998464, 0.01812429, -0.031799175, -0.0073341345, -0.06147767, -0.003742939, -0.004249079, -0.013904026, -0.00065635156, 0.09179383, 0.004267396, 0.00015509031, -0.049766053, 0.029881846, 0.10485467, -0.03120661, 0.014043553, 0.08671136, 0.059179407, 0.029454986, -0.0122302845, 0.06451508, 0.021481989, -0.06484224, 0.018707344, 0.022344032, -0.004790084, -0.04705671, 0.016396629, -0.03346155, 0.0064264126, -0.0053360737, 0.06672058, -0.0078784805, -0.016174054, 0.026566355, -0.046398066, 0.0025418145, 0.046905387, -0.020884424, -0.051193744, -0.031737294, -0.009927951, 0.023741305, -0.058117628, 0.051733956, -0.025581324, -0.030992776, 0.008804903, 0.04388304, 0.013213721, 0.004467152, -0.04988626, 0.0069321035, 0.039403677, 0.019677948, -0.066907056, 0.018572355, 0.013511877, -0.010518738, 0.010099771, -0.003633823, -0.0631501, -0.025649378, -0.043461364, 0.0016490245, 0.064196914, 0.033599235, -0.013222726, 0.015318823, 0.0771801, -0.0070276, -0.031138066, 0.0055310773, -0.09972089, 0.05066132, 0.047467627, -0.03498512, -0.03416252, -0.018362196, 0.040274452, -0.031371195, 0.030042851, 0.016328678, -0.05765591, -0.048823263, 0.054553114, -0.02033182, 0.046627544, 0.016558101, -0.0033715998, 0.0006232865, -0.0065704435, 0.008104579, 0.016307961, -0.041840676, 0.048135996, -0.018808063, -0.036892023, -0.0450471, 0.02718623, -0.036660295, -0.022694368, 0.005702901, -0.022678563, 0.0013453028, 0.07429447, -0.034700394, -0.032727163, 0.00596015, 0.034842487, -0.027818438, -0.00051779655, -0.014468772, 0.033954486, 0.04148899, -0.0829876, -0.015300944, 0.015376903, 0.09567573, 0.036652327, -0.049033575, -0.04484115, 0.041701544, -0.057027884, 0.0069984253, -0.0053272387, 0.025826871, 0.002177651, -0.030157669, 0.007895542, -0.014717798, 0.054724272, -0.05034077, -0.016694192, 0.038352106, -0.060709346, 0.08236629, -0.0096279215, 0.014632059, 0.025158316, -0.0009260515, -0.043707818, -0.01941624, -0.0118600605, -0.035666965, 0.037794825, 0.014687504, 0.038666032, -0.075831376, -0.038647566, -0.048394937, 0.031239703, 0.029136332, -0.00076040986, -0.015906896, 0.03718925, -0.0140040675, -0.037951406, -0.041062936, -0.06529122, 0.011906159, -0.011368897, 0.0060307034, 0.03684682, 0.031995844, -0.033985753, -0.018714348, -0.012443444, -0.007389346, 0.03257332, -0.04580996, -0.026579294, -0.024290696, -0.025647637, 0.022456668, -0.02420987, -0.065709755, -0.02623659, -0.028259972, 0.019707581, -0.022819564, -0.0409341, 0.026851093, 0.031858675, 0.048687093, -0.013439109, 0.011736404, 0.016420575, 0.03451187, -0.0059358296, 0.015338021, 0.04402986, 0.033739056, 0.033959225, 0.0068245684, -0.0143376645, -0.0007635987, -0.01949658, 0.016379116, 0.018640755, -0.06126936, -0.22691156, 0.015514225, -0.0010716971, 0.0044359663, 0.03258783, -0.0018310734, 0.010761778, -0.033404265, 0.005418415, 0.028870588, -0.0366465, 0.025508897, -0.003327967, -0.025249101, 0.041501254, -0.06906739, -0.03184493, -0.041302733, -0.03037772, 0.015740091, 0.008446552, -0.0459613, -0.022405358, -0.0036640046, 0.017842831, 0.003960712, -0.025942408, -0.038227286, -0.045894515, -0.01752483, 0.017444108, -0.051017836, 0.029609472, 0.008688325, 0.020816054, 0.008120903, 0.03892946, -0.033378396, 0.02176841, 0.027685048, -0.012064678, -0.079198286, -0.04271553, 0.005021753, 0.066962436, -0.03443632, -0.004004281, -0.050009515, -0.01630804, 0.06379373, 0.055116866, 0.027930314, 0.043325268, 0.02733439, -0.015951807, 0.059688378, -0.0075212875, 
0.03786285, -0.04638327, -0.043671872, 0.043587692, 0.011264745, -0.059823193, 0.008415408, -0.040225852, -0.05263509, -0.0038932117, -0.047234535, 0.05749084, 0.029582193, -0.012869698, 0.027698075, -0.014221754, -0.05440618, 0.007839065, -0.028753158, -0.029088387, -0.00039888048, 0.012631819, 0.0038486738, -0.059913363, -0.0034661351, 0.011339918, 0.005836589, -0.018044928, -0.035229705, 0.0015524679, -0.035521194, -0.028409205, 0.0004174717, 0.060292065, -0.009710763, -0.04719587, 0.034226153, 0.04258676, 0.03754591, 0.056335006, -0.012293127, 0.03885916, -0.011872468, 0.026709288, -0.030494772, -0.0027441443, 0.01256448, 0.0070703924, 0.011282641, -0.03820788, -0.029001744, 0.0024300558, -0.0032799696, 0.037857816, 0.001686728, 0.056249045, -0.01862739, 0.04376537, -0.0019654054, 0.050269835, 0.035223164, 0.0059567657, 0.013870472, -0.001804614, 0.027300585, -0.03382222, -0.041098855, -0.060636565, 0.0047175046, 0.029142305, 0.06523361, 0.028681634, -0.023454288, -0.018000197, -0.030090509, -0.0046562785, -0.04519735, 0.047884777, -0.00059952086, -0.03280122, -0.08012555, 0.008639195, 0.01629006, 0.032155965, 0.034481294, 0.021274198, 0.010470909, 0.022913199, -0.035904404, 0.041294016, -0.00987633, -0.021613108, 0.012205929, 0.005322071, 0.025864823, 0.08942025, -0.08067831, -0.014871667, -0.034839284, 0.028048998, -0.0063091223, 0.037978478, -0.055790387, 0.0045954804, -0.042958327, 0.02137769, -0.008589233, -0.00062141696, 0.052822173, 0.034125473, -0.015106767, 0.0030919765, -0.0072712647, 0.0056287237, 0.019516133, -0.031278323, 0.025771588, 0.01701546, 0.019516064, 0.016180338, 0.01349268, 0.011978184, 0.011838524, -0.0050102035, -0.06970658, 0.022854539, -0.004192521, 0.0577575, -0.003954721, -0.054374386, -0.027609108, 0.0134023735, 0.010305641, -0.011130317, 0.052328475, 0.014928648, -0.013976018, -0.07100651, -0.06789901, -0.031873316, -0.011598853, 0.029284442, -0.04940027, 0.0100974385, -0.02187546, -0.062819175, 0.0069366414, 0.052176703, -0.06834835, 0.013463273, -0.0013379813, 0.005786334, 0.017775143, -0.01291353, -0.016923305, -0.049682386, -0.034103107, 0.010883184, -0.055132758, 0.025268175, -0.025599582, 0.015927013, -0.03237898, -0.027073668, -0.034358867, -0.027672807, 0.022677394, -0.03531693, 0.010573503, 0.00032215187, 0.0066956943, -0.051510572, -0.029456092, 0.05758612, -0.038166363, -0.00999853, 0.05758596, -0.006796505, 0.028503977, -0.024184246, 0.054051045, 0.0040905816, 0.099899694, 0.06076009, 0.011753628, -0.03253187, -0.0035343366, 0.02351163, 0.03206495, 0.004892613, -0.04530409, -0.0056237346, -0.006101407, 0.019704496, -0.010228795, 0.027814431, 0.020409154, 0.033115197, -0.07446951, -0.042142425, 0.03928483, -0.022784598, -0.003539396, -0.0074683367, 0.043651864, 0.007761874, 0.022063423, 0.05344986, 0.05065469, 0.029476669, -0.028968832, 0.023550583, -0.022291148, 0.055309687, -0.053843252, 0.020895477, -0.0148687605, 0.012166838, 0.0033556349, -0.07022937, -0.059401378, 0.013194393, -0.0419862, -0.0070434613, 0.030479655, -0.053955454, -0.031870224, 0.034511264, -0.047943473, 0.0069080396, 0.026099209, -0.012516935, 0.0003174421, -0.006716995, 0.07027558, 0.038463045, -0.016081728, 0.05018074, -0.062176052, 0.08961092, 0.03679902, 0.011107996, -0.0032339245, -0.0118898135, 0.013669906, 0.056221563, -0.049234938, 0.003090264, 0.01062722, -0.008937757, -0.08464787, -0.032616463, 0.055935893, 0.006192905, -0.014768529, 0.04930304, 0.053852808, -0.036349185, -0.037947245, 0.0076732435, -0.040889677, 0.022189876, 0.015142795, 0.005928425, 
-0.009679575, 0.039194115, 0.0041091475, 0.035120673, 0.016776932, -0.04100678, 0.041131947, 0.040904496, 0.047341976, 0.029321635, 0.030489001, -0.0135518275, 0.038717188, 0.0017859036, -0.044703316, -0.007397534, 0.029149175, -0.00021891313, 0.019795585, -0.054424375, 0.010228703, -0.0057461066, 0.05096695, 0.05683213, -0.018136851, -0.0030009004, -0.033427265, 0.010878728, 0.050759643, 0.040795874, 0.019920254, -0.026135486, -0.07642272, 0.035290312, 0.004655317, 0.0043676766, -0.010411962, -0.0076723946, 0.015248613, 0.008905208, -0.0002423048, 0.03892336, -0.025703456, -0.021123456, -0.00066909986, 0.04459856, 0.052217484, -0.017885901, -0.015303531, 0.0057848957, -0.036129624, -0.0736907, 0.035401847, -0.025658514, -0.0082354145, -0.0012491915, -0.040769547, -0.039205503, 0.05530217, -0.014954734, 0.0056790086, -0.04454665, -0.028425619, -0.034654, -0.057087515, -0.0224583, -0.005496095, -0.009889468, -0.05025576, -0.009459795, -0.00871503, 0.021968294, 0.0074964114, -0.032455806, -0.005696087, 0.005180231, 0.056079067, -0.03189999, 0.045113377, 0.061360348, 0.01839327, -0.053088665, 0.04942768, 0.014662789, 0.06641078, -0.008998172, -0.009717696, -0.079248, 0.047506567, 0.04778238, 0.025009798, -0.03899872, 0.009850679, -0.04399064, -0.053494785, 0.055456433, 0.026770461, -0.011158729, -0.073486604, -0.04088162, -0.023263954, -0.022185653, 0.03401001, -0.034742568, 0.043794204, 0.004035502, 0.011585448, -0.009235968, 0.031503983, 0.016500674, -0.012498497, -0.05733327, 0.0024852154, -0.02377962, -0.072548844, -0.008489325, 0.01825339, 0.032909963, -0.023669574, 0.0022601841, -0.008336443, 0.0041536367, 0.007989558, -0.035507284, -0.03951105, 0.0069870483, 0.04283141, -0.05102877, -0.025309727, 0.052937508, -0.014378752, -0.012047669, -0.024964543, -0.00071902486, 0.009493713, 0.024152702, 0.022622166, 0.06481285, 0.0022744837]; let now = Instant::now(); - let _ret = reader.nns_by_vector(&rtxn, &query, 20, None, None).unwrap(); + let _ret = reader.nns_by_vector(&rtxn, &query, 20, None, None, None).unwrap(); println!("Louis's query took {:?}", now.elapsed()); let mut durations = Vec::new(); @@ -37,7 +37,7 @@ fn main() { println!("Starts querying all documents ..."); for (id, _) in vectors.into_iter() { let now = Instant::now(); - reader.nns_by_item(&rtxn, id, 20, None, None).unwrap().unwrap(); + reader.nns_by_item(&rtxn, id, 20, None, None, None).unwrap().unwrap(); // reader.nns_by_item(&rtxn, id, 20, None, Some(&filter)).unwrap().unwrap(); durations.push(now.elapsed()); } diff --git a/src/distance/binary_quantized_angular.rs b/src/distance/binary_quantized_angular.rs index a7b1ab30..633b7b1e 100644 --- a/src/distance/binary_quantized_angular.rs +++ b/src/distance/binary_quantized_angular.rs @@ -3,7 +3,7 @@ use std::borrow::Cow; use bytemuck::{Pod, Zeroable}; use rand::Rng; -use super::{two_means_binary_quantized as two_means, Angular, NodeHeaderAngular}; +use super::{two_means_binary_quantized as two_means, Angular}; use crate::distance::Distance; use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; @@ -23,6 +23,8 @@ pub struct NodeHeaderBinaryQuantizedAngular { } impl Distance for BinaryQuantizedAngular { + const DEFAULT_OVERSAMPLING: usize = 3; + type Header = NodeHeaderBinaryQuantizedAngular; type VectorCodec = unaligned_vector::BinaryQuantized; diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index 724fb0ed..2a72e9fa 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ 
b/src/distance/binary_quantized_euclidean.rs @@ -25,6 +25,8 @@ pub struct NodeHeaderBinaryQuantizedEuclidean { } impl Distance for BinaryQuantizedEuclidean { + const DEFAULT_OVERSAMPLING: usize = 3; + type Header = NodeHeaderBinaryQuantizedEuclidean; type VectorCodec = unaligned_vector::BinaryQuantized; diff --git a/src/distance/binary_quantized_manhattan.rs b/src/distance/binary_quantized_manhattan.rs index 3a724f10..1808d346 100644 --- a/src/distance/binary_quantized_manhattan.rs +++ b/src/distance/binary_quantized_manhattan.rs @@ -24,6 +24,8 @@ pub struct NodeHeaderBinaryQuantizedManhattan { } impl Distance for BinaryQuantizedManhattan { + const DEFAULT_OVERSAMPLING: usize = 3; + type Header = NodeHeaderBinaryQuantizedManhattan; type VectorCodec = unaligned_vector::BinaryQuantized; diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 7191d60b..30a10ddd 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -39,6 +39,8 @@ fn new_leaf(vec: Vec) -> Leaf<'static, D> { /// compute the split planes, and normalize user vectors. #[allow(missing_docs)] pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { + const DEFAULT_OVERSAMPLING: usize = 1; + /// A header structure with informations related to the type Header: Pod + Zeroable + fmt::Debug; type VectorCodec: UnalignedVectorCodec; diff --git a/src/reader.rs b/src/reader.rs index f9e2ba05..48c113e5 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -173,10 +173,13 @@ impl<'t, D: Distance> Reader<'t, D> { item: ItemId, count: usize, search_k: Option, + oversampling: Option, candidates: Option<&RoaringBitmap>, ) -> Result>> { match item_leaf(self.database, self.index, rtxn, item)? { - Some(leaf) => self.nns_by_leaf(rtxn, &leaf, count, search_k, candidates).map(Some), + Some(leaf) => { + self.nns_by_leaf(rtxn, &leaf, count, search_k, oversampling, candidates).map(Some) + } None => Ok(None), } } @@ -190,6 +193,7 @@ impl<'t, D: Distance> Reader<'t, D> { vector: &[f32], count: usize, search_k: Option, + oversampling: Option, candidates: Option<&RoaringBitmap>, ) -> Result> { if vector.len() != self.dimensions { @@ -201,7 +205,7 @@ impl<'t, D: Distance> Reader<'t, D> { let vector = UnalignedVector::from_slice(vector); let leaf = Leaf { header: D::new_header(&vector), vector }; - self.nns_by_leaf(rtxn, &leaf, count, search_k, candidates) + self.nns_by_leaf(rtxn, &leaf, count, search_k, oversampling, candidates) } fn nns_by_leaf( @@ -210,6 +214,7 @@ impl<'t, D: Distance> Reader<'t, D> { query_leaf: &Leaf, count: usize, search_k: Option, + oversampling: Option, candidates: Option<&RoaringBitmap>, ) -> Result> { if self.items.is_empty() { @@ -220,6 +225,10 @@ impl<'t, D: Distance> Reader<'t, D> { let mut queue = BinaryHeap::with_capacity(self.roots.len() + self.items.len().ilog2() as usize); let search_k = search_k.map_or(count * self.roots.len(), NonZeroUsize::get); + let search_k = oversampling + .map_or(search_k.saturating_mul(D::DEFAULT_OVERSAMPLING), |oversampling| { + search_k.saturating_mul(oversampling.get()) + }); // Insert all the root nodes and associate them to the highest distance. 
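To make the widened search signature concrete, here is a minimal usage sketch (not part of the patch): the new `oversampling` argument slots in between `search_k` and `candidates`, and passing `None` falls back to the distance's `DEFAULT_OVERSAMPLING` (3 for the binary quantized distances, 1 otherwise). The `NonZeroUsize` parameter type mirrors `search_k` and the `arroy::distances::BinaryQuantizedEuclidean` re-export path are assumptions here.

```rust
use std::num::NonZeroUsize;

use arroy::distances::BinaryQuantizedEuclidean;
use arroy::{ItemId, Reader, Result};
use heed::RoTxn;

fn search<'t>(
    rtxn: &'t RoTxn,
    reader: &Reader<'t, BinaryQuantizedEuclidean>,
    query: &[f32],
) -> Result<Vec<(ItemId, f32)>> {
    // 20 results, default search_k, oversample the explored nodes by 3, no candidate filter.
    reader.nns_by_vector(rtxn, query, 20, None, NonZeroUsize::new(3), None)
}
```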
queue.extend(repeat(OrderedFloat(f32::INFINITY)).zip(self.roots.iter().map(NodeId::tree))); diff --git a/src/tests/reader.rs b/src/tests/reader.rs index 84adec3a..e0d7d94a 100644 --- a/src/tests/reader.rs +++ b/src/tests/reader.rs @@ -49,7 +49,7 @@ fn open_db_with_wrong_dimension() { let rtxn = handle.env.read_txn().unwrap(); let reader = Reader::::open(&rtxn, 0, handle.database).unwrap(); - let ret = reader.nns_by_vector(&rtxn, &[1.0, 2.0, 3.0], 5, None, None).unwrap_err(); + let ret = reader.nns_by_vector(&rtxn, &[1.0, 2.0, 3.0], 5, None, None, None).unwrap_err(); insta::assert_snapshot!(ret, @"Invalid vector dimensions. Got 3 but expected 2"); } @@ -88,7 +88,7 @@ fn search_in_db_with_a_single_vector() { let rtxn = handle.env.read_txn().unwrap(); let reader = Reader::::open(&rtxn, 0, handle.database).unwrap(); - let ret = reader.nns_by_item(&rtxn, 0, 1, None, None).unwrap(); + let ret = reader.nns_by_item(&rtxn, 0, 1, None, None, None).unwrap(); insta::assert_snapshot!(NnsRes(ret), @r###" id(0): distance(0) "###); @@ -112,14 +112,14 @@ fn two_dimension_on_a_line() { let reader = Reader::::open(&rtxn, 0, handle.database).unwrap(); // if we can't look into enough nodes we find some random points - let ret = reader.nns_by_item(&rtxn, 0, 5, NonZeroUsize::new(1), None).unwrap(); + let ret = reader.nns_by_item(&rtxn, 0, 5, NonZeroUsize::new(1), None, None).unwrap(); insta::assert_snapshot!(NnsRes(ret), @r###" id(48): distance(48) id(92): distance(92) "###); // if we can look into all the node there is no inifinite loop and it works - let ret = reader.nns_by_item(&rtxn, 0, 5, NonZeroUsize::new(usize::MAX), None).unwrap(); + let ret = reader.nns_by_item(&rtxn, 0, 5, NonZeroUsize::new(usize::MAX), None, None).unwrap(); insta::assert_snapshot!(NnsRes(ret), @r###" id(0): distance(0) id(1): distance(1) @@ -128,7 +128,7 @@ fn two_dimension_on_a_line() { id(4): distance(4) "###); - let ret = reader.nns_by_item(&rtxn, 0, 5, None, None).unwrap(); + let ret = reader.nns_by_item(&rtxn, 0, 5, None, None, None).unwrap(); insta::assert_snapshot!(NnsRes(ret), @r###" id(1): distance(1) id(2): distance(2) @@ -158,7 +158,7 @@ fn two_dimension_on_a_column() { let rtxn = handle.env.read_txn().unwrap(); let reader = Reader::::open(&rtxn, 0, handle.database).unwrap(); - let ret = reader.nns_by_item(&rtxn, 0, 5, None, None).unwrap(); + let ret = reader.nns_by_item(&rtxn, 0, 5, None, None, None).unwrap(); insta::assert_snapshot!(NnsRes(ret), @r###" id(1): distance(1) @@ -207,14 +207,16 @@ fn filtering() { let rtxn = handle.env.read_txn().unwrap(); let reader = Reader::::open(&rtxn, 0, handle.database).unwrap(); - let ret = reader.nns_by_item(&rtxn, 0, 5, None, Some(&RoaringBitmap::from_iter(0..2))).unwrap(); + let ret = + reader.nns_by_item(&rtxn, 0, 5, None, None, Some(&RoaringBitmap::from_iter(0..2))).unwrap(); insta::assert_snapshot!(NnsRes(ret), @r###" id(0): distance(0) id(1): distance(1) "###); - let ret = - reader.nns_by_item(&rtxn, 0, 5, None, Some(&RoaringBitmap::from_iter(98..1000))).unwrap(); + let ret = reader + .nns_by_item(&rtxn, 0, 5, None, None, Some(&RoaringBitmap::from_iter(98..1000))) + .unwrap(); insta::assert_snapshot!(NnsRes(ret), @r###" id(98): distance(98) id(99): distance(99) @@ -233,7 +235,7 @@ fn search_in_empty_database() { let rtxn = handle.env.read_txn().unwrap(); let reader = Reader::open(&rtxn, 0, handle.database).unwrap(); - let ret = reader.nns_by_vector(&rtxn, &[0., 0.], 10, None, None).unwrap(); + let ret = reader.nns_by_vector(&rtxn, &[0., 0.], 10, None, None, None).unwrap(); 
insta::assert_debug_snapshot!(ret, @"[]"); } diff --git a/src/tests/writer.rs b/src/tests/writer.rs index 683ca2b2..6a9d6d17 100644 --- a/src/tests/writer.rs +++ b/src/tests/writer.rs @@ -402,7 +402,7 @@ fn delete_document_in_an_empty_index_74() { writer2.build(&mut wtxn, &mut rng, None).unwrap(); let reader = Reader::open(&wtxn, 1, handle.database).unwrap(); - let ret = reader.nns_by_vector(&wtxn, &[0., 0.], 10, None, None).unwrap(); + let ret = reader.nns_by_vector(&wtxn, &[0., 0.], 10, None, None, None).unwrap(); insta::assert_debug_snapshot!(ret, @"[]"); wtxn.commit().unwrap(); @@ -418,7 +418,7 @@ fn delete_document_in_an_empty_index_74() { let rtxn = handle.env.read_txn().unwrap(); let reader = Reader::open(&rtxn, 1, handle.database).unwrap(); - let ret = reader.nns_by_vector(&rtxn, &[0., 0.], 10, None, None).unwrap(); + let ret = reader.nns_by_vector(&rtxn, &[0., 0.], 10, None, None, None).unwrap(); insta::assert_debug_snapshot!(ret, @"[]"); } From d812bd8c1e2a667991a4862fdb338c0b37b8c54a Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 5 Sep 2024 18:12:48 +0200 Subject: [PATCH 25/61] make the binary quantized distance quick again --- src/distance/binary_quantized_angular.rs | 64 ++---------------- src/distance/binary_quantized_euclidean.rs | 76 ++++++++-------------- src/distance/binary_quantized_manhattan.rs | 72 +++++++------------- src/spaces/simple.rs | 44 ++++++++++++- 4 files changed, 102 insertions(+), 154 deletions(-) diff --git a/src/distance/binary_quantized_angular.rs b/src/distance/binary_quantized_angular.rs index 633b7b1e..dc6a7de4 100644 --- a/src/distance/binary_quantized_angular.rs +++ b/src/distance/binary_quantized_angular.rs @@ -7,7 +7,8 @@ use super::{two_means_binary_quantized as two_means, Angular}; use crate::distance::Distance; use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; -use crate::unaligned_vector::{self, BinaryQuantized, UnalignedVector}; +use crate::spaces::simple::dot_product_binary_quantized; +use crate::unaligned_vector::{BinaryQuantized, UnalignedVector}; /// The Cosine similarity is a measure of similarity between two /// non-zero vectors defined in an inner product space. 
Cosine similarity @@ -26,7 +27,7 @@ impl Distance for BinaryQuantizedAngular { const DEFAULT_OVERSAMPLING: usize = 3; type Header = NodeHeaderBinaryQuantizedAngular; - type VectorCodec = unaligned_vector::BinaryQuantized; + type VectorCodec = BinaryQuantized; fn name() -> &'static str { "binary quantized angular" @@ -39,7 +40,7 @@ impl Distance for BinaryQuantizedAngular { fn built_distance(p: &Leaf, q: &Leaf) -> f32 { let pn = p.header.norm; let qn = q.header.norm; - let pq = dot_product(&p.vector, &q.vector); + let pq = dot_product_binary_quantized(&p.vector, &q.vector); let pnqn = pn * qn; if pnqn != 0.0 { let cos = pq / pnqn; @@ -59,11 +60,11 @@ impl Distance for BinaryQuantizedAngular { } fn norm_no_header(v: &UnalignedVector) -> f32 { - dot_product(v, v).sqrt() + dot_product_binary_quantized(v, v).sqrt() } fn init(node: &mut Leaf) { - node.header.norm = dot_product(&node.vector, &node.vector).sqrt(); + node.header.norm = dot_product_binary_quantized(&node.vector, &node.vector).sqrt(); } fn create_split<'a, R: Rng>( @@ -87,57 +88,6 @@ impl Distance for BinaryQuantizedAngular { p: &UnalignedVector, q: &UnalignedVector, ) -> f32 { - dot_product(p, q) + dot_product_binary_quantized(p, q) } } - -fn bits(mut word: u8) -> [f32; 8] { - let mut ret = [0.0; 8]; - for i in 0..8 { - let bit = word & 1; - word >>= 1; - if bit == 0 { - ret[i] = -1.0; - // ret[i] = 0.0; - } else { - ret[i] = 1.0; - } - } - - ret -} - -fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { - // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. - // This may or may not impact relevancy since the 1s will be added to every vector. - // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 - - u.as_bytes() - .iter() - .zip(v.as_bytes()) - .flat_map(|(u, v)| { - let u = bits(*u); - let v = bits(*v); - u.into_iter().zip(v).map(|(u, v)| u * v) - }) - .sum::() -} - -fn squared_euclidean_distance( - u: &UnalignedVector, - v: &UnalignedVector, -) -> f32 { - // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. - // This may or may not impact relevancy since the 1s will be added to every vector. - // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 - - u.as_bytes() - .iter() - .zip(v.as_bytes()) - .flat_map(|(u, v)| { - let u = bits(*u); - let v = bits(*v); - u.into_iter().zip(v).map(|(u, v)| (u - v) * (u - v)) - }) - .sum::() -} diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index 2a72e9fa..a022b855 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ -7,6 +7,7 @@ use super::{two_means_binary_quantized as two_means, Euclidean}; use crate::distance::Distance; use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; +use crate::spaces::simple::dot_product_binary_quantized; use crate::unaligned_vector::{self, BinaryQuantized, UnalignedVector}; /// The Euclidean distance between two points in Euclidean space @@ -39,7 +40,7 @@ impl Distance for BinaryQuantizedEuclidean { } fn built_distance(p: &Leaf, q: &Leaf) -> f32 { - squared_euclidean_distance(&p.vector, &q.vector) + squared_euclidean_distance_binary_quantized(&p.vector, &q.vector) } /// Normalizes the distance returned by the distance method. 
@@ -48,7 +49,7 @@ impl Distance for BinaryQuantizedEuclidean { } fn norm_no_header(v: &UnalignedVector) -> f32 { - dot_product(v, v).sqrt() + dot_product_binary_quantized(v, v).sqrt() } fn init(_node: &mut Leaf) {} @@ -70,63 +71,40 @@ impl Distance for BinaryQuantizedEuclidean { } fn margin(p: &Leaf, q: &Leaf) -> f32 { - p.header.bias + dot_product(&p.vector, &q.vector) + p.header.bias + dot_product_binary_quantized(&p.vector, &q.vector) } fn margin_no_header( p: &UnalignedVector, q: &UnalignedVector, ) -> f32 { - dot_product(p, q) + dot_product_binary_quantized(p, q) } } -fn bits(mut word: u8) -> [f32; 8] { - let mut ret = [0.0; 8]; - for i in 0..8 { - let bit = word & 1; - word >>= 1; - if bit == 0 { - ret[i] = -1.0; - } else { - ret[i] = 1.0; - } - } - - ret -} - -fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { - // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. - // This may or may not impact relevancy since the 1s will be added to every vector. - // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 - - u.as_bytes() - .iter() - .zip(v.as_bytes()) - .flat_map(|(u, v)| { - let u = bits(*u); - let v = bits(*v); - u.into_iter().zip(v).map(|(u, v)| u * v) - }) - .sum::() -} - -fn squared_euclidean_distance( +/// For the binary quantized squared euclidean distance: +/// 1. We need to do the following operation: `(u - v)^2`, in our case the only allowed values are -1 and 1: +/// -1 - -1 = 0 | ^2 => 0 +/// -1 - 1 = -2 | ^2 => 4 +/// 1 - -1 = 2 | ^2 => 4 +/// 1 - 1 = 0 | ^2 => 0 +/// +/// If we replace the -1 by the binary quantized 0, and the 1 stays 1s: +/// 0 * 0 = 0 +/// 0 * 1 = 1 +/// 1 * 0 = 1 +/// 1 * 1 = 0 +/// +/// The result must be multiplicated by 4. But that can be done at the very end. +/// +/// 2. Then we need to do the sum of the results: +/// Since we cannot go into the negative, it's safe to hold everything in a `u32` and simply counts the 1s. +/// At the very end, before converting the value to a `f32` we can multiplies everything by 4. +fn squared_euclidean_distance_binary_quantized( u: &UnalignedVector, v: &UnalignedVector, ) -> f32 { - // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. - // This may or may not impact relevancy since the 1s will be added to every vector. 
- // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 - - u.as_bytes() - .iter() - .zip(v.as_bytes()) - .flat_map(|(u, v)| { - let u = bits(*u); - let v = bits(*v); - u.into_iter().zip(v).map(|(u, v)| (u - v) * (u - v)) - }) - .sum::() + let ret = + u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() * 4; + ret as f32 } diff --git a/src/distance/binary_quantized_manhattan.rs b/src/distance/binary_quantized_manhattan.rs index 1808d346..6ccbe621 100644 --- a/src/distance/binary_quantized_manhattan.rs +++ b/src/distance/binary_quantized_manhattan.rs @@ -7,6 +7,7 @@ use super::{two_means_binary_quantized as two_means, Manhattan}; use crate::distance::Distance; use crate::node::Leaf; use crate::parallel::ImmutableSubsetLeafs; +use crate::spaces::simple::dot_product_binary_quantized; use crate::unaligned_vector::{self, BinaryQuantized, UnalignedVector}; /// A taxicab geometry or a Manhattan geometry is a geometry whose usual distance function @@ -38,7 +39,7 @@ impl Distance for BinaryQuantizedManhattan { } fn built_distance(p: &Leaf, q: &Leaf) -> f32 { - manhattan_distance(&p.vector, &q.vector) + manhattan_distance_binary_quantized(&p.vector, &q.vector) } /// Normalizes the distance returned by the distance method. @@ -47,7 +48,11 @@ impl Distance for BinaryQuantizedManhattan { } fn norm_no_header(v: &UnalignedVector) -> f32 { - let ones = v.as_bytes().iter().flat_map(|b| bits(*b)).sum::(); + let ones = v + .as_bytes() + .iter() + .map(|b| b.count_ones() as i32 - b.count_zeros() as i32) + .sum::() as f32; ones.sqrt() } @@ -70,61 +75,34 @@ impl Distance for BinaryQuantizedManhattan { } fn margin(p: &Leaf, q: &Leaf) -> f32 { - p.header.bias + dot_product(&p.vector, &q.vector) + p.header.bias + dot_product_binary_quantized(&p.vector, &q.vector) } fn margin_no_header( p: &UnalignedVector, q: &UnalignedVector, ) -> f32 { - dot_product(p, q) + dot_product_binary_quantized(p, q) } } -fn bits(mut word: u8) -> [f32; 8] { - let mut ret = [0.0; 8]; - for i in 0..8 { - let bit = word & 1; - word >>= 1; - if bit == 0 { - ret[i] = -1.0; - } else { - ret[i] = 1.0; - } - } - - ret -} - -fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { - // /!\ If the number of dimensions is not a multiple of the `Word` size, we'll xor 0 bits at the end, which will generate a lot of 1s. - // This may or may not impact relevancy since the 1s will be added to every vector. - // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u | v).count_ones()).sum::() as f32 - - u.as_bytes() - .iter() - .zip(v.as_bytes()) - .flat_map(|(u, v)| { - let u = bits(*u); - let v = bits(*v); - u.into_iter().zip(v).map(|(u, v)| u * v) - }) - .sum::() -} - -fn manhattan_distance( +/// For the binary quantized manhattan distance: +/// ```rust +/// p.vector.iter().zip(q.vector.iter()).map(|(p, q)| (p - q).abs()).sum() +/// ``` +/// 1. We need to subtract two scalars and take the absolute value: +/// -1 - -1 = 0 | abs => 0 +/// -1 - 1 = -2 | abs => 2 +/// 1 - -1 = 2 | abs => 2 +/// 1 - 1 = 0 | abs => 0 +/// +/// It's very similar to the euclidean distance. +/// => It's a xor, we counts the ones and multiplicate the result by 2 at the end. 
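A standalone sanity check (not from the patch) of the two identities spelled out in the comments above: over `±1` scalars, the squared euclidean distance contributes 4 per differing bit and the manhattan distance contributes 2, which is exactly what the xor/popcount kernels compute.

```rust
fn naive(u: &[f32], v: &[f32], f: impl Fn(f32, f32) -> f32) -> f32 {
    u.iter().zip(v).map(|(a, b)| f(*a, *b)).sum()
}

fn main() {
    let (u, v) = (0b1011_0010u8, 0b0110_0111u8);
    // Expand each bit to -1.0 (bit cleared) or 1.0 (bit set).
    let expand =
        |w: u8| (0..8).map(move |i| ((w >> i) & 1) as f32 * 2.0 - 1.0).collect::<Vec<f32>>();
    let (uf, vf) = (expand(u), expand(v));

    let differing = (u ^ v).count_ones() as f32;
    // `(a - b)²` over ±1 scalars adds 4 per differing bit…
    assert_eq!(naive(&uf, &vf, |a, b| (a - b) * (a - b)), differing * 4.0);
    // …and `|a - b|` adds 2, matching the two kernels above.
    assert_eq!(naive(&uf, &vf, |a, b| (a - b).abs()), differing * 2.0);
}
```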
+fn manhattan_distance_binary_quantized( u: &UnalignedVector, v: &UnalignedVector, ) -> f32 { - // u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() as f32 - - u.as_bytes() - .iter() - .zip(v.as_bytes()) - .flat_map(|(u, v)| { - let u = bits(*u); - let v = bits(*v); - u.into_iter().zip(v).map(|(u, v)| (u - v).abs()) - }) - .sum::() + let ret = + u.as_bytes().iter().zip(v.as_bytes()).map(|(u, v)| (u ^ v).count_ones()).sum::() * 2; + ret as f32 } diff --git a/src/spaces/simple.rs b/src/spaces/simple.rs index f80d11a4..8df42cf4 100644 --- a/src/spaces/simple.rs +++ b/src/spaces/simple.rs @@ -4,7 +4,7 @@ use super::simple_avx::*; use super::simple_neon::*; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use super::simple_sse::*; -use crate::unaligned_vector::UnalignedVector; +use crate::unaligned_vector::{BinaryQuantized, UnalignedVector}; #[cfg(target_arch = "x86_64")] const MIN_DIM_SIZE_AVX: usize = 32; @@ -81,3 +81,45 @@ pub fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { pub fn dot_product_non_optimized(u: &UnalignedVector, v: &UnalignedVector) -> f32 { u.iter().zip(v.iter()).map(|(a, b)| a * b).sum() } + +/// For the binary quantized dot product: +/// 1. We need to multiplicate two scalars, in our case the only allowed values are -1 and 1: +/// -1 * -1 = 1 +/// -1 * 1 = -1 +/// 1 * -1 = -1 +/// 1 * 1 = 1 +/// +/// This looks like a negative xor already, if we replace the -1 by the binary quantized 0, and the 1 stays 1s: +/// 0 * 0 = 1 +/// 0 * 1 = 0 +/// 1 * 0 = 0 +/// 1 * 1 = 1 +/// Is equivalent to `!(a ^ b)`. +/// +/// 2. Then we need to do the sum of the results: +/// 2.1 First we must do the sum of the operation on the `Word`s +/// /!\ We must be careful here because `1 - 0` actually translates to `1 - 1 = 0`. +/// `word.count_ones() - word.count_zeroes()` should do it: +/// 00 => -2 +/// 01 => 0 +/// 10 => 0 +/// 11 => 2 +/// /!\ We must also take care to use signed integer to be able to go into negatives +/// +/// 2.2 Finally we must sum the result of all the words +/// - By taking care of not overflowing: The biggest vectors contains like 5000 dimensions, a i16 could be enough. A i32 should be perfect. 
+/// - We can do the sum straight away without any more tricks +/// - We can cast the result to an f32 as expected +pub fn dot_product_binary_quantized( + u: &UnalignedVector, + v: &UnalignedVector, +) -> f32 { + u.as_bytes() + .iter() + .zip(v.as_bytes()) + .map(|(u, v)| { + let ret = !(u ^ v); + ret.count_ones() as i32 - ret.count_zeros() as i32 + }) + .sum::() as f32 +} From f9e2b630376f270971adf9bd8aaf48ae03c3cae8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 9 Sep 2024 15:55:51 +0200 Subject: [PATCH 26/61] provide a specialized method to check if a vector contains only zeros --- src/reader.rs | 2 +- src/unaligned_vector/binary_quantized.rs | 4 ++++ src/unaligned_vector/f32.rs | 4 ++++ src/unaligned_vector/mod.rs | 8 ++++++++ src/writer.rs | 2 +- 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/reader.rs b/src/reader.rs index 48c113e5..0c01c96d 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -102,7 +102,7 @@ impl<'t, D: Distance> Reader<'t, D> { Node::SplitPlaneNormal(SplitPlaneNormal { normal, left, right }) => { let left = recursive_depth(rtxn, database, index, left)?; let right = recursive_depth(rtxn, database, index, right)?; - let is_zero_normal = normal.iter().all(|f| f == 0.0) as usize; + let is_zero_normal = normal.is_zero() as usize; Ok(TreeStats { depth: 1 + left.depth.max(right.depth), diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index a8eb607d..87ca94ea 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -55,6 +55,10 @@ impl UnalignedVectorCodec for BinaryQuantized { fn len(vec: &UnalignedVector) -> usize { (vec.vector.len() / size_of::()) * QUANTIZED_WORD_SIZE } + + fn is_zero(vec: &UnalignedVector) -> bool { + vec.as_bytes().iter().all(|b| *b == 0) + } } pub struct BinaryQuantizedIterator<'a> { diff --git a/src/unaligned_vector/f32.rs b/src/unaligned_vector/f32.rs index 63bc6fd1..161245a4 100644 --- a/src/unaligned_vector/f32.rs +++ b/src/unaligned_vector/f32.rs @@ -43,4 +43,8 @@ impl UnalignedVectorCodec for f32 { fn len(vec: &UnalignedVector) -> usize { vec.vector.len() / size_of::() } + + fn is_zero(vec: &UnalignedVector) -> bool { + vec.iter().all(|v| v == 0.0) + } } diff --git a/src/unaligned_vector/mod.rs b/src/unaligned_vector/mod.rs index 4d052ebe..60965dd2 100644 --- a/src/unaligned_vector/mod.rs +++ b/src/unaligned_vector/mod.rs @@ -32,6 +32,9 @@ pub trait UnalignedVectorCodec: std::borrow::ToOwned + Sized { /// Returns the len of the vector in terms of elements. fn len(vec: &UnalignedVector) -> usize; + + /// Returns true if all the elements in the vector are equal to 0. + fn is_zero(vec: &UnalignedVector) -> bool; } /// A wrapper struct that is used to read unaligned vectors directly from memory. @@ -75,6 +78,11 @@ impl UnalignedVector { Codec::iter(self) } + /// Returns true if all the elements in the vector are equal to 0. + pub fn is_zero(&self) -> bool { + Codec::is_zero(self) + } + /// Returns an allocated and aligned `Vec`. 
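As a companion to the `dot_product_binary_quantized` kernel introduced above in `src/spaces/simple.rs`, a scalar cross-check (not part of the patch): `!(u ^ v)` marks the agreeing bits, and `ones - zeros` of that word is exactly the naive `±1` dot product.

```rust
fn main() {
    let (u, v) = (0b1011_0010u8, 0b0110_0111u8);
    // Expand each bit to -1.0 (bit cleared) or 1.0 (bit set).
    let expand = |w: u8| (0..8).map(move |i| ((w >> i) & 1) as f32 * 2.0 - 1.0);
    let naive: f32 = expand(u).zip(expand(v)).map(|(a, b)| a * b).sum();

    // Agreeing bits count +1, disagreeing bits count -1.
    let agree = !(u ^ v);
    let fast = agree.count_ones() as i32 - agree.count_zeros() as i32;
    assert_eq!(naive, fast as f32);
}
```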
pub fn to_vec(&self) -> Vec { let iter = self.iter(); diff --git a/src/writer.rs b/src/writer.rs index dd7715e8..8083c926 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -555,7 +555,7 @@ impl Writer { let mut left_ids = RoaringBitmap::new(); let mut right_ids = RoaringBitmap::new(); - if normal.iter().all(|d| d == 0.0) { + if normal.is_zero() { randomly_split_children(rng, to_insert, &mut left_ids, &mut right_ids); } else { for leaf in to_insert { From 60875ebc0a110a2a8662c72d49af7b2a22284a36 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 9 Sep 2024 16:45:30 +0200 Subject: [PATCH 27/61] fix the euclidean distance --- src/distance/euclidean.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/distance/euclidean.rs b/src/distance/euclidean.rs index 1c0b2f54..ccf8ee4f 100644 --- a/src/distance/euclidean.rs +++ b/src/distance/euclidean.rs @@ -58,6 +58,7 @@ impl Distance for Euclidean { header: NodeHeaderEuclidean { bias: 0.0 }, vector: UnalignedVector::from_vec(vector), }; + Self::normalize(&mut normal); normal.header.bias = normal .vector From 8c541edbdd787d4db27a8a00d78b551fac798e06 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 9 Sep 2024 16:47:30 +0200 Subject: [PATCH 28/61] fix the tests --- src/distance/binary_quantized_manhattan.rs | 2 +- src/lib.rs | 2 +- src/tests/binary_quantized.rs | 6 +++--- src/tests/node.rs | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/distance/binary_quantized_manhattan.rs b/src/distance/binary_quantized_manhattan.rs index 6ccbe621..730c6559 100644 --- a/src/distance/binary_quantized_manhattan.rs +++ b/src/distance/binary_quantized_manhattan.rs @@ -87,7 +87,7 @@ impl Distance for BinaryQuantizedManhattan { } /// For the binary quantized manhattan distance: -/// ```rust +/// ```text /// p.vector.iter().zip(q.vector.iter()).map(|(p, q)| (p - q).abs()).sum() /// ``` /// 1. We need to subtract two scalars and take the absolute value: diff --git a/src/lib.rs b/src/lib.rs index b4271f68..2d4f89da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,7 +60,7 @@ //! //! // Similar searching can be achieved by requesting the nearest neighbors of a given item. //! let item_id = 0; -//! let arroy_results = reader.nns_by_item(&rtxn, item_id, n_results, search_k, None)?.unwrap(); +//! let arroy_results = reader.nns_by_item(&rtxn, item_id, n_results, search_k, None, None)?.unwrap(); //! # Ok(()) } //! 
``` diff --git a/src/tests/binary_quantized.rs b/src/tests/binary_quantized.rs index 109ca28f..62450532 100644 --- a/src/tests/binary_quantized.rs +++ b/src/tests/binary_quantized.rs @@ -13,8 +13,8 @@ fn write_and_retrieve_binary_quantized_vector() { let vec = writer.item_vector(&wtxn, 0).unwrap().unwrap(); insta::assert_debug_snapshot!(vec, @r###" [ - 0.0, - 0.0, + -1.0, + -1.0, 1.0, 1.0, 1.0, @@ -27,7 +27,7 @@ fn write_and_retrieve_binary_quantized_vector() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000, 1.0000, 1.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, "0.0, ..."] }) + Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [-1.0000, -1.0000, 1.0000, 1.0000, 1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, "other ..."] }) Tree 0: Descendants(Descendants { descendants: [0] }) Root: Metadata { dimensions: 5, items: RoaringBitmap<[0]>, roots: [0], distance: "binary quantized euclidean" } "###); diff --git a/src/tests/node.rs b/src/tests/node.rs index 42d44998..1f7b014d 100644 --- a/src/tests/node.rs +++ b/src/tests/node.rs @@ -56,9 +56,9 @@ fn unaligned_binary_quantized_smol() { let deser: Vec<_> = unaligned.iter().collect(); assert_debug_snapshot!(deser[0..original.len()], @r###" [ - 0.0, + -1.0, 1.0, - 0.0, + -1.0, 1.0, 1.0, ] @@ -95,12 +95,12 @@ fn unaligned_binary_quantized_large() { let deser: Vec<_> = unaligned.to_vec(); assert_snapshot!(format!("{:?}", &deser[0..original.len()]), -@"[0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0]"); +@"[-1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0]"); for (orig, deser) in original.iter().zip(&deser) { if orig.is_sign_positive() { assert_eq!(deser, &1.0); } else { - assert_eq!(deser, &0.0); + assert_eq!(deser, &-1.0); } } } From d75d550b4b563ae0675d10a635fe100b175a54ab Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 10 Sep 2024 11:43:55 +0200 Subject: [PATCH 29/61] [perf] use an hashmap instead of a roaring bitmap + a vec to store the immutable tree/leafs --- Cargo.toml | 1 + src/parallel.rs | 39 ++++++++++++--------------------------- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4c5fed70..4695332c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ rayon = "1.10.0" roaring = "0.10.5" tempfile = "3.10.1" thiserror = "1.0.61" +nohash = "0.2.0" [dev-dependencies] anyhow = "1.0.86" diff --git a/src/parallel.rs b/src/parallel.rs index d9170967..969a849e 100644 --- a/src/parallel.rs +++ 
b/src/parallel.rs @@ -8,6 +8,7 @@ use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, RoTxn}; use memmap2::Mmap; +use nohash::IntMap; use rand::seq::index; use rand::Rng; use roaring::RoaringBitmap; @@ -180,9 +181,8 @@ impl ConcurrentNodeIds { /// in the mmapped file and the transaction is kept here and therefore /// no longer touches the database. pub struct ImmutableLeafs<'t, D> { - leaf_ids: RoaringBitmap, + leafs: IntMap, constant_length: Option, - offsets: Vec<*const u8>, _marker: marker::PhantomData<(&'t (), D)>, } @@ -190,9 +190,8 @@ impl<'t, D: Distance> ImmutableLeafs<'t, D> { /// Creates the structure by fetching all the leaf pointers /// and keeping the transaction making the pointers valid. pub fn new(rtxn: &'t RoTxn, database: Database, index: u16) -> heed::Result { - let mut leaf_ids = RoaringBitmap::new(); + let mut leafs = IntMap::default(); let mut constant_length = None; - let mut offsets = Vec::new(); let iter = database .remap_types::() @@ -203,11 +202,10 @@ impl<'t, D: Distance> ImmutableLeafs<'t, D> { let (key, bytes) = result?; let item_id = key.node.unwrap_item(); assert_eq!(*constant_length.get_or_insert(bytes.len()), bytes.len()); - assert!(leaf_ids.push(item_id)); - offsets.push(bytes.as_ptr()); + leafs.insert(item_id, bytes.as_ptr()); } - Ok(ImmutableLeafs { leaf_ids, constant_length, offsets, _marker: marker::PhantomData }) + Ok(ImmutableLeafs { leafs, constant_length, _marker: marker::PhantomData }) } /// Returns the leafs identified by the given ID. @@ -216,12 +214,7 @@ impl<'t, D: Distance> ImmutableLeafs<'t, D> { Some(len) => len, None => return Ok(None), }; - let ptr = match self - .leaf_ids - .rank(item_id) - .checked_sub(1) - .and_then(|offset| self.offsets.get(offset as usize)) - { + let ptr = match self.leafs.get(&item_id) { Some(ptr) => *ptr, None => return Ok(None), }; @@ -292,9 +285,7 @@ impl<'t, D: Distance> ImmutableSubsetLeafs<'t, D> { /// in the mmapped file and the transaction is kept here and therefore /// no longer touches the database. pub struct ImmutableTrees<'t, D> { - tree_ids: RoaringBitmap, - offsets: Vec<*const u8>, - lengths: Vec, + leafs: IntMap, _marker: marker::PhantomData<(&'t (), D)>, } @@ -302,9 +293,7 @@ impl<'t, D: Distance> ImmutableTrees<'t, D> { /// Creates the structure by fetching all the root pointers /// and keeping the transaction making the pointers valid. pub fn new(rtxn: &'t RoTxn, database: Database, index: u16) -> heed::Result { - let mut tree_ids = RoaringBitmap::new(); - let mut offsets = Vec::new(); - let mut lengths = Vec::new(); + let mut leafs = IntMap::default(); let iter = database .remap_types::() @@ -314,20 +303,16 @@ impl<'t, D: Distance> ImmutableTrees<'t, D> { for result in iter { let (key, bytes) = result?; let tree_id = key.node.unwrap_tree(); - assert!(tree_ids.push(tree_id)); - offsets.push(bytes.as_ptr()); - lengths.push(bytes.len()); + leafs.insert(tree_id, (bytes.len(), bytes.as_ptr())); } - Ok(ImmutableTrees { tree_ids, lengths, offsets, _marker: marker::PhantomData }) + Ok(ImmutableTrees { leafs, _marker: marker::PhantomData }) } /// Returns the tree node identified by the given ID. 
pub fn get(&self, item_id: ItemId) -> heed::Result>> { - let (ptr, len) = match self.tree_ids.rank(item_id).checked_sub(1).and_then(|offset| { - self.offsets.get(offset as usize).zip(self.lengths.get(offset as usize)) - }) { - Some((ptr, len)) => (*ptr, *len), + let (ptr, len) = match self.leafs.get(&item_id) { + Some((len, ptr)) => (*ptr, *len), None => return Ok(None), }; From cefadca3622582e64a80f1726d3cf3c7d7f87fde Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 10 Sep 2024 14:36:34 +0200 Subject: [PATCH 30/61] [perf] reduce the number of allocations by pre-allocating the size of the hashmap in advance --- src/parallel.rs | 30 +++++++++++++++++++++--------- src/writer.rs | 5 +++-- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/parallel.rs b/src/parallel.rs index 969a849e..6335b171 100644 --- a/src/parallel.rs +++ b/src/parallel.rs @@ -8,7 +8,7 @@ use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, RoTxn}; use memmap2::Mmap; -use nohash::IntMap; +use nohash::{BuildNoHashHasher, IntMap}; use rand::seq::index; use rand::Rng; use roaring::RoaringBitmap; @@ -189,8 +189,14 @@ pub struct ImmutableLeafs<'t, D> { impl<'t, D: Distance> ImmutableLeafs<'t, D> { /// Creates the structure by fetching all the leaf pointers /// and keeping the transaction making the pointers valid. - pub fn new(rtxn: &'t RoTxn, database: Database, index: u16) -> heed::Result { - let mut leafs = IntMap::default(); + pub fn new( + rtxn: &'t RoTxn, + database: Database, + index: u16, + nb_leafs: u64, + ) -> heed::Result { + let mut leafs = + IntMap::with_capacity_and_hasher(nb_leafs as usize, BuildNoHashHasher::default()); let mut constant_length = None; let iter = database @@ -285,15 +291,21 @@ impl<'t, D: Distance> ImmutableSubsetLeafs<'t, D> { /// in the mmapped file and the transaction is kept here and therefore /// no longer touches the database. pub struct ImmutableTrees<'t, D> { - leafs: IntMap, + trees: IntMap, _marker: marker::PhantomData<(&'t (), D)>, } impl<'t, D: Distance> ImmutableTrees<'t, D> { /// Creates the structure by fetching all the root pointers /// and keeping the transaction making the pointers valid. - pub fn new(rtxn: &'t RoTxn, database: Database, index: u16) -> heed::Result { - let mut leafs = IntMap::default(); + pub fn new( + rtxn: &'t RoTxn, + database: Database, + index: u16, + nb_trees: u64, + ) -> heed::Result { + let mut trees = + IntMap::with_capacity_and_hasher(nb_trees as usize, BuildNoHashHasher::default()); let iter = database .remap_types::() @@ -303,15 +315,15 @@ impl<'t, D: Distance> ImmutableTrees<'t, D> { for result in iter { let (key, bytes) = result?; let tree_id = key.node.unwrap_tree(); - leafs.insert(tree_id, (bytes.len(), bytes.as_ptr())); + trees.insert(tree_id, (bytes.len(), bytes.as_ptr())); } - Ok(ImmutableTrees { leafs, _marker: marker::PhantomData }) + Ok(ImmutableTrees { trees, _marker: marker::PhantomData }) } /// Returns the tree node identified by the given ID. 
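For context, both `ImmutableLeafs` and `ImmutableTrees` now rely on the same access pattern: a `nohash` map keyed by node ids, where `u32` keys hash to themselves and the capacity is reserved up front so the table never rehashes while being filled. A minimal standalone sketch (the `build` helper is illustrative, not from the patch):

```rust
use nohash::{BuildNoHashHasher, IntMap};

// Map each node id to its offset, reserving the final capacity in one allocation.
fn build(ids: &[u32]) -> IntMap<u32, usize> {
    let mut map = IntMap::with_capacity_and_hasher(ids.len(), BuildNoHashHasher::default());
    for (offset, id) in ids.iter().enumerate() {
        map.insert(*id, offset);
    }
    map
}
```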
pub fn get(&self, item_id: ItemId) -> heed::Result>> { - let (ptr, len) = match self.leafs.get(&item_id) { + let (ptr, len) = match self.trees.get(&item_id) { Some((len, ptr)) => (*ptr, *len), None => return Ok(None), }; diff --git a/src/writer.rs b/src/writer.rs index 8083c926..6544138a 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -336,11 +336,12 @@ impl Writer { log::debug!("Getting a reference to your {} items...", n_items); let used_node_ids = self.used_tree_node(wtxn)?; + let nb_tree_nodes = used_node_ids.len(); let concurrent_node_ids = ConcurrentNodeIds::new(used_node_ids); let frozzen_reader = FrozzenReader { - leafs: &ImmutableLeafs::new(wtxn, self.database, self.index)?, - trees: &ImmutableTrees::new(wtxn, self.database, self.index)?, + leafs: &ImmutableLeafs::new(wtxn, self.database, self.index, item_indices.len())?, + trees: &ImmutableTrees::new(wtxn, self.database, self.index, nb_tree_nodes)?, // The globally incrementing node ids that are shared between threads. concurrent_node_ids: &concurrent_node_ids, }; From c96e2044386341123b1d76adc46f01ee318ec436 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 10 Sep 2024 16:52:55 +0200 Subject: [PATCH 31/61] add or fix comments --- src/distance/binary_quantized_angular.rs | 4 +++- src/distance/binary_quantized_euclidean.rs | 16 +++++++++++----- src/distance/binary_quantized_manhattan.rs | 6 +++++- src/distance/mod.rs | 3 +++ 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/distance/binary_quantized_angular.rs b/src/distance/binary_quantized_angular.rs index dc6a7de4..f21d29e9 100644 --- a/src/distance/binary_quantized_angular.rs +++ b/src/distance/binary_quantized_angular.rs @@ -13,10 +13,12 @@ use crate::unaligned_vector::{BinaryQuantized, UnalignedVector}; /// The Cosine similarity is a measure of similarity between two /// non-zero vectors defined in an inner product space. Cosine similarity /// is the cosine of the angle between the vectors. +/// /!\ This distance function is binary quantized, which means it loses all its precision +/// and their scalar values are converted to `-1` or `1`. #[derive(Debug, Clone)] pub enum BinaryQuantizedAngular {} -/// The header of BinaryQuantizedAngular leaf nodes. +/// The header of `BinaryQuantizedAngular` leaf nodes. #[repr(C)] #[derive(Pod, Zeroable, Debug, Clone, Copy)] pub struct NodeHeaderBinaryQuantizedAngular { diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index a022b855..00077fd3 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ -14,10 +14,12 @@ use crate::unaligned_vector::{self, BinaryQuantized, UnalignedVector}; /// is the length of the line segment between them. /// /// `d(p, q) = sqrt((p - q)²)` +/// /!\ This distance function is binary quantized, which means it loses all its precision +/// and their scalar values are converted to `-1` or `1`. #[derive(Debug, Clone)] pub enum BinaryQuantizedEuclidean {} -/// The header of BinaryQuantizedEuclidean leaf nodes. +/// The header of `BinaryQuantizedEuclidean` leaf nodes. #[repr(C)] #[derive(Pod, Zeroable, Debug, Clone, Copy)] pub struct NodeHeaderBinaryQuantizedEuclidean { @@ -83,22 +85,26 @@ impl Distance for BinaryQuantizedEuclidean { } /// For the binary quantized squared euclidean distance: -/// 1. We need to do the following operation: `(u - v)^2`, in our case the only allowed values are -1 and 1: +/// 1. 
We need to do the following operation: `(u - v)^2`, in our case the only allowed values are `-1` and `1`: +/// ```text /// -1 - -1 = 0 | ^2 => 0 /// -1 - 1 = -2 | ^2 => 4 /// 1 - -1 = 2 | ^2 => 4 /// 1 - 1 = 0 | ^2 => 0 +/// ``` /// -/// If we replace the -1 by the binary quantized 0, and the 1 stays 1s: +/// If we replace the `-1` by the binary quantized `0`, and the `1` stays `1`s: +/// ```text /// 0 * 0 = 0 /// 0 * 1 = 1 /// 1 * 0 = 1 /// 1 * 1 = 0 +/// ``` /// -/// The result must be multiplicated by 4. But that can be done at the very end. +/// The result must be multiplicated by `4`. But that can be done at the very end. /// /// 2. Then we need to do the sum of the results: -/// Since we cannot go into the negative, it's safe to hold everything in a `u32` and simply counts the 1s. +/// Since we cannot go into the negative, it's safe to hold everything in a `u32` and simply counts the `1`s. /// At the very end, before converting the value to a `f32` we can multiplies everything by 4. fn squared_euclidean_distance_binary_quantized( u: &UnalignedVector, diff --git a/src/distance/binary_quantized_manhattan.rs b/src/distance/binary_quantized_manhattan.rs index 730c6559..8576918d 100644 --- a/src/distance/binary_quantized_manhattan.rs +++ b/src/distance/binary_quantized_manhattan.rs @@ -13,6 +13,8 @@ use crate::unaligned_vector::{self, BinaryQuantized, UnalignedVector}; /// A taxicab geometry or a Manhattan geometry is a geometry whose usual distance function /// or metric of Euclidean geometry is replaced by a new metric in which the distance between /// two points is the sum of the absolute differences of their Cartesian coordinates. +/// /!\ This distance function is binary quantized, which means it loses all its precision +/// and their scalar values are converted to `-1` or `1`. #[derive(Debug, Clone)] pub enum BinaryQuantizedManhattan {} @@ -91,13 +93,15 @@ impl Distance for BinaryQuantizedManhattan { /// p.vector.iter().zip(q.vector.iter()).map(|(p, q)| (p - q).abs()).sum() /// ``` /// 1. We need to subtract two scalars and take the absolute value: +/// ```text /// -1 - -1 = 0 | abs => 0 /// -1 - 1 = -2 | abs => 2 /// 1 - -1 = 2 | abs => 2 /// 1 - 1 = 0 | abs => 0 +/// ``` /// /// It's very similar to the euclidean distance. -/// => It's a xor, we counts the ones and multiplicate the result by 2 at the end. +/// => It's a xor, we counts the `1`s and multiplicate the result by `2` at the end. fn manhattan_distance_binary_quantized( u: &UnalignedVector, v: &UnalignedVector, diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 30a10ddd..4501445e 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -189,6 +189,9 @@ pub fn two_means_binary_quantized( // can't motivate it well. The basic idea is to keep two centroids and assign // points to either one of them. We weight each centroid by the number of points // assigned to it, so to balance it. + // Even though the points we're working on are binary quantized, for the centroid + // to move, we need to store them as f32. This requires us to convert the binary quantized + // vector to f32 vectors a lot, but the recall suffers too much if we don't do it. 
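An illustration of the comment above (not from the patch): the mean of `±1` points is fractional, so a centroid kept in the quantized domain could never move gradually; the means are therefore accumulated as `f32` and only the resulting normal is quantized again at the end.

```rust
fn main() {
    let points = [[1.0f32, -1.0], [1.0, 1.0], [-1.0, 1.0]];
    let mut centroid = [0.0f32; 2];
    for point in &points {
        for (c, x) in centroid.iter_mut().zip(point) {
            *c += x / points.len() as f32;
        }
    }
    // The true mean is fractional, something a ±1 vector cannot represent.
    assert_eq!(centroid, [1.0 / 3.0, 1.0 / 3.0]);

    // Collapsing back to the binary quantized domain loses that nuance.
    let quantized = centroid.map(|c| if c.is_sign_positive() { 1.0f32 } else { -1.0 });
    assert_eq!(quantized, [1.0, 1.0]);
}
```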
const ITERATION_STEPS: usize = 200; From 0945e333a2edfc17c9c620516abe5d5729ada0c1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Sep 2024 10:47:11 +0200 Subject: [PATCH 32/61] write an simd version of the code that converts the f32 vectors to simd vectors --- src/unaligned_vector/binary_quantized.rs | 61 ++++++++++++++++++------ 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 87ca94ea..db0dc7c6 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -26,15 +26,7 @@ impl UnalignedVectorCodec for BinaryQuantized { } fn from_slice(slice: &[f32]) -> Cow<'static, UnalignedVector> { - let mut output: Vec = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); - for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { - let mut word: QuantizedWord = 0; - for scalar in chunk.iter().rev() { - word <<= 1; - word += scalar.is_sign_positive() as QuantizedWord; - } - output.extend_from_slice(&word.to_ne_bytes()); - } + let output = unsafe { from_slice_simd(slice) }; Cow::Owned(output) } @@ -61,6 +53,50 @@ impl UnalignedVectorCodec for BinaryQuantized { } } +unsafe fn from_slice_simd(slice: &[f32]) -> Vec { + use core::arch::aarch64::*; + + let iterations = slice.len() / 8; + let ptr = slice.as_ptr(); + let mut ret = Vec::with_capacity(iterations); + + for i in 0..iterations { + unsafe { + let lane = vld1q_f32(ptr.add(i * 8)); + let lane = vcltzq_f32(lane); + let lane = vmvnq_u32(lane); + let mask: Vec = vec![ + 0b_00000000_00000000_00000000_00000001, + 0b_00000000_00000000_00000000_00000010, + 0b_00000000_00000000_00000000_00000100, + 0b_00000000_00000000_00000000_00001000, + ]; + let mask = vld1q_u32(mask.as_ptr()); + let lane = vandq_u32(lane, mask); + + let left = vaddvq_u32(lane) as u8; + + let lane = vld1q_f32(ptr.add(i * 8 + 4)); + let lane = vcltzq_f32(lane); + let lane = vmvnq_u32(lane); + let mask: Vec = vec![ + 0b_00000000_00000000_00000000_00010000, + 0b_00000000_00000000_00000000_00100000, + 0b_00000000_00000000_00000000_01000000, + 0b_00000000_00000000_00000000_10000000, + ]; + let mask = vld1q_u32(mask.as_ptr()); + let lane = vandq_u32(lane, mask); + + let right = vaddvq_u32(lane) as u8; + + ret.push(left | right); + } + } + + ret +} + pub struct BinaryQuantizedIterator<'a> { current_element: usize, current_iteration: usize, @@ -81,12 +117,7 @@ impl Iterator for BinaryQuantizedIterator<'_> { self.current_element >>= 1; self.current_iteration += 1; - if bit == 0 { - Some(-1.0) - } else { - Some(1.0) - } - // Some(bit as f32) + Some(bit as f32 * 2.0 - 1.0) } fn size_hint(&self) -> (usize, Option) { From d4ed3e1ba29a4c3542aaa5eadd773c6f5e8d7f73 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Sep 2024 17:49:43 +0200 Subject: [PATCH 33/61] fix the reminder while storing binary quantized vectors --- src/distance/mod.rs | 6 +-- src/reader.rs | 7 ++- src/unaligned_vector/binary_quantized.rs | 58 +++++++++++++++++++++++- src/unaligned_vector/f32.rs | 9 +++- src/unaligned_vector/mod.rs | 7 ++- src/writer.rs | 7 ++- 6 files changed, 81 insertions(+), 13 deletions(-) diff --git a/src/distance/mod.rs b/src/distance/mod.rs index 4501445e..3c9833d8 100644 --- a/src/distance/mod.rs +++ b/src/distance/mod.rs @@ -196,8 +196,8 @@ pub fn two_means_binary_quantized( const ITERATION_STEPS: usize = 200; let [leaf_p, leaf_q] = leafs.choose_two(rng)?.unwrap(); - let mut leaf_p: Leaf<'static, NonBqDist> = new_leaf(leaf_p.vector.iter().collect()); - let mut leaf_q: 
Leaf<'static, NonBqDist> = new_leaf(leaf_q.vector.iter().collect()); + let mut leaf_p: Leaf<'static, NonBqDist> = new_leaf(leaf_p.vector.to_vec()); + let mut leaf_q: Leaf<'static, NonBqDist> = new_leaf(leaf_q.vector.to_vec()); if cosine { NonBqDist::normalize(&mut leaf_p); @@ -211,7 +211,7 @@ pub fn two_means_binary_quantized( let mut jc = 1.0; for _ in 0..ITERATION_STEPS { let node_k = leafs.choose(rng)?.unwrap(); - let node_k: Leaf<'static, NonBqDist> = new_leaf(node_k.vector.iter().collect()); + let node_k: Leaf<'static, NonBqDist> = new_leaf(node_k.vector.to_vec()); let di = ic * NonBqDist::non_built_distance(&leaf_p, &node_k); let dj = jc * NonBqDist::non_built_distance(&leaf_q, &node_k); let norm = if cosine { NonBqDist::norm(&node_k) } else { 1.0 }; diff --git a/src/reader.rs b/src/reader.rs index 0c01c96d..f72770ce 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -131,8 +131,11 @@ impl<'t, D: Distance> Reader<'t, D> { /// Returns the vector for item `i` that was previously added. pub fn item_vector(&self, rtxn: &'t RoTxn, item: ItemId) -> Result>> { - Ok(item_leaf(self.database, self.index, rtxn, item)? - .map(|leaf| leaf.vector.iter().take(self.dimensions).collect())) + Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| { + let mut vec = leaf.vector.to_vec(); + vec.truncate(self.dimensions()); + vec + })) } /// Returns `true` if the index is empty. diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index db0dc7c6..877d1d29 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -4,6 +4,8 @@ use std::{ slice::ChunksExact, }; +use ordered_float::Float; + use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; /// The type of the words used to quantize a vector @@ -35,6 +37,10 @@ impl UnalignedVectorCodec for BinaryQuantized { Cow::Owned(Self::from_slice(&vec).into_owned()) } + fn to_vec(vec: &UnalignedVector) -> Vec { + unsafe { to_vec_simd(vec) } + } + fn iter(vec: &UnalignedVector) -> impl ExactSizeIterator + '_ { BinaryQuantizedIterator { current_element: 0, @@ -57,8 +63,10 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { use core::arch::aarch64::*; let iterations = slice.len() / 8; + let reminder = slice.len() % 8; + let mut ret = Vec::with_capacity(iterations + (reminder != 0) as usize); + let ptr = slice.as_ptr(); - let mut ret = Vec::with_capacity(iterations); for i in 0..iterations { unsafe { @@ -94,9 +102,57 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { } } + let mut rem: QuantizedWord = 0; + for r in slice[slice.len() - reminder..].iter().rev() { + rem <<= 1; + let r = r.is_sign_positive(); + rem |= r as QuantizedWord; + } + ret.extend(rem.to_ne_bytes()); + ret } +unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { + use core::arch::aarch64::*; + + let mut output: Vec = Vec::with_capacity(vec.len()); + let bytes = vec.as_bytes(); + let ptr = bytes.as_ptr(); + + for i in 0..bytes.len() { + unsafe { + let lane = vld1_dup_u8(ptr.add(i)); + let mask = [ + 0b_0000_0001, + 0b_0000_0010, + 0b_0000_0100, + 0b_0000_1000, + 0b_0001_0000, + 0b_0010_0000, + 0b_0100_0000, + 0b_1000_0000, + ]; + let lane = vand_u8(lane, vld1_u8(mask.as_ptr())); + let lane = vceqz_u8(lane); + let lane = vreinterpret_s8_u8(lane); + let lane = vmul_s8(lane, vdup_n_s8(2)); + let lane = vadd_s8(lane, vdup_n_s8(1)); + + output.push(vget_lane_s8(lane, 0_i32) as f32); + output.push(vget_lane_s8(lane, 1_i32) as f32); + output.push(vget_lane_s8(lane, 2_i32) as f32); + 
output.push(vget_lane_s8(lane, 3_i32) as f32); + output.push(vget_lane_s8(lane, 4_i32) as f32); + output.push(vget_lane_s8(lane, 5_i32) as f32); + output.push(vget_lane_s8(lane, 6_i32) as f32); + output.push(vget_lane_s8(lane, 7_i32) as f32); + } + } + + output +} + pub struct BinaryQuantizedIterator<'a> { current_element: usize, current_iteration: usize, diff --git a/src/unaligned_vector/f32.rs b/src/unaligned_vector/f32.rs index 161245a4..304ceae3 100644 --- a/src/unaligned_vector/f32.rs +++ b/src/unaligned_vector/f32.rs @@ -3,7 +3,7 @@ use std::{ mem::{size_of, transmute}, }; -use bytemuck::cast_slice; +use bytemuck::{cast_slice, checked::cast_slice_mut}; use byteorder::{ByteOrder, NativeEndian}; use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; @@ -33,6 +33,13 @@ impl UnalignedVectorCodec for f32 { Cow::Owned(bytes) } + fn to_vec(vec: &UnalignedVector) -> Vec { + let iter = vec.iter(); + let mut ret = Vec::with_capacity(iter.len()); + ret.extend(iter); + ret + } + /// Returns an iterator of f32 that are read from the slice. /// The f32 are copied in memory and are therefore, aligned. fn iter(vec: &UnalignedVector) -> impl ExactSizeIterator + '_ { diff --git a/src/unaligned_vector/mod.rs b/src/unaligned_vector/mod.rs index 60965dd2..386d3b79 100644 --- a/src/unaligned_vector/mod.rs +++ b/src/unaligned_vector/mod.rs @@ -26,6 +26,8 @@ pub trait UnalignedVectorCodec: std::borrow::ToOwned + Sized { /// The slice is already known to be of the right length. fn from_vec(vec: Vec) -> Cow<'static, UnalignedVector>; + fn to_vec(vec: &UnalignedVector) -> Vec; + /// Returns an iterator of f32 that are read from the vector. /// The f32 are copied in memory and are therefore, aligned. fn iter(vec: &UnalignedVector) -> impl ExactSizeIterator + '_; @@ -85,10 +87,7 @@ impl UnalignedVector { /// Returns an allocated and aligned `Vec`. pub fn to_vec(&self) -> Vec { - let iter = self.iter(); - let mut ret = Vec::with_capacity(iter.len()); - ret.extend(iter); - ret + Codec::to_vec(self) } /// Returns the len of the vector in terms of elements. diff --git a/src/writer.rs b/src/writer.rs index 6544138a..40debdbf 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -87,8 +87,11 @@ impl Writer { /// Returns an `Option`al vector previous stored in this database. pub fn item_vector(&self, rtxn: &RoTxn, item: ItemId) -> Result>> { - Ok(item_leaf(self.database, self.index, rtxn, item)? - .map(|leaf| leaf.vector.iter().take(self.dimensions).collect())) + Ok(item_leaf(self.database, self.index, rtxn, item)?.map(|leaf| { + let mut vec = leaf.vector.to_vec(); + vec.truncate(self.dimensions); + vec + })) } /// Returns `true` if the index is empty. 
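Before moving on, a portable scalar sketch (not part of the patch series, assuming a 64-bit `QuantizedWord`) of the pack/unpack round trip that `from_slice_simd` and `to_vec_simd` implement with NEON; it is an equivalent in spirit rather than a byte-for-byte reproduction, and makes the remainder behaviour addressed by this commit easy to check on any platform.

```rust
fn pack(slice: &[f32]) -> Vec<u8> {
    let mut out = Vec::with_capacity((slice.len() + 63) / 64 * 8);
    for chunk in slice.chunks(64) {
        let mut word: u64 = 0;
        // The last (possibly partial) chunk is padded with zero bits.
        for scalar in chunk.iter().rev() {
            word <<= 1;
            word |= scalar.is_sign_positive() as u64;
        }
        out.extend_from_slice(&word.to_ne_bytes());
    }
    out
}

fn unpack(bytes: &[u8]) -> Vec<f32> {
    bytes
        .chunks_exact(8)
        .flat_map(|chunk| {
            let word = u64::from_ne_bytes(chunk.try_into().unwrap());
            // Each bit becomes -1.0 (cleared) or 1.0 (set).
            (0..64).map(move |bit| ((word >> bit) & 1) as f32 * 2.0 - 1.0)
        })
        .collect()
}

fn main() {
    let original = [0.1f32, -0.2, 0.3, -0.4, 0.5];
    let roundtrip = unpack(&pack(&original));
    assert!(original
        .iter()
        .zip(&roundtrip)
        .all(|(o, r)| *r == if o.is_sign_positive() { 1.0 } else { -1.0 }));
}
```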
From 343dbfba67b2f4b70e24be8702fe514c007c66e0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Sep 2024 18:38:03 +0200 Subject: [PATCH 34/61] re-implement the binary quantized to f32 with SIMD --- src/tests/binary_quantized.rs | 28 ++++++++++-- src/tests/node.rs | 6 ++- src/unaligned_vector/binary_quantized.rs | 58 +++++++++++------------- 3 files changed, 56 insertions(+), 36 deletions(-) diff --git a/src/tests/binary_quantized.rs b/src/tests/binary_quantized.rs index 62450532..03a27505 100644 --- a/src/tests/binary_quantized.rs +++ b/src/tests/binary_quantized.rs @@ -8,16 +8,36 @@ use crate::{ fn write_and_retrieve_binary_quantized_vector() { let handle = create_database::(); let mut wtxn = handle.env.write_txn().unwrap(); - let writer = Writer::new(handle.database, 0, 5); - writer.add_item(&mut wtxn, 0, &[-2.0, -1.0, 0.0, 1.0, 2.0]).unwrap(); + let writer = Writer::new(handle.database, 0, 16); + writer + .add_item( + &mut wtxn, + 0, + &[ + -2.0, -1.0, 0.0, -0.0, 2.0, 2.0, -12.4, 21.2, -2.0, -1.0, 0.0, 1.0, 2.0, 2.0, + -12.4, 21.2, + ], + ) + .unwrap(); let vec = writer.item_vector(&wtxn, 0).unwrap().unwrap(); insta::assert_debug_snapshot!(vec, @r###" [ + -1.0, + -1., + 1.0, + 1.0, + 1.0, + 1.0, + -1.0, + 1.0, -1.0, -1.0, 1.0, 1.0, 1.0, + 1.0, + -1.0, + 1.0, ] "###); @@ -27,8 +47,8 @@ fn write_and_retrieve_binary_quantized_vector() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [-1.0000, -1.0000, 1.0000, 1.0000, 1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, "other ..."] }) + Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [-1.0000, -1.0000, 1.0000, 1.0000, 1.0000, 1.0000, -1.0000, 1.0000, -1.0000, -1.0000, "other ..."] }) Tree 0: Descendants(Descendants { descendants: [0] }) - Root: Metadata { dimensions: 5, items: RoaringBitmap<[0]>, roots: [0], distance: "binary quantized euclidean" } + Root: Metadata { dimensions: 16, items: RoaringBitmap<[0]>, roots: [0], distance: "binary quantized euclidean" } "###); } diff --git a/src/tests/node.rs b/src/tests/node.rs index 1f7b014d..85117497 100644 --- a/src/tests/node.rs +++ b/src/tests/node.rs @@ -75,7 +75,7 @@ fn unaligned_binary_quantized_large() { let unaligned = UnalignedVector::::from_slice(&original); let s = unaligned.as_bytes().iter().map(|byte| format!("{byte:08b}\n")).collect::(); assert_snapshot!(s, @r###" - 10010110 + 10010111 01101001 11001011 10110100 @@ -91,6 +91,10 @@ fn unaligned_binary_quantized_large() { 00000000 00000000 00000000 + 00000000 + 00000000 + 00000000 + 00000000 "###); let deser: Vec<_> = unaligned.to_vec(); diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 877d1d29..9c49237a 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -9,7 +9,7 @@ use ordered_float::Float; use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; /// The type of the words used to quantize a vector -type QuantizedWord = usize; +type QuantizedWord = u64; /// The size of the words used to quantize a vector const QUANTIZED_WORD_SIZE: usize = QuantizedWord::BITS as usize; @@ -18,7 +18,8 @@ pub enum BinaryQuantized {} impl UnalignedVectorCodec for BinaryQuantized { fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { - let rem = bytes.len() % size_of::(); + // let rem = bytes.len() % size_of::(); + let rem = 0; if rem == 0 { // safety: `UnalignedVector` is transparent 
Ok(Cow::Borrowed(unsafe { transmute(bytes) })) @@ -60,6 +61,7 @@ impl UnalignedVectorCodec for BinaryQuantized { } unsafe fn from_slice_simd(slice: &[f32]) -> Vec { + println!("from slice called with {slice:?}"); use core::arch::aarch64::*; let iterations = slice.len() / 8; @@ -110,43 +112,37 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { } ret.extend(rem.to_ne_bytes()); + println!("from slice returns {ret:?}"); ret } unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { use core::arch::aarch64::*; - let mut output: Vec = Vec::with_capacity(vec.len()); + let mut output: Vec = vec![0.0; vec.len()]; + let output_ptr = output.as_mut_ptr(); let bytes = vec.as_bytes(); - let ptr = bytes.as_ptr(); - for i in 0..bytes.len() { - unsafe { - let lane = vld1_dup_u8(ptr.add(i)); - let mask = [ - 0b_0000_0001, - 0b_0000_0010, - 0b_0000_0100, - 0b_0000_1000, - 0b_0001_0000, - 0b_0010_0000, - 0b_0100_0000, - 0b_1000_0000, - ]; - let lane = vand_u8(lane, vld1_u8(mask.as_ptr())); - let lane = vceqz_u8(lane); - let lane = vreinterpret_s8_u8(lane); - let lane = vmul_s8(lane, vdup_n_s8(2)); - let lane = vadd_s8(lane, vdup_n_s8(1)); - - output.push(vget_lane_s8(lane, 0_i32) as f32); - output.push(vget_lane_s8(lane, 1_i32) as f32); - output.push(vget_lane_s8(lane, 2_i32) as f32); - output.push(vget_lane_s8(lane, 3_i32) as f32); - output.push(vget_lane_s8(lane, 4_i32) as f32); - output.push(vget_lane_s8(lane, 5_i32) as f32); - output.push(vget_lane_s8(lane, 6_i32) as f32); - output.push(vget_lane_s8(lane, 7_i32) as f32); + for (current_byte, base) in bytes.iter().enumerate() { + let base = *base as u32; + let low_mask = [0b_0000_0001, 0b_0000_0010, 0b_0000_0100, 0b_0000_1000]; + let high_mask = [0b_0001_0000, 0b_0010_0000, 0b_0100_0000, 0b_1000_0000]; + + for (i, mask) in [low_mask, high_mask].iter().enumerate() { + unsafe { + let lane = vld1q_dup_u32(&base as *const u32); + let lane = vandq_u32(lane, vld1q_u32(mask.as_ptr())); + let lane = vceqzq_u32(lane); + // Make the exponent right (either 1 or -1) + // sign exponent mantissa + let mask: u32 = 0b0_01111111_00000000000000000000000; + let lane = vorrq_u32(lane, vld1q_dup_u32(&mask as *const u32)); + // sign exponent mantissa + let mask: u32 = 0b1_01111111_00000000000000000000000; + let lane = vandq_u32(lane, vld1q_dup_u32(&mask as *const u32)); + let lane = vreinterpretq_f32_u32(lane); + vst1q_f32(output_ptr.add(current_byte * 8 + i * 4), lane); + } } } From 1879821edac7fa9744c223e2faa4e3e60c357fd9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Sep 2024 19:09:15 +0200 Subject: [PATCH 35/61] add tests --- Cargo.toml | 1 + src/tests/binary_quantized.rs | 2 +- src/unaligned_vector/binary_quantized.rs | 83 ++++++++++++++++++++---- 3 files changed, 74 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4695332c..11cec715 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ roaring = "0.10.5" tempfile = "3.10.1" thiserror = "1.0.61" nohash = "0.2.0" +proptest = "1.5.0" [dev-dependencies] anyhow = "1.0.86" diff --git a/src/tests/binary_quantized.rs b/src/tests/binary_quantized.rs index 03a27505..bc401407 100644 --- a/src/tests/binary_quantized.rs +++ b/src/tests/binary_quantized.rs @@ -23,7 +23,7 @@ fn write_and_retrieve_binary_quantized_vector() { insta::assert_debug_snapshot!(vec, @r###" [ -1.0, - -1., + -1.0, 1.0, 1.0, 1.0, diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 9c49237a..2cd08367 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ 
b/src/unaligned_vector/binary_quantized.rs @@ -18,8 +18,7 @@ pub enum BinaryQuantized {} impl UnalignedVectorCodec for BinaryQuantized { fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { - // let rem = bytes.len() % size_of::(); - let rem = 0; + let rem = bytes.len() % size_of::(); if rem == 0 { // safety: `UnalignedVector` is transparent Ok(Cow::Borrowed(unsafe { transmute(bytes) })) @@ -61,12 +60,10 @@ impl UnalignedVectorCodec for BinaryQuantized { } unsafe fn from_slice_simd(slice: &[f32]) -> Vec { - println!("from slice called with {slice:?}"); use core::arch::aarch64::*; let iterations = slice.len() / 8; - let reminder = slice.len() % 8; - let mut ret = Vec::with_capacity(iterations + (reminder != 0) as usize); + let mut ret = Vec::with_capacity(iterations); let ptr = slice.as_ptr(); @@ -104,15 +101,16 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { } } - let mut rem: QuantizedWord = 0; - for r in slice[slice.len() - reminder..].iter().rev() { + let reminder = slice.len() % 8; + let mut rem: u8 = 0; + for r in slice[slice.len() - reminder - 1..].iter().rev() { rem <<= 1; let r = r.is_sign_positive(); - rem |= r as QuantizedWord; + rem |= r as u8; } - ret.extend(rem.to_ne_bytes()); + ret.push(rem); + ret.extend(std::iter::repeat(0).take(8 - (ret.len() % 8))); - println!("from slice returns {ret:?}"); ret } @@ -150,7 +148,7 @@ unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { } pub struct BinaryQuantizedIterator<'a> { - current_element: usize, + current_element: QuantizedWord, current_iteration: usize, iter: ChunksExact<'a, u8>, } @@ -187,3 +185,66 @@ impl ExactSizeIterator for BinaryQuantizedIterator<'_> { lower } } + +#[cfg(test)] +mod test { + use insta::assert_debug_snapshot; + + use crate::internals::UnalignedVectorCodec; + + use super::BinaryQuantized; + + #[test] + fn truc() { + let original = [0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9]; + let vector = BinaryQuantized::from_slice(&original); + let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); + assert_debug_snapshot!(iter_vec, @r###" + [ + 1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + 1.0, + ] + "###); + let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); + vec_vec.truncate(original.len()); + assert_debug_snapshot!(vec_vec, @r###" + [ + 1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + 1.0, + ] + "###); + + assert_eq!(vec_vec, iter_vec); + } + + use proptest::collection::vec; + use proptest::prelude::*; + + proptest! { + #[test] + fn prop_truc( + original in vec(-50f32..=50.2, 80) + ){ + let vector = BinaryQuantized::from_slice(&original); + let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); + let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); + vec_vec.truncate(original.len()); + + assert_eq!(vec_vec, iter_vec); + } + } +} From 4d07b355ddc7dcbaba666cb682afbe035487f5ee Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 12 Sep 2024 11:12:38 +0200 Subject: [PATCH 36/61] push leaf id in a vec instead of a roaring bitmap because it s quicker --- src/parallel.rs | 4 ++++ src/writer.rs | 24 +++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/parallel.rs b/src/parallel.rs index 6335b171..d48e5327 100644 --- a/src/parallel.rs +++ b/src/parallel.rs @@ -256,6 +256,10 @@ impl<'t, D: Distance> ImmutableSubsetLeafs<'t, D> { } } + pub fn len(&self) -> u64 { + self.subset.len() + } + /// Randomly selects two leafs verified to be different. 
pub fn choose_two(&self, rng: &mut R) -> heed::Result; 2]>> { let indexes = index::sample(rng, self.subset.len() as usize, 2); diff --git a/src/writer.rs b/src/writer.rs index 40debdbf..1892b488 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -693,8 +693,8 @@ impl Writer { } let children = ImmutableSubsetLeafs::from_item_ids(reader.leafs, item_indices); - let mut children_left = RoaringBitmap::new(); - let mut children_right = RoaringBitmap::new(); + let mut children_left = Vec::with_capacity(children.len() as usize); + let mut children_right = Vec::with_capacity(children.len() as usize); let mut remaining_attempts = 3; let mut normal = loop { @@ -710,7 +710,7 @@ impl Writer { }; } - if split_imbalance(children_left.len(), children_right.len()) < 0.95 + if split_imbalance(children_left.len() as u64, children_right.len() as u64) < 0.95 || remaining_attempts == 0 { break normal; @@ -721,10 +721,20 @@ impl Writer { // If we didn't find a hyperplane, just randomize sides as a last option // and set the split plane to zero as a dummy plane. - if split_imbalance(children_left.len(), children_right.len()) > 0.99 { - randomly_split_children(rng, item_indices, &mut children_left, &mut children_right); - UnalignedVector::reset(&mut normal); - } + let (children_left, children_right) = + if split_imbalance(children_left.len() as u64, children_right.len() as u64) > 0.99 { + let mut children_left = RoaringBitmap::new(); + let mut children_right = RoaringBitmap::new(); + randomly_split_children(rng, item_indices, &mut children_left, &mut children_right); + UnalignedVector::reset(&mut normal); + + (children_left, children_right) + } else { + ( + RoaringBitmap::from_sorted_iter(children_left).unwrap(), + RoaringBitmap::from_sorted_iter(children_right).unwrap(), + ) + }; let normal = SplitPlaneNormal { normal, From 98cb3c9459fa036fd3443adbd561f8a644f46920 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 12 Sep 2024 18:41:05 +0200 Subject: [PATCH 37/61] fix the f32 -> binary quantized --- src/unaligned_vector/binary_quantized.rs | 123 +++++++++++++++++++---- 1 file changed, 101 insertions(+), 22 deletions(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 2cd08367..451c87ae 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -4,8 +4,6 @@ use std::{ slice::ChunksExact, }; -use ordered_float::Float; - use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; /// The type of the words used to quantize a vector @@ -63,7 +61,8 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { use core::arch::aarch64::*; let iterations = slice.len() / 8; - let mut ret = Vec::with_capacity(iterations); + let plus = if iterations % 8 == 0 { 0 } else { 8 - iterations }; + let mut ret = vec![0; iterations + plus]; let ptr = slice.as_ptr(); @@ -97,19 +96,23 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { let right = vaddvq_u32(lane) as u8; - ret.push(left | right); + ret[i] = left | right; } } + // Since we're iterating on bytes two by two. + // If we had a number of dimensions not dividible by 8 we may be + // missing some bits in the last byte. 
let reminder = slice.len() % 8; - let mut rem: u8 = 0; - for r in slice[slice.len() - reminder - 1..].iter().rev() { - rem <<= 1; - let r = r.is_sign_positive(); - rem |= r as u8; + if reminder != 0 { + let mut rem: u8 = 0; + for r in slice[slice.len() - reminder..].iter().rev() { + rem <<= 1; + let r = r.is_sign_positive(); + rem |= r as u8; + } + ret[iterations] = rem; } - ret.push(rem); - ret.extend(std::iter::repeat(0).take(8 - (ret.len() % 8))); ret } @@ -188,11 +191,77 @@ impl ExactSizeIterator for BinaryQuantizedIterator<'_> { #[cfg(test)] mod test { - use insta::assert_debug_snapshot; + use std::borrow::Cow; + + use insta::{assert_debug_snapshot, assert_snapshot}; + + use crate::internals::{UnalignedVector, UnalignedVectorCodec}; + + use super::{BinaryQuantized, QuantizedWord, QUANTIZED_WORD_SIZE}; + + fn original_from_slice(slice: &[f32]) -> Cow<'static, UnalignedVector> { + let mut output: Vec = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); + for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { + let mut word: QuantizedWord = 0; + for scalar in chunk.iter().rev() { + word <<= 1; + word += scalar.is_sign_positive() as QuantizedWord; + } + output.extend_from_slice(&word.to_ne_bytes()); + } + + Cow::Owned(output) + } + + #[test] + fn test_from_slice() { + let original = [0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9]; + let vector = BinaryQuantized::from_slice(&original); - use crate::internals::UnalignedVectorCodec; + let internal = vector.as_bytes().iter().map(|b| format!("{b:08b}\n")).collect::(); + assert_snapshot!(internal, @r###" + 10101011 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + "###); + + let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); + assert_debug_snapshot!(iter_vec, @r###" + [ + 1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + ] + "###); + let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); + vec_vec.truncate(original.len()); + assert_debug_snapshot!(vec_vec, @r###" + [ + 1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + ] + "###); - use super::BinaryQuantized; + assert_eq!(vec_vec, iter_vec); + } #[test] fn truc() { @@ -236,15 +305,25 @@ mod test { proptest! 
{ #[test] - fn prop_truc( - original in vec(-50f32..=50.2, 80) + fn prop_truc_1( + original in vec(-50f32..=50.2, 10..512) ){ - let vector = BinaryQuantized::from_slice(&original); - let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); - let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); - vec_vec.truncate(original.len()); + let vector = BinaryQuantized::from_slice(&original); + let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); + let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); + vec_vec.truncate(original.len()); - assert_eq!(vec_vec, iter_vec); - } + assert_eq!(vec_vec, iter_vec); + } + + #[test] + fn prop_truc_2( + original in vec(-50f32..=50.2, 29..65) + ){ + let vector1 = BinaryQuantized::from_slice(&original); + let vector2 = original_from_slice(&original); + + assert_eq!(vector1.as_bytes(), vector2.as_bytes()); + } } } From 1c975e2980b18107890a0752f94dc815cf282751 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 12 Sep 2024 18:46:56 +0200 Subject: [PATCH 38/61] improve comments and types --- src/unaligned_vector/binary_quantized.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 451c87ae..70f816cd 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -60,13 +60,18 @@ impl UnalignedVectorCodec for BinaryQuantized { unsafe fn from_slice_simd(slice: &[f32]) -> Vec { use core::arch::aarch64::*; - let iterations = slice.len() / 8; - let plus = if iterations % 8 == 0 { 0 } else { 8 - iterations }; - let mut ret = vec![0; iterations + plus]; + let iterations = slice.len() / size_of::(); + // The size of the returned vector must be a multiple of a word + let padding = if iterations % size_of::() == 0 { + 0 + } else { + size_of::() - iterations + }; + let mut ret = vec![0; iterations + padding]; let ptr = slice.as_ptr(); - for i in 0..iterations { + for (i, val) in ret.iter_mut().enumerate() { unsafe { let lane = vld1q_f32(ptr.add(i * 8)); let lane = vcltzq_f32(lane); @@ -96,14 +101,14 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { let right = vaddvq_u32(lane) as u8; - ret[i] = left | right; + *val = left | right; } } // Since we're iterating on bytes two by two. // If we had a number of dimensions not dividible by 8 we may be // missing some bits in the last byte. 
- let reminder = slice.len() % 8; + let reminder = slice.len() % size_of::(); if reminder != 0 { let mut rem: u8 = 0; for r in slice[slice.len() - reminder..].iter().rev() { From f9fea3bc5e2301bf9b9bb82922b3236c7da4673a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 13 Sep 2024 16:36:42 +0200 Subject: [PATCH 39/61] First version of SIMD on x86_64 --- src/unaligned_vector/binary_quantized.rs | 72 ++++++++++++++++++++---- 1 file changed, 62 insertions(+), 10 deletions(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 70f816cd..5a94156f 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -1,8 +1,6 @@ -use std::{ - borrow::Cow, - mem::{size_of, transmute}, - slice::ChunksExact, -}; +use std::borrow::Cow; +use std::mem::{size_of, transmute}; +use std::slice::ChunksExact; use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; @@ -57,6 +55,7 @@ impl UnalignedVectorCodec for BinaryQuantized { } } +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe fn from_slice_simd(slice: &[f32]) -> Vec { use core::arch::aarch64::*; @@ -122,6 +121,30 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { ret } +#[cfg(not(any(target_arch = "aarch64", target_arch = "arm64ec")))] +unsafe fn from_slice_simd(slice: &[f32]) -> Vec { + // fn convert(m: __m128i) -> [u32; 4] { + // unsafe { std::mem::transmute(m) } + // } + + // fn display(name: &str, m: __m128i) { + // let [a, b, c, d] = convert(m); + // eprintln!("{name}: {a:#b} {b:#b} {c:#b} {d:#b}"); + // } + + let mut output = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); + for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { + let mut word: QuantizedWord = 0; + for scalar in chunk.iter().rev() { + word <<= 1; + word += scalar.is_sign_positive() as QuantizedWord; + } + output.extend_from_slice(&word.to_ne_bytes()); + } + output +} + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { use core::arch::aarch64::*; @@ -155,6 +178,36 @@ unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { output } +#[cfg(not(any(target_arch = "aarch64", target_arch = "arm64ec")))] +unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { + use core::arch::x86_64::*; + + let mut output: Vec = vec![0.0; vec.len()]; + let output_ptr = output.as_mut_ptr(); + let bytes = vec.as_bytes(); + let low_mask = [0b_0000_0001, 0b_0000_0010, 0b_0000_0100, 0b_0000_1000]; + let high_mask = [0b_0001_0000, 0b_0010_0000, 0b_0100_0000, 0b_1000_0000]; + let ones = unsafe { _mm_set1_ps(1.0) }; + let minus = unsafe { _mm_set1_ps(-1.0) }; + + for (current_byte, base) in bytes.iter().enumerate() { + unsafe { + let base = _mm_set1_epi32(*base as i32); + for (i, mask) in [low_mask, high_mask].iter().enumerate() { + let mask = _mm_set_epi32(mask[3], mask[2], mask[1], mask[0]); + let mask = _mm_and_si128(base, mask); + // 0xffffffff if equal to zero and 0x00000000 otherwise + let mask = _mm_cmpeq_epi32(mask, _mm_setzero_si128()); + let lane = _mm_blendv_ps(ones, minus, _mm_castsi128_ps(mask)); + let offset = output_ptr.add(current_byte * 8 + i * 4); + _mm_store_ps(offset, lane); + } + } + } + + output +} + pub struct BinaryQuantizedIterator<'a> { current_element: QuantizedWord, current_iteration: usize, @@ -200,9 +253,8 @@ mod test { use insta::{assert_debug_snapshot, assert_snapshot}; - use crate::internals::{UnalignedVector, UnalignedVectorCodec}; - use super::{BinaryQuantized, 
QuantizedWord, QUANTIZED_WORD_SIZE}; + use crate::internals::{UnalignedVector, UnalignedVectorCodec}; fn original_from_slice(slice: &[f32]) -> Cow<'static, UnalignedVector> { let mut output: Vec = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); @@ -269,7 +321,7 @@ mod test { } #[test] - fn truc() { + fn super_truc() { let original = [0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9]; let vector = BinaryQuantized::from_slice(&original); let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); @@ -283,7 +335,7 @@ mod test { 1.0, -1.0, 1.0, - 1.0, + -1.0, ] "###); let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); @@ -298,7 +350,7 @@ mod test { 1.0, -1.0, 1.0, - 1.0, + -1.0, ] "###); From 1592fe0c606f44eb5e82859188e4941a43e84f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 13 Sep 2024 17:20:08 +0200 Subject: [PATCH 40/61] Upload the AVX version of to_vec --- src/unaligned_vector/binary_quantized.rs | 40 ++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 5a94156f..33ba4f70 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -208,6 +208,46 @@ unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { output } +// Dedicated to mm256 (AVX). Doesn't provide any real perf gain. +// #[cfg(not(any(target_arch = "aarch64", target_arch = "arm64ec")))] +// unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { +// use core::arch::x86_64::*; + +// let mut output: Vec = vec![0.0; vec.len()]; +// let output_ptr = output.as_mut_ptr(); +// let bytes = vec.as_bytes(); +// let mask = [ +// 0b_0000_0001, +// 0b_0000_0010, +// 0b_0000_0100, +// 0b_0000_1000, +// 0b_0001_0000, +// 0b_0010_0000, +// 0b_0100_0000, +// 0b_1000_0000, +// ]; +// let ones = unsafe { _mm256_set1_ps(1.0) }; +// let minus = unsafe { _mm256_set1_ps(-1.0) }; + +// for (current_byte, base) in bytes.iter().enumerate() { +// unsafe { +// let base = _mm256_set1_epi32(*base as i32); +// let mask = _mm256_set_epi32( +// mask[7], mask[6], mask[5], mask[4], mask[3], mask[2], mask[1], mask[0], +// ); +// let mask = _mm256_and_si256(base, mask); +// // 0xffffffff if equal to zero and 0x00000000 otherwise +// let mask = _mm256_cmpeq_epi32(mask, _mm256_setzero_si256()); +// let lane = _mm256_blendv_ps(ones, minus, _mm256_castsi256_ps(mask)); +// let offset = output_ptr.add(current_byte * 8); +// // I don't understand why this is not aligned and I cannot use _mm256_store_ps +// _mm256_storeu_ps(offset, lane); +// } +// } + +// output +// } + pub struct BinaryQuantizedIterator<'a> { current_element: QuantizedWord, current_iteration: usize, From fc7a72b8058bb9903e69152c09f84af20afe9b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 14 Sep 2024 13:20:46 +0200 Subject: [PATCH 41/61] Update the simd aarch64 to_vec_simd function with blend functions --- src/unaligned_vector/binary_quantized.rs | 37 +++++++++++------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 33ba4f70..811cd65e 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -17,7 +17,7 @@ impl UnalignedVectorCodec for BinaryQuantized { let rem = bytes.len() % size_of::(); if rem == 0 { // safety: `UnalignedVector` is transparent - Ok(Cow::Borrowed(unsafe { transmute(bytes) })) + 
Ok(Cow::Borrowed(unsafe { transmute::<&[u8], &UnalignedVector>(bytes) })) } else { Err(SizeMismatch { vector_codec: "binary quantized", rem }) } @@ -66,8 +66,8 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { } else { size_of::() - iterations }; - let mut ret = vec![0; iterations + padding]; + let mut ret = vec![0; iterations + padding]; let ptr = slice.as_ptr(); for (i, val) in ret.iter_mut().enumerate() { @@ -151,26 +151,23 @@ unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { let mut output: Vec = vec![0.0; vec.len()]; let output_ptr = output.as_mut_ptr(); let bytes = vec.as_bytes(); + let low_mask = [0b_0000_0001, 0b_0000_0010, 0b_0000_0100, 0b_0000_1000]; + let high_mask = [0b_0001_0000, 0b_0010_0000, 0b_0100_0000, 0b_1000_0000]; + let ones = unsafe { vld1q_dup_f32(&1.0) }; + let minus = unsafe { vld1q_dup_f32(&-1.0) }; for (current_byte, base) in bytes.iter().enumerate() { - let base = *base as u32; - let low_mask = [0b_0000_0001, 0b_0000_0010, 0b_0000_0100, 0b_0000_1000]; - let high_mask = [0b_0001_0000, 0b_0010_0000, 0b_0100_0000, 0b_1000_0000]; - - for (i, mask) in [low_mask, high_mask].iter().enumerate() { - unsafe { - let lane = vld1q_dup_u32(&base as *const u32); - let lane = vandq_u32(lane, vld1q_u32(mask.as_ptr())); - let lane = vceqzq_u32(lane); - // Make the exponent right (either 1 or -1) - // sign exponent mantissa - let mask: u32 = 0b0_01111111_00000000000000000000000; - let lane = vorrq_u32(lane, vld1q_dup_u32(&mask as *const u32)); - // sign exponent mantissa - let mask: u32 = 0b1_01111111_00000000000000000000000; - let lane = vandq_u32(lane, vld1q_dup_u32(&mask as *const u32)); - let lane = vreinterpretq_f32_u32(lane); - vst1q_f32(output_ptr.add(current_byte * 8 + i * 4), lane); + unsafe { + let base = *base as u32; + let base = vld1q_dup_u32(&base); + for (i, mask) in [low_mask, high_mask].iter().enumerate() { + let mask = vld1q_u32(mask.as_ptr()); + let mask = vandq_u32(base, mask); + // 0xffffffff if equal to zero and 0x00000000 otherwise + let mask = vceqzq_u32(mask); + let lane = vbslq_f32(mask, minus, ones); + let offset = output_ptr.add(current_byte * 8 + i * 4); + vst1q_f32(offset, lane); } } } From b72958ae1310545681130424cf18090ce5d7cc9b Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 16 Sep 2024 14:46:59 +0200 Subject: [PATCH 42/61] fix the binary quantized conversions --- src/unaligned_vector/binary_quantized.rs | 25 +++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 811cd65e..47074d54 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -61,16 +61,19 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { let iterations = slice.len() / size_of::(); // The size of the returned vector must be a multiple of a word - let padding = if iterations % size_of::() == 0 { - 0 - } else { - size_of::() - iterations - }; - - let mut ret = vec![0; iterations + padding]; + let reminder = slice.len() % size_of::(); + let mut len = iterations; + if len % size_of::() != 0 { + len += size_of::() - len % size_of::(); + } else if reminder != 0 { + // if we generated a valid number of Word but we're missing a few bits + // then we need to add a full Word at the end. 
+ len += size_of::(); + } + let mut ret = vec![0; len]; let ptr = slice.as_ptr(); - for (i, val) in ret.iter_mut().enumerate() { + for i in 0..iterations { unsafe { let lane = vld1q_f32(ptr.add(i * 8)); let lane = vcltzq_f32(lane); @@ -100,7 +103,7 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { let right = vaddvq_u32(lane) as u8; - *val = left | right; + ret[i] = left | right; } } @@ -400,7 +403,7 @@ mod test { proptest! { #[test] fn prop_truc_1( - original in vec(-50f32..=50.2, 10..512) + original in vec(-50f32..=50.2, 0..512) ){ let vector = BinaryQuantized::from_slice(&original); let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); @@ -412,7 +415,7 @@ mod test { #[test] fn prop_truc_2( - original in vec(-50f32..=50.2, 29..65) + original in vec(-50f32..=50.2, 0..516) ){ let vector1 = BinaryQuantized::from_slice(&original); let vector2 = original_from_slice(&original); From 0f88bfde82389ffda1d84dd1f2d766cbc5b2b33f Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 16 Sep 2024 15:39:55 +0200 Subject: [PATCH 43/61] gate each simd function behind the right arch and provide a fallback method in case no simd instructions are available --- src/unaligned_vector/binary_quantized.rs | 98 +++++++++++------------- 1 file changed, 46 insertions(+), 52 deletions(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 47074d54..bce7cf45 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -24,9 +24,13 @@ impl UnalignedVectorCodec for BinaryQuantized { } fn from_slice(slice: &[f32]) -> Cow<'static, UnalignedVector> { - let output = unsafe { from_slice_simd(slice) }; - - Cow::Owned(output) + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") { + return Cow::Owned(unsafe { from_slice_neon(slice) }); + } + } + Cow::Owned(from_slice_non_optimized(slice)) } fn from_vec(vec: Vec) -> Cow<'static, UnalignedVector> { @@ -34,7 +38,19 @@ impl UnalignedVectorCodec for BinaryQuantized { } fn to_vec(vec: &UnalignedVector) -> Vec { - unsafe { to_vec_simd(vec) } + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { to_vec_neon(vec) }; + } + } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") { + return unsafe { to_vec_see(vec) }; + } + } + to_vec_non_optimized(vec) } fn iter(vec: &UnalignedVector) -> impl ExactSizeIterator + '_ { @@ -55,8 +71,21 @@ impl UnalignedVectorCodec for BinaryQuantized { } } +fn from_slice_non_optimized(slice: &[f32]) -> Vec { + let mut output = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); + for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { + let mut word: QuantizedWord = 0; + for scalar in chunk.iter().rev() { + word <<= 1; + word += scalar.is_sign_positive() as QuantizedWord; + } + output.extend_from_slice(&word.to_ne_bytes()); + } + output +} + #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] -unsafe fn from_slice_simd(slice: &[f32]) -> Vec { +unsafe fn from_slice_neon(slice: &[f32]) -> Vec { use core::arch::aarch64::*; let iterations = slice.len() / size_of::(); @@ -73,6 +102,7 @@ unsafe fn from_slice_simd(slice: &[f32]) -> Vec { let mut ret = vec![0; len]; let ptr = slice.as_ptr(); + #[allow(clippy::needless_range_loop)] for i in 0..iterations { unsafe { let lane = vld1q_f32(ptr.add(i * 8)); @@ -124,31 +154,12 @@ unsafe fn 
from_slice_simd(slice: &[f32]) -> Vec { ret } -#[cfg(not(any(target_arch = "aarch64", target_arch = "arm64ec")))] -unsafe fn from_slice_simd(slice: &[f32]) -> Vec { - // fn convert(m: __m128i) -> [u32; 4] { - // unsafe { std::mem::transmute(m) } - // } - - // fn display(name: &str, m: __m128i) { - // let [a, b, c, d] = convert(m); - // eprintln!("{name}: {a:#b} {b:#b} {c:#b} {d:#b}"); - // } - - let mut output = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); - for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { - let mut word: QuantizedWord = 0; - for scalar in chunk.iter().rev() { - word <<= 1; - word += scalar.is_sign_positive() as QuantizedWord; - } - output.extend_from_slice(&word.to_ne_bytes()); - } - output +fn to_vec_non_optimized(vec: &UnalignedVector) -> Vec { + vec.iter().collect() } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] -unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { +unsafe fn to_vec_neon(vec: &UnalignedVector) -> Vec { use core::arch::aarch64::*; let mut output: Vec = vec![0.0; vec.len()]; @@ -179,7 +190,7 @@ unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { } #[cfg(not(any(target_arch = "aarch64", target_arch = "arm64ec")))] -unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { +unsafe fn to_vec_sse(vec: &UnalignedVector) -> Vec { use core::arch::x86_64::*; let mut output: Vec = vec![0.0; vec.len()]; @@ -210,7 +221,7 @@ unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { // Dedicated to mm256 (AVX). Doesn't provide any real perf gain. // #[cfg(not(any(target_arch = "aarch64", target_arch = "arm64ec")))] -// unsafe fn to_vec_simd(vec: &UnalignedVector) -> Vec { +// unsafe fn to_vec_avx(vec: &UnalignedVector) -> Vec { // use core::arch::x86_64::*; // let mut output: Vec = vec![0.0; vec.len()]; @@ -289,26 +300,10 @@ impl ExactSizeIterator for BinaryQuantizedIterator<'_> { #[cfg(test)] mod test { - use std::borrow::Cow; - use insta::{assert_debug_snapshot, assert_snapshot}; - use super::{BinaryQuantized, QuantizedWord, QUANTIZED_WORD_SIZE}; - use crate::internals::{UnalignedVector, UnalignedVectorCodec}; - - fn original_from_slice(slice: &[f32]) -> Cow<'static, UnalignedVector> { - let mut output: Vec = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); - for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { - let mut word: QuantizedWord = 0; - for scalar in chunk.iter().rev() { - word <<= 1; - word += scalar.is_sign_positive() as QuantizedWord; - } - output.extend_from_slice(&word.to_ne_bytes()); - } - - Cow::Owned(output) - } + use super::*; + use crate::internals::UnalignedVectorCodec; #[test] fn test_from_slice() { @@ -406,9 +401,8 @@ mod test { original in vec(-50f32..=50.2, 0..512) ){ let vector = BinaryQuantized::from_slice(&original); - let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); - let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); - vec_vec.truncate(original.len()); + let iter_vec: Vec<_> = to_vec_non_optimized(&vector); + let vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); assert_eq!(vec_vec, iter_vec); } @@ -418,9 +412,9 @@ mod test { original in vec(-50f32..=50.2, 0..516) ){ let vector1 = BinaryQuantized::from_slice(&original); - let vector2 = original_from_slice(&original); + let vector2 = from_slice_non_optimized(&original); - assert_eq!(vector1.as_bytes(), vector2.as_bytes()); + assert_eq!(vector1.as_bytes(), &vector2); } } } From 149e338749aa458be8c627f195130c2bfcaccd6b Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 16 Sep 2024 15:51:15 +0200 Subject: [PATCH 44/61] fix 
and improve the large binary quantized test --- src/tests/node.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/tests/node.rs b/src/tests/node.rs index 85117497..971e67d0 100644 --- a/src/tests/node.rs +++ b/src/tests/node.rs @@ -67,15 +67,14 @@ fn unaligned_binary_quantized_smol() { #[test] fn unaligned_binary_quantized_large() { - let original: Vec = (0..100) - .map(|n| if n % 3 == 0 || n % 5 == 0 { n as f32 * -1.0 } else { n as f32 }) - .collect(); + let original: Vec = + (0..100).map(|n| if n % 3 == 0 || n % 5 == 0 { -1.0 } else { 1.0 }).collect(); // Two numbers should be used let unaligned = UnalignedVector::::from_slice(&original); let s = unaligned.as_bytes().iter().map(|byte| format!("{byte:08b}\n")).collect::(); assert_snapshot!(s, @r###" - 10010111 + 10010110 01101001 11001011 10110100 @@ -91,20 +90,17 @@ fn unaligned_binary_quantized_large() { 00000000 00000000 00000000 - 00000000 - 00000000 - 00000000 - 00000000 "###); let deser: Vec<_> = unaligned.to_vec(); assert_snapshot!(format!("{:?}", &deser[0..original.len()]), -@"[-1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0]"); + @"[-1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0]"); + //[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ... for (orig, deser) in original.iter().zip(&deser) { if orig.is_sign_positive() { - assert_eq!(deser, &1.0); + assert_eq!(deser, &1.0, "Expected 1 but found {deser}"); } else { - assert_eq!(deser, &-1.0); + assert_eq!(deser, &-1.0, "Expected -1 but found {deser}"); } } } From 7e8ee314e49bb912fb59c3cdd2acefafe2b66b87 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 16 Sep 2024 16:25:34 +0200 Subject: [PATCH 45/61] move the unaligned vector test to their own module --- .../binary_quantized_test.txt | 13 ++ src/unaligned_vector/binary_quantized.rs | 125 +----------------- src/unaligned_vector/binary_quantized_test.rs | 86 ++++++++++++ src/unaligned_vector/mod.rs | 3 + 4 files changed, 104 insertions(+), 123 deletions(-) create mode 100644 proptest-regressions/unaligned_vector/binary_quantized_test.txt create mode 100644 src/unaligned_vector/binary_quantized_test.rs diff --git a/proptest-regressions/unaligned_vector/binary_quantized_test.txt b/proptest-regressions/unaligned_vector/binary_quantized_test.txt new file mode 100644 index 00000000..8f7d9637 --- /dev/null +++ b/proptest-regressions/unaligned_vector/binary_quantized_test.txt @@ -0,0 +1,13 @@ +# Seeds for failure cases proptest has generated in the past. 
It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc ea44f9c01981d034eb9956429dad45b1511184b813f6ef2e040da19ac845fc95 # shrinks to original = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +cc 2c07e1f5f91825abf0debdb3da9b6e694621a2890829c6cb177cfeb899b2ce55 # shrinks to original = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 34.928864, -28.179714, -48.003353, 41.166138, 5.3680606, 12.877358, -0.5015638, -4.6210856, -28.06363, -7.6799254, 33.296986, 34.06795, -2.2030668, 18.379503, 43.27984, -40.480953, -42.057476, -40.667183, -44.435017, -11.390173, 7.3554792, -34.6324, 30.237425, 49.819195, 37.740192, 28.644194, 13.64243, 3.4781952, -13.213591, -36.088795, -24.686224, 18.616488, -14.341648, 19.320524, -41.356083, -35.585243, 15.924325, 21.948093, 5.6754994, 12.68356, 42.997505, -29.885351, 13.714709, -7.3218246, 20.833757, -2.2652364, 2.8438904, -6.1894426, -9.367592, 22.58327, -32.287846, -1.1771767, 4.757799, 20.382475, 31.179296, -6.74811, 46.795444, 35.463306, 34.66601, 33.691433, -44.754036, -5.6832294, -26.877445, -2.0497599, -6.261805, 34.259933, 49.28321, -23.057627, 11.857405, 34.720547, -23.427761, -3.1082656, -42.940693, -8.165372, 4.151185, -2.2702107, 0.7146188, 0.20639795, 18.346779, 25.99723, -3.6434278, 14.143686, 34.238422, -1.3187963, 47.847813, 9.233831, 38.294933, 0.18077198, 20.635534, -43.652462, 44.054348, -14.8437395, -8.922008, -34.319077, -35.579784, 49.904522, 3.3247874, -35.558, 29.816944, -25.138412, -17.330849, -32.62143, -49.173794, 29.494255, 3.980562, -9.624508, 45.99614, -12.191771, 41.839634, 5.167036, -23.855576, 32.802124, 48.74138, -5.3598986, 24.385458, 4.526897, 29.327799, -40.55756, -5.0441213, -25.287378, -3.1127932, -42.64746, -19.428247, 45.801144, 33.15466, 44.32883, 21.886627, 12.657723, -21.865538, -24.102598, 8.89531, 31.890102, -19.006971, 24.23742, 29.332253, -35.34813, 3.327044, 43.049038, -48.835857, 0.5250747, 46.814182, 18.696535, 28.768568, -46.696983, -8.911978, 2.028035, -20.13224, -44.985195, -40.22917, 31.517635, -24.734465, -9.009608, 2.3076336, 10.10881, 34.201073, 11.546979, -32.683136, -5.639307, 34.2214, -28.278593, 16.776672, -44.581097, 36.885647, -24.943443, -26.297604, 16.982048, -4.67554, 41.519547, -14.282249, 31.972383, 0.7674621, 49.660698, 24.97929, -45.0877, 46.83706, -40.286777, -32.192192, 14.244556, 17.55905, -34.961445, 8.375792, 14.471733, -44.659, -19.188278, -44.387375, -26.779972, 34.183895, 12.073208, 30.986094, -27.170937, 8.04304, 28.904068, -10.029137, -25.119982, -24.075623, 27.329859, -5.4513335, 16.301952, -6.6082954, -34.37629, -25.010254, 1.3416224, -46.816994, -42.70297, -30.874166, -24.531387, 30.773514, 18.169138, -21.227873, 16.390074, 34.656322, 31.352474, 21.870699, -31.134932, -32.60656, 17.599686, -19.984236, 5.683042, 42.207596, 26.99832, 43.624447, -5.844214, -7.8979964, 30.47681, 39.64754, 42.2378, -37.107872, 45.674698, -13.914517, 21.40882, 11.19408, -35.3075, 32.853676, 41.414013, 16.595694, -7.0986795, 45.01873, -20.232714, 0.43574658, 21.05971, 19.754171, -25.670258, 17.631668, -46.949795, 2.7009163, -3.2229908, 14.882127, 48.58671, -39.506004, 21.807415, 18.907522, -19.814602, 1.2229805, 4.8909397, 10.098888, -39.273033, -18.546995, 17.73384, 36.441048, 13.93157, -42.47934, 40.324196, -33.16537, 25.32274, 13.096876, 1.471322, 35.339985, 9.950398, -30.85566, 38.340378, -43.93405, 47.64359, 
1.1059145, -24.121773, -15.651234, 16.594488, 49.74783, -18.054924, -49.629642, 47.947628, -7.1770267, 15.124369, 21.970358, -39.760193, -6.540123, -2.5548592, -4.054445, 47.373825, 11.282196, -27.466738, -41.951633, -26.442652, 20.186987, -19.596186, -0.72840726, 9.877779, -12.482341, -49.00538, 33.02553, 4.108043, -38.31706, 33.502403, 7.892792, -36.261013, -24.630655, 34.565647, -25.366306, 23.112663, 28.41068, 25.982483, 25.3876, 5.1287913, -44.73712, 30.080248, 25.75063, -31.724659, 20.804203, -31.697739, 44.661118, -2.0326402, 46.769676, -22.405367, 47.756012, 7.9477787, 18.250235, 28.98381, 19.106691, 31.788456, 44.134296, 9.837776, -5.548861, 9.248742, 5.025196, -7.170146, 16.822647, 30.904768, -11.454121, 2.8200958, 45.189255, -47.68239, -31.556616, -27.454576, -8.109993, -22.960342, 34.982147, -20.257809, -36.055, -28.507612, 15.3559475, -25.589075, 46.29627, 27.874113, -2.3586626, -19.035288, 46.281574, -38.53038, -9.136727, -41.657127, 20.009525, -49.12756, 15.5407095, 36.79244, -7.1792297, -26.505068, 3.2700121, -24.918865, -42.379017, 8.290003, 40.83214, 15.226595, -2.1630929, 22.192219, 40.18269, -28.402205, 17.068417, -24.652664, 35.01961, 32.612473, 34.826115, -25.3796, 15.630849, 2.7914145, 10.425925, -33.388153, -39.421566, 38.488293, -11.703528, -24.235086, -24.423758, -33.65738, -45.802116, 20.37659, 9.975364, 46.591873, 30.084646, 26.071348, 36.820568, -40.602684, 46.129913, -19.656206, -45.001022, 12.468425, -38.338436, 19.467169, 2.1429548, -49.932816, -16.064535, 5.4957175, 26.872522, 47.22126, -38.49015, -9.346636, -15.083169, -33.9357, 37.339245, -22.75131, -26.375341, -49.313393, -36.109104, -40.226177, -44.449646, -12.919495, -46.246273, -25.251703, 10.465865, 44.225277, 18.177385, 43.48533, 29.15927, 27.36567, -25.067677, 38.682037, -19.907686, -29.48532, 7.6711164, 33.047928, 26.612247, 12.630455, -10.446119, -44.609188, 25.4607, 34.32933, 11.781093, 21.6851, 45.446465, 31.049229, -19.7296, -47.98356, -22.724695, -13.884918, 20.81162, 43.443974, -40.699524, -9.998084, 17.713615, 34.32163, -43.672134, 21.923594, -49.84672, -23.82625, -28.935364, -32.06071, -1.3000214, 42.269638, 9.24664, -37.223637, 27.48553, -37.351566, -20.982544, -23.419367, -48.679497, -49.86369, -15.103854, 2.3013034, 25.910238, -41.600662, -22.743288, -19.980015, -43.90411, 30.342438, 32.236805, 4.6266446, 39.264202, -32.63228, 16.181074, 12.68794, 34.671535, -23.388393, -18.423508, -45.097458, -2.7676394, 30.206686, -35.27305, 20.777147] +cc 85c6056f72a6b91fe3bb7863f7141d2f50f966e4b47763104075d272af8c5f96 # shrinks to original = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +cc 6c75400b7c4dcaa0120972ce96eada24bb9a020f771f489d973f04cf7a8fc484 # shrinks to original = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.132588, 36.389793, 44.222073, 11.051843, -39.46859, -32.652737, 37.27305, 43.1858, 14.818085, 47.277573, -4.5576797, 20.435364, -17.869621, 8.565275, -47.076046, 9.853658, 45.15765, -39.391212, 39.185513, 50.0798, 35.82915, -28.816013, -34.722176, -32.892273, -0.21254167, 12.997923, -40.854458, -47.819923, 20.927723, -43.280304, -18.001894, 9.091119, -18.140648, -21.633745, -20.628742, -7.3853645, -42.086502, 43.586082, 7.1444335, 
28.666245, 10.895854, 16.542568, -49.17738, -6.032213, -46.182545, -6.9888473, 46.144405, 32.001637, -11.182925, 19.857529, -30.824213, -44.299374, -22.995352, -22.086382, -30.631557, -28.227184, -23.654264, -27.647081, 33.615875, -15.847933, -13.617885, 49.445724, 36.733383, 12.594828, -11.281194, -43.34719, 38.525322, 26.271828, 21.236029, -19.471148, -27.439333, -8.07385, 17.050514, 37.492096, -10.017692, -18.058159, 17.78233, -41.950577, -46.955215, -23.982752, -34.881664, 9.904279, -2.3816602, 10.901946, -1.308817, 10.82935, -36.47463, -14.721006, -13.099718, 12.302225, -30.274286, 14.074315, 41.543686, 5.9755635, -46.989166, -11.165659, 11.690679, -10.763054, 41.702087, -41.282093, -48.16655, 36.217785, -12.343993, 27.447716, -6.974723, 0.7732652, 3.872793, 37.187916, -49.548954, -18.03775, -18.669811, -1.4579794, 41.074574, -24.442486, -9.518786, 35.581635, -14.413731, 10.890216, 6.954555, -4.4162846, 46.954556, 5.643275, 29.59645, -23.17493, 3.115279, -32.973534, 35.120083, -39.207912, -10.857614, -17.098047, 45.14072, -37.071423, 19.840054, 48.414116, 27.808372, 29.463568, -0.08634639, -25.49661, -32.12414, 26.757723, -27.53453, -21.64966, -30.595224, 29.48856, 19.23135, -46.507534, 41.348007, 19.535263, 22.308811, 8.209455, -6.8908343, 10.751941, 9.189699, -0.001981474, -48.210255, 31.567963, -15.053722, -7.44673, 0.1782902, -36.531975, 1.7673457, -29.389248, 21.835798, -27.003855, 10.311737, -10.947969, -35.20308, 41.584442, 15.470946, -43.439705, -44.327866, -47.35727, 49.992275, -38.403652, 35.862488, 10.370646, 11.967685, 32.335167, 31.127691, 17.002134, 45.956703, 34.962177, 19.904732, 13.925583, -13.751666, 13.691076, -23.738312, 46.33687, -3.7080483, -38.860012, -35.988533, 47.905018, -29.546127, -45.96983, 43.912853, -48.729053, 10.0241585, -14.205436, -3.7115045, 12.163462, 35.895363, -32.618782, -11.89443, 7.1218615, 12.388838, 7.654014, -12.793643, -23.103441, -37.73257, 19.643778, -11.271556, 7.4793186, -49.409424, -15.348118, 26.819782, -17.303635, 39.526997, 14.747623, -43.128704, 14.352604, 41.595985, -26.057705, 43.588512, -46.30518, 10.824475, 38.42548, -0.7619881, 33.701416, 33.014896, 39.495, 10.591338, -36.90345, -25.150137, -3.424798, -17.051199, 30.529348, 41.272198, 39.630898, 47.588764, -42.239704, -43.278725, 16.668232, -25.61335, 36.8314, 35.300903, 0.497268, 8.980502, 23.173628, -12.052752, 36.568565, 45.960903, 21.987724, 15.569539, 26.569004, -15.698965, -23.741295, 46.34894, -48.34519, -16.73409, 4.798172, 41.597427, -17.904663, 14.862669, 5.803273, 14.630004, -48.141644, 31.490318, 18.03832, 9.557964, 12.449736, 21.741655, -12.834744, -1.0789651, -3.021928, 11.728263, -44.22244, -24.47842, 45.024776, 42.535625, -22.235, 28.044891, -7.94986, -33.450783, -26.14239, -45.180336, 39.915062, -1.0474228, 19.26681, -28.999025, -2.6363816, 24.273973, 40.832615, -16.70508, -47.562912, 46.234913, 29.351503, -41.833717, 21.067692, 12.181457, -0.6626018, 17.603952, -7.5360703, 25.465542, -12.988388, -6.9245415, 37.540295, 32.722122, -2.585424, 29.5797, -13.670121, -20.963812, 2.7690446, -4.708485, 32.553375, -16.828384, -28.272429, -46.073784, 46.082752, 39.897858, -15.634891, -42.61176, 12.340191, 20.719318, -44.01994, -37.397514, -46.194828, -48.353207, -15.4082775, 22.029692, -48.470505, 40.59896, -42.27035, 33.33279, 44.08435, -19.285421, -34.97491, -4.2947435, -28.612545, 47.71645, -31.804585, 32.622574, 2.7046359, -35.754704, 41.950523, -1.0963814, -49.79878, -30.814272, -8.914003, 21.26149, -1.330743, 31.6926, -7.9359593, 13.153566, 10.248735, -37.354336, 
-42.713787, -38.171017, 42.255856, -15.532154, 38.58112, 2.93462, 35.05488, 28.562145, -27.485615, -16.478281, 40.23814, -44.728245, -10.799299, -13.574599, -25.989988, -42.50745, 45.441166, -44.177513, 18.82882, -8.365803, -38.983902, 17.171268, -34.259483, -19.771988, -32.48962, 35.268463, 2.253426, -16.84561, 46.6736, 12.687439, -23.386368, -6.3646092, -42.450912, -15.94731, -39.31254, 49.853466, -31.198696] +cc e1066782507470e0b25a0a327857ccd8e39566dddfd9d9f853929c4c04403a9c # shrinks to original = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +cc 8eb76015afe2f88ed365a73a27d450794c99bffd52cefd9f235cca822f99f90d # shrinks to original = [] +cc 99b3d4922ea6b104ea43dbc333ef1282a1e592d0360fbef0735e95a445f9d271 # shrinks to original = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index bce7cf45..1fff1515 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -71,7 +71,7 @@ impl UnalignedVectorCodec for BinaryQuantized { } } -fn from_slice_non_optimized(slice: &[f32]) -> Vec { +pub(super) fn from_slice_non_optimized(slice: &[f32]) -> Vec { let mut output = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { let mut word: QuantizedWord = 0; @@ -154,7 +154,7 @@ unsafe fn from_slice_neon(slice: &[f32]) -> Vec { ret } -fn to_vec_non_optimized(vec: &UnalignedVector) -> Vec { +pub(super) fn to_vec_non_optimized(vec: &UnalignedVector) -> Vec { vec.iter().collect() } @@ -297,124 +297,3 @@ impl ExactSizeIterator for BinaryQuantizedIterator<'_> { lower } } - -#[cfg(test)] -mod test { - use insta::{assert_debug_snapshot, assert_snapshot}; - - use super::*; - use crate::internals::UnalignedVectorCodec; - - #[test] - fn test_from_slice() { - let original = [0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9]; - let vector = BinaryQuantized::from_slice(&original); - - let internal = vector.as_bytes().iter().map(|b| format!("{b:08b}\n")).collect::(); - assert_snapshot!(internal, @r###" - 10101011 - 00000000 - 00000000 - 00000000 - 00000000 - 00000000 - 00000000 - 00000000 - "###); - - let iter_vec: Vec<_> = 
BinaryQuantized::iter(&vector).take(original.len()).collect(); - assert_debug_snapshot!(iter_vec, @r###" - [ - 1.0, - 1.0, - -1.0, - 1.0, - -1.0, - 1.0, - -1.0, - 1.0, - -1.0, - ] - "###); - let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); - vec_vec.truncate(original.len()); - assert_debug_snapshot!(vec_vec, @r###" - [ - 1.0, - 1.0, - -1.0, - 1.0, - -1.0, - 1.0, - -1.0, - 1.0, - -1.0, - ] - "###); - - assert_eq!(vec_vec, iter_vec); - } - - #[test] - fn super_truc() { - let original = [0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9]; - let vector = BinaryQuantized::from_slice(&original); - let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); - assert_debug_snapshot!(iter_vec, @r###" - [ - 1.0, - 1.0, - -1.0, - 1.0, - -1.0, - 1.0, - -1.0, - 1.0, - -1.0, - ] - "###); - let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); - vec_vec.truncate(original.len()); - assert_debug_snapshot!(vec_vec, @r###" - [ - 1.0, - 1.0, - -1.0, - 1.0, - -1.0, - 1.0, - -1.0, - 1.0, - -1.0, - ] - "###); - - assert_eq!(vec_vec, iter_vec); - } - - use proptest::collection::vec; - use proptest::prelude::*; - - proptest! { - #[test] - fn prop_truc_1( - original in vec(-50f32..=50.2, 0..512) - ){ - let vector = BinaryQuantized::from_slice(&original); - let iter_vec: Vec<_> = to_vec_non_optimized(&vector); - let vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); - - assert_eq!(vec_vec, iter_vec); - } - - #[test] - fn prop_truc_2( - original in vec(-50f32..=50.2, 0..516) - ){ - let vector1 = BinaryQuantized::from_slice(&original); - let vector2 = from_slice_non_optimized(&original); - - assert_eq!(vector1.as_bytes(), &vector2); - } - } -} diff --git a/src/unaligned_vector/binary_quantized_test.rs b/src/unaligned_vector/binary_quantized_test.rs new file mode 100644 index 00000000..5620ac4c --- /dev/null +++ b/src/unaligned_vector/binary_quantized_test.rs @@ -0,0 +1,86 @@ +use binary_quantized_test::binary_quantized::{from_slice_non_optimized, to_vec_non_optimized}; +use insta::{assert_debug_snapshot, assert_snapshot}; +use proptest::collection::vec; +use proptest::prelude::*; + +use super::*; +use crate::internals::UnalignedVectorCodec; + +#[test] +fn test_from_slice() { + let original = [0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9]; + let vector = BinaryQuantized::from_slice(&original); + + #[allow(clippy::format_collect)] + let internal = vector.as_bytes().iter().map(|b| format!("{b:08b}\n")).collect::(); + assert_snapshot!(internal, @r###" + 10101011 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + "###); +} + +#[test] +fn test_to_vec_iter() { + let original = [0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9]; + let vector = BinaryQuantized::from_slice(&original); + let iter_vec: Vec<_> = BinaryQuantized::iter(&vector).take(original.len()).collect(); + assert_debug_snapshot!(iter_vec, @r###" + [ + 1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + ] + "###); + let mut vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); + vec_vec.truncate(original.len()); + assert_debug_snapshot!(vec_vec, @r###" + [ + 1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + 1.0, + -1.0, + ] + "###); + + assert_eq!(vec_vec, iter_vec); +} + +proptest! 
{ + #[test] + fn from_slice_simd_vs_non_optimized( + original in vec(-50f32..=50.2, 0..516) + ){ + let vector = BinaryQuantized::from_slice(&original); + let iter_vec: Vec<_> = to_vec_non_optimized(&vector); + let vec_vec: Vec<_> = BinaryQuantized::to_vec(&vector); + + assert_eq!(vec_vec, iter_vec); + } + + #[test] + fn to_vec_simd_vs_non_optimized( + original in vec(-50f32..=50.2, 0..516) + ){ + let vector1 = BinaryQuantized::from_slice(&original); + let vector2 = from_slice_non_optimized(&original); + + assert_eq!(vector1.as_bytes(), &vector2); + } +} diff --git a/src/unaligned_vector/mod.rs b/src/unaligned_vector/mod.rs index 386d3b79..1194c306 100644 --- a/src/unaligned_vector/mod.rs +++ b/src/unaligned_vector/mod.rs @@ -12,6 +12,9 @@ use bytemuck::pod_collect_to_vec; mod binary_quantized; mod f32; +#[cfg(test)] +mod binary_quantized_test; + /// Determine the way the vectors should be read and written from the database pub trait UnalignedVectorCodec: std::borrow::ToOwned + Sized { /// Creates an unaligned vector from a slice of bytes. From d61b7afb4e909271498a2cf8e96d02f19163323c Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 16 Sep 2024 16:28:46 +0200 Subject: [PATCH 46/61] move the node test to the binary_quantized_tests --- src/tests/mod.rs | 1 - src/tests/node.rs | 106 ------------------ src/unaligned_vector/binary_quantized_test.rs | 106 +++++++++++++++++- 3 files changed, 105 insertions(+), 108 deletions(-) delete mode 100644 src/tests/node.rs diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 1350b46a..0f041885 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -10,7 +10,6 @@ use crate::roaring::RoaringBitmapCodec; use crate::{Database, Distance, MetadataCodec, NodeCodec, NodeMode, Reader}; mod binary_quantized; -mod node; mod reader; mod writer; diff --git a/src/tests/node.rs b/src/tests/node.rs deleted file mode 100644 index 971e67d0..00000000 --- a/src/tests/node.rs +++ /dev/null @@ -1,106 +0,0 @@ -use insta::{assert_debug_snapshot, assert_snapshot}; - -use crate::unaligned_vector::{BinaryQuantized, UnalignedVector}; - -#[test] -fn unaligned_f32_vec() { - let original: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0]; - let bytes: Vec = original.iter().flat_map(|f| f.to_ne_bytes()).collect(); - - let unaligned_owned_from_f32 = UnalignedVector::::from_vec(original.clone()); - assert_eq!(bytes, unaligned_owned_from_f32.as_bytes()); - - let unchecked_unaligned_owned_from_bytes = UnalignedVector::::from_bytes_unchecked(&bytes); - assert_eq!(bytes, unchecked_unaligned_owned_from_bytes.as_bytes()); - - let unaligned_owned_from_bytes = UnalignedVector::::from_bytes(&bytes).unwrap(); - assert_eq!(bytes, unaligned_owned_from_bytes.as_bytes()); -} - -#[test] -fn unaligned_binary_quantized_iter_size() { - let original: Vec = vec![-1.0, 2.0, -3.0, 4.0, 5.0]; - let unaligned = UnalignedVector::::from_slice(&original); - assert_snapshot!(unaligned.len(), @"64"); - let mut iter = unaligned.iter(); - assert_snapshot!(iter.len(), @"64"); - iter.next().unwrap(); - assert_snapshot!(iter.len(), @"63"); - iter.by_ref().take(10).for_each(drop); - assert_snapshot!(iter.len(), @"53"); - iter.by_ref().take(52).for_each(drop); - assert_snapshot!(iter.len(), @"1"); - iter.next().unwrap(); - assert_snapshot!(iter.len(), @"0"); - iter.next(); - assert_snapshot!(iter.len(), @"0"); -} - -#[test] -fn unaligned_binary_quantized_smol() { - let original: Vec = vec![-1.0, 2.0, -3.0, 4.0, 5.0]; - - let unaligned = UnalignedVector::::from_slice(&original); - let s = unaligned.as_bytes().iter().map(|byte| 
format!("{byte:08b}\n")).collect::(); - assert_snapshot!(s, @r###" - 00011010 - 00000000 - 00000000 - 00000000 - 00000000 - 00000000 - 00000000 - 00000000 - "###); - - let deser: Vec<_> = unaligned.iter().collect(); - assert_debug_snapshot!(deser[0..original.len()], @r###" - [ - -1.0, - 1.0, - -1.0, - 1.0, - 1.0, - ] - "###); -} - -#[test] -fn unaligned_binary_quantized_large() { - let original: Vec = - (0..100).map(|n| if n % 3 == 0 || n % 5 == 0 { -1.0 } else { 1.0 }).collect(); - - // Two numbers should be used - let unaligned = UnalignedVector::::from_slice(&original); - let s = unaligned.as_bytes().iter().map(|byte| format!("{byte:08b}\n")).collect::(); - assert_snapshot!(s, @r###" - 10010110 - 01101001 - 11001011 - 10110100 - 01100101 - 11011010 - 00110010 - 01101101 - 10011001 - 10110110 - 01001100 - 01011011 - 00000110 - 00000000 - 00000000 - 00000000 - "###); - - let deser: Vec<_> = unaligned.to_vec(); - assert_snapshot!(format!("{:?}", &deser[0..original.len()]), - @"[-1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0]"); - //[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ... - for (orig, deser) in original.iter().zip(&deser) { - if orig.is_sign_positive() { - assert_eq!(deser, &1.0, "Expected 1 but found {deser}"); - } else { - assert_eq!(deser, &-1.0, "Expected -1 but found {deser}"); - } - } -} diff --git a/src/unaligned_vector/binary_quantized_test.rs b/src/unaligned_vector/binary_quantized_test.rs index 5620ac4c..32fbb84a 100644 --- a/src/unaligned_vector/binary_quantized_test.rs +++ b/src/unaligned_vector/binary_quantized_test.rs @@ -1,3 +1,5 @@ +#![allow(clippy::format_collect)] + use binary_quantized_test::binary_quantized::{from_slice_non_optimized, to_vec_non_optimized}; use insta::{assert_debug_snapshot, assert_snapshot}; use proptest::collection::vec; @@ -11,7 +13,6 @@ fn test_from_slice() { let original = [0.1, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9]; let vector = BinaryQuantized::from_slice(&original); - #[allow(clippy::format_collect)] let internal = vector.as_bytes().iter().map(|b| format!("{b:08b}\n")).collect::(); assert_snapshot!(internal, @r###" 10101011 @@ -62,6 +63,109 @@ fn test_to_vec_iter() { assert_eq!(vec_vec, iter_vec); } +#[test] +fn unaligned_f32_vec() { + let original: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0]; + let bytes: Vec = original.iter().flat_map(|f| f.to_ne_bytes()).collect(); + + let unaligned_owned_from_f32 = UnalignedVector::::from_vec(original.clone()); + assert_eq!(bytes, unaligned_owned_from_f32.as_bytes()); + + let unchecked_unaligned_owned_from_bytes = UnalignedVector::::from_bytes_unchecked(&bytes); + assert_eq!(bytes, unchecked_unaligned_owned_from_bytes.as_bytes()); + + let unaligned_owned_from_bytes = UnalignedVector::::from_bytes(&bytes).unwrap(); + assert_eq!(bytes, unaligned_owned_from_bytes.as_bytes()); +} + +#[test] +fn unaligned_binary_quantized_iter_size() { + let original: Vec = vec![-1.0, 2.0, -3.0, 4.0, 5.0]; + let unaligned = UnalignedVector::::from_slice(&original); + assert_snapshot!(unaligned.len(), @"64"); + let mut iter 
= unaligned.iter(); + assert_snapshot!(iter.len(), @"64"); + iter.next().unwrap(); + assert_snapshot!(iter.len(), @"63"); + iter.by_ref().take(10).for_each(drop); + assert_snapshot!(iter.len(), @"53"); + iter.by_ref().take(52).for_each(drop); + assert_snapshot!(iter.len(), @"1"); + iter.next().unwrap(); + assert_snapshot!(iter.len(), @"0"); + iter.next(); + assert_snapshot!(iter.len(), @"0"); +} + +#[test] +fn unaligned_binary_quantized_smol() { + let original: Vec = vec![-1.0, 2.0, -3.0, 4.0, 5.0]; + + let unaligned = UnalignedVector::::from_slice(&original); + let s = unaligned.as_bytes().iter().map(|byte| format!("{byte:08b}\n")).collect::(); + assert_snapshot!(s, @r###" + 00011010 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + 00000000 + "###); + + let deser: Vec<_> = unaligned.iter().collect(); + assert_debug_snapshot!(deser[0..original.len()], @r###" + [ + -1.0, + 1.0, + -1.0, + 1.0, + 1.0, + ] + "###); +} + +#[test] +fn unaligned_binary_quantized_large() { + let original: Vec = + (0..100).map(|n| if n % 3 == 0 || n % 5 == 0 { -1.0 } else { 1.0 }).collect(); + + // Two numbers should be used + let unaligned = UnalignedVector::::from_slice(&original); + let s = unaligned.as_bytes().iter().map(|byte| format!("{byte:08b}\n")).collect::(); + assert_snapshot!(s, @r###" + 10010110 + 01101001 + 11001011 + 10110100 + 01100101 + 11011010 + 00110010 + 01101101 + 10011001 + 10110110 + 01001100 + 01011011 + 00000110 + 00000000 + 00000000 + 00000000 + "###); + + let deser: Vec<_> = unaligned.to_vec(); + assert_snapshot!(format!("{:?}", &deser[0..original.len()]), + @"[-1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, 1.0, -1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0]"); + //[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ... + for (orig, deser) in original.iter().zip(&deser) { + if orig.is_sign_positive() { + assert_eq!(deser, &1.0, "Expected 1 but found {deser}"); + } else { + assert_eq!(deser, &-1.0, "Expected -1 but found {deser}"); + } + } +} + proptest! { #[test] fn from_slice_simd_vs_non_optimized( From 283eb2c35cfc9e11f9c2e3688c34fe2aaf053252 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 16 Sep 2024 16:30:54 +0200 Subject: [PATCH 47/61] fix the warnings --- src/unaligned_vector/f32.rs | 2 +- src/unaligned_vector/mod.rs | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/unaligned_vector/f32.rs b/src/unaligned_vector/f32.rs index 304ceae3..72a967fb 100644 --- a/src/unaligned_vector/f32.rs +++ b/src/unaligned_vector/f32.rs @@ -3,7 +3,7 @@ use std::{ mem::{size_of, transmute}, }; -use bytemuck::{cast_slice, checked::cast_slice_mut}; +use bytemuck::cast_slice; use byteorder::{ByteOrder, NativeEndian}; use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; diff --git a/src/unaligned_vector/mod.rs b/src/unaligned_vector/mod.rs index 1194c306..1b0cc899 100644 --- a/src/unaligned_vector/mod.rs +++ b/src/unaligned_vector/mod.rs @@ -29,6 +29,9 @@ pub trait UnalignedVectorCodec: std::borrow::ToOwned + Sized { /// The slice is already known to be of the right length. 
     fn from_vec(vec: Vec) -> Cow<'static, UnalignedVector>;
 
+    /// Converts the `UnalignedVector` to an aligned vector of `f32`.
+    /// It's strictly equivalent to `.iter().collect()` but the performance
+    /// is better.
     fn to_vec(vec: &UnalignedVector) -> Vec;
 
     /// Returns an iterator of f32 that are read from the vector.

From f2f6a7e8963a2ce1a1638e9a2e0e22225ba8a9fe Mon Sep 17 00:00:00 2001
From: Tamo
Date: Mon, 16 Sep 2024 16:35:25 +0200
Subject: [PATCH 48/61] move proptest to the dev dependencies

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 11cec715..fc962c76 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,7 +23,6 @@ roaring = "0.10.5"
 tempfile = "3.10.1"
 thiserror = "1.0.61"
 nohash = "0.2.0"
-proptest = "1.5.0"
 
 [dev-dependencies]
 anyhow = "1.0.86"
@@ -32,6 +31,7 @@ clap = { version = "4.5.7", features = ["derive"] }
 env_logger = "0.11.3"
 insta = "1.39.0"
 instant-distance = "0.6.1"
+proptest = "1.5.0"
 rand = { version = "0.8.5", features = ["std_rng"] }
 tempfile = "3.10.1"
 

From 0b21f34226570f3e293ce60e3916eb0dc6c4f25c Mon Sep 17 00:00:00 2001
From: Tamo
Date: Mon, 16 Sep 2024 16:49:20 +0200
Subject: [PATCH 49/61] fix comment

---
 src/distance/mod.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/distance/mod.rs b/src/distance/mod.rs
index 3c9833d8..36d55b97 100644
--- a/src/distance/mod.rs
+++ b/src/distance/mod.rs
@@ -190,8 +190,8 @@ pub fn two_means_binary_quantized(
     // points to either one of them. We weight each centroid by the number of points
     // assigned to it, so to balance it.
     // Even though the points we're working on are binary quantized, for the centroid
-    // to move, we need to store them as f32. This requires us to convert the binary quantized
-    // vector to f32 vectors a lot, but the recall suffers too much if we don't do it.
+    // to move, we need to store it as f32. This requires us to convert every binary quantized
+    // vector to an f32 vector, but the recall suffers too much if we don't do it.
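To see why the centroid has to live in f32: a running mean of coordinates that are all -1.0 or 1.0 immediately takes intermediate values that a binary-quantized vector cannot represent. Below is a minimal sketch of that accumulation, assuming plain f32 slices rather than arroy's leaf types; `update_centroid` and `count` are illustrative names, not part of the crate.

// A minimal sketch (not arroy's code): moving an f32 centroid towards a
// binary-quantized point whose coordinates are all -1.0 or 1.0.
fn update_centroid(centroid: &mut [f32], count: &mut f32, point: &[f32]) {
    *count += 1.0;
    for (c, p) in centroid.iter_mut().zip(point) {
        // Incremental mean: c += (p - c) / n
        *c += (*p - *c) / *count;
    }
}

fn main() {
    let mut centroid = vec![0.0_f32; 4];
    let mut count = 0.0_f32;
    for point in [[1.0, -1.0, 1.0, 1.0], [1.0, 1.0, -1.0, 1.0]] {
        update_centroid(&mut centroid, &mut count, &point);
    }
    // After two points the centroid is [1.0, 0.0, 0.0, 1.0]:
    // the zeros are exactly the values a ±1 representation cannot hold.
    println!("{centroid:?}");
}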
const ITERATION_STEPS: usize = 200; From 605b33a366a32b404284f55124c6ab1fde89c3be Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 16 Sep 2024 16:50:55 +0200 Subject: [PATCH 50/61] fix typo$ --- src/unaligned_vector/binary_quantized.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 1fff1515..77f0e10a 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -47,7 +47,7 @@ impl UnalignedVectorCodec for BinaryQuantized { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if is_x86_feature_detected!("sse") { - return unsafe { to_vec_see(vec) }; + return unsafe { to_vec_sse(vec) }; } } to_vec_non_optimized(vec) From df654bca12cc0529212bdcfbf97a70b3bc46b280 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 10:17:27 +0200 Subject: [PATCH 51/61] update a test playing with -0.0 and 0.0 since -0.0 doesn't answer the same way to is_positive under x86 and aarch64 --- src/tests/binary_quantized.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/binary_quantized.rs b/src/tests/binary_quantized.rs index bc401407..8eb475f1 100644 --- a/src/tests/binary_quantized.rs +++ b/src/tests/binary_quantized.rs @@ -14,7 +14,7 @@ fn write_and_retrieve_binary_quantized_vector() { &mut wtxn, 0, &[ - -2.0, -1.0, 0.0, -0.0, 2.0, 2.0, -12.4, 21.2, -2.0, -1.0, 0.0, 1.0, 2.0, 2.0, + -2.0, -1.0, 0.0, -0.1, 2.0, 2.0, -12.4, 21.2, -2.0, -1.0, 0.0, 1.0, 2.0, 2.0, -12.4, 21.2, ], ) @@ -25,7 +25,7 @@ fn write_and_retrieve_binary_quantized_vector() { -1.0, -1.0, 1.0, - 1.0, + -1.0, 1.0, 1.0, -1.0, @@ -47,7 +47,7 @@ fn write_and_retrieve_binary_quantized_vector() { insta::assert_snapshot!(handle, @r###" ================== Dumping index 0 - Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [-1.0000, -1.0000, 1.0000, 1.0000, 1.0000, 1.0000, -1.0000, 1.0000, -1.0000, -1.0000, "other ..."] }) + Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [-1.0000, -1.0000, 1.0000, -1.0000, 1.0000, 1.0000, -1.0000, 1.0000, -1.0000, -1.0000, "other ..."] }) Tree 0: Descendants(Descendants { descendants: [0] }) Root: Metadata { dimensions: 16, items: RoaringBitmap<[0]>, roots: [0], distance: "binary quantized euclidean" } "###); From be0f60059455af97146956e97a230054c6466b30 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 10:51:15 +0200 Subject: [PATCH 52/61] fix a comment --- src/distance/binary_quantized_euclidean.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/distance/binary_quantized_euclidean.rs b/src/distance/binary_quantized_euclidean.rs index 00077fd3..2d8e5c2f 100644 --- a/src/distance/binary_quantized_euclidean.rs +++ b/src/distance/binary_quantized_euclidean.rs @@ -95,17 +95,17 @@ impl Distance for BinaryQuantizedEuclidean { /// /// If we replace the `-1` by the binary quantized `0`, and the `1` stays `1`s: /// ```text -/// 0 * 0 = 0 -/// 0 * 1 = 1 -/// 1 * 0 = 1 -/// 1 * 1 = 0 +/// 0 ^ 0 = 0 +/// 0 ^ 1 = 1 +/// 1 ^ 0 = 1 +/// 1 ^ 1 = 0 /// ``` /// /// The result must be multiplicated by `4`. But that can be done at the very end. /// /// 2. Then we need to do the sum of the results: -/// Since we cannot go into the negative, it's safe to hold everything in a `u32` and simply counts the `1`s. -/// At the very end, before converting the value to a `f32` we can multiplies everything by 4. 
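To make the arithmetic above concrete: coordinates that differ contribute (1 - (-1))^2 = 4 to the squared distance, identical ones contribute 0, so the whole computation reduces to a popcount of the XORed words, scaled by 4 at the end. Below is a minimal scalar sketch assuming plain byte slices rather than the crate's `UnalignedVector` type; the function name is illustrative and the final scaling simply follows the reasoning in the comment.

// A minimal scalar sketch (illustrative, not the crate's implementation):
// bits that differ contribute (1 - (-1))^2 = 4 to the squared Euclidean
// distance, identical bits contribute 0, so d = 4 * popcount(u XOR v).
fn squared_euclidean_bq(u: &[u8], v: &[u8]) -> f32 {
    let differing_bits: u32 = u.iter().zip(v).map(|(a, b)| (a ^ b).count_ones()).sum();
    differing_bits as f32 * 4.0
}

fn main() {
    // 0b0000_1111 vs 0b0000_0011: two bits differ, so the distance is 8.0.
    assert_eq!(squared_euclidean_bq(&[0b0000_1111], &[0b0000_0011]), 8.0);
}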
+/// Since we cannot go into the negative, it's safe to hold everything in a `u32` and simply counts the `1`s. +/// At the very end, before converting the value to a `f32` we can multiply everything by 4. fn squared_euclidean_distance_binary_quantized( u: &UnalignedVector, v: &UnalignedVector, From 2d9eeb29a41a48b9069008b6fa1b82ffea1cb26d Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 11:18:54 +0200 Subject: [PATCH 53/61] rename + unroll loop in from_slice_neon --- src/spaces/simple.rs | 10 ++-- src/unaligned_vector/binary_quantized.rs | 62 +++++++++++------------- 2 files changed, 33 insertions(+), 39 deletions(-) diff --git a/src/spaces/simple.rs b/src/spaces/simple.rs index 8df42cf4..c06f995c 100644 --- a/src/spaces/simple.rs +++ b/src/spaces/simple.rs @@ -83,17 +83,17 @@ pub fn dot_product_non_optimized(u: &UnalignedVector, v: &UnalignedVector Vec { let iterations = slice.len() / size_of::(); // The size of the returned vector must be a multiple of a word - let reminder = slice.len() % size_of::(); + let remaining = slice.len() % size_of::(); let mut len = iterations; if len % size_of::() != 0 { len += size_of::() - len % size_of::(); - } else if reminder != 0 { + } else if remaining != 0 { // if we generated a valid number of Word but we're missing a few bits // then we need to add a full Word at the end. len += size_of::(); @@ -102,48 +102,42 @@ unsafe fn from_slice_neon(slice: &[f32]) -> Vec { let mut ret = vec![0; len]; let ptr = slice.as_ptr(); + let low: [u32; 4] = [ + 0b_00000000_00000000_00000000_00000001, + 0b_00000000_00000000_00000000_00000010, + 0b_00000000_00000000_00000000_00000100, + 0b_00000000_00000000_00000000_00001000, + ]; + let high: [u32; 4] = [ + 0b_00000000_00000000_00000000_00010000, + 0b_00000000_00000000_00000000_00100000, + 0b_00000000_00000000_00000000_01000000, + 0b_00000000_00000000_00000000_10000000, + ]; + #[allow(clippy::needless_range_loop)] for i in 0..iterations { unsafe { - let lane = vld1q_f32(ptr.add(i * 8)); - let lane = vcltzq_f32(lane); - let lane = vmvnq_u32(lane); - let mask: Vec = vec![ - 0b_00000000_00000000_00000000_00000001, - 0b_00000000_00000000_00000000_00000010, - 0b_00000000_00000000_00000000_00000100, - 0b_00000000_00000000_00000000_00001000, - ]; - let mask = vld1q_u32(mask.as_ptr()); - let lane = vandq_u32(lane, mask); - - let left = vaddvq_u32(lane) as u8; - - let lane = vld1q_f32(ptr.add(i * 8 + 4)); - let lane = vcltzq_f32(lane); - let lane = vmvnq_u32(lane); - let mask: Vec = vec![ - 0b_00000000_00000000_00000000_00010000, - 0b_00000000_00000000_00000000_00100000, - 0b_00000000_00000000_00000000_01000000, - 0b_00000000_00000000_00000000_10000000, - ]; - let mask = vld1q_u32(mask.as_ptr()); - let lane = vandq_u32(lane, mask); - - let right = vaddvq_u32(lane) as u8; - - ret[i] = left | right; + let mut byte = 0; + for (idx, mask) in [low, high].iter().enumerate() { + let lane = vld1q_f32(ptr.add(i * 8 + 4 * idx)); + let lane = vcltzq_f32(lane); + let lane = vmvnq_u32(lane); + let mask = vld1q_u32(mask.as_ptr()); + let lane = vandq_u32(lane, mask); + + byte |= vaddvq_u32(lane) as u8; + } + *ret.get_unchecked_mut(i) = byte; } } // Since we're iterating on bytes two by two. // If we had a number of dimensions not dividible by 8 we may be // missing some bits in the last byte. 
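For comparison with the NEON path above, the same packing can be written in a few scalar lines: each dimension contributes one bit, set when the value's sign bit is clear, and bits are gathered least-significant first (the snapshots earlier in this series, e.g. `00011010` for `[-1.0, 2.0, -3.0, 4.0, 5.0]`, follow the same layout). Below is a minimal sketch that packs into `u8` words instead of full-size quantized words; `pack_signs` is an illustrative name, not the crate's API.

// A minimal scalar sketch of the packing step: one bit per dimension,
// set when the value's sign is positive, gathered least-significant first.
// The real code packs into larger words; u8 keeps the example short.
fn pack_signs(values: &[f32]) -> Vec<u8> {
    let mut out = vec![0u8; (values.len() + 7) / 8];
    for (i, v) in values.iter().enumerate() {
        // `is_sign_positive` is false for -0.0, matching the scalar
        // remainder handling shown above.
        if v.is_sign_positive() {
            out[i / 8] |= 1u8 << (i % 8);
        }
    }
    out
}

fn main() {
    // Signs + + - + -  =>  bits 0, 1 and 3 set  =>  0b0000_1011
    assert_eq!(pack_signs(&[0.1, 2.0, -3.0, 4.0, -5.0]), vec![0b0000_1011]);
}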
- let reminder = slice.len() % size_of::(); - if reminder != 0 { + if remaining != 0 { let mut rem: u8 = 0; - for r in slice[slice.len() - reminder..].iter().rev() { + for r in slice[slice.len() - remaining..].iter().rev() { rem <<= 1; let r = r.is_sign_positive(); rem |= r as u8; From 8f6eaaac07e8c1fcb6e085071458bab5846925e1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 11:24:23 +0200 Subject: [PATCH 54/61] update reader::plot with .is_zero --- src/reader.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reader.rs b/src/reader.rs index f72770ce..658146d7 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -327,7 +327,7 @@ impl<'t, D: Distance> Reader<'t, D> { writeln!(writer, "\t\t{} [label=\"{}\"]", key.node.item, key.node.item,)? } Node::SplitPlaneNormal(SplitPlaneNormal { normal, left, right }) => { - if normal.iter().all(|n| n == 0.) { + if normal.is_zero() { writeln!(writer, "\t\t{} [color=red]", key.node.item)?; } writeln!( From c4aaa4463e0ec635b470c80ed60e9873fabcb2b6 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 11:28:01 +0200 Subject: [PATCH 55/61] fix github lint --- src/spaces/simple.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/spaces/simple.rs b/src/spaces/simple.rs index c06f995c..b880f293 100644 --- a/src/spaces/simple.rs +++ b/src/spaces/simple.rs @@ -84,32 +84,38 @@ pub fn dot_product_non_optimized(u: &UnalignedVector, v: &UnalignedVector -2 /// 01 => 0 /// 10 => 0 /// 11 => 2 +/// ``` /// /!\ We must also take care to use signed integer to be able to go into negatives /// /// 2.2 Finally we must sum the result of all the words -/// - By taking care of not overflowing: The biggest vectors contains like 5000 dimensions, a i16 could be enough. A i32 should be perfect. -/// - We can do the sum straight away without any more tricks -/// - We can cast the result to an f32 as expected +/// - By taking care of not overflowing: The biggest vectors contains like 5000 dimensions, a i16 could be enough. A i32 should be perfect. 
+/// - We can do the sum straight away without any more tricks +/// - We can cast the result to an f32 as expected pub fn dot_product_binary_quantized( u: &UnalignedVector, v: &UnalignedVector, From 9c457fe4956cca87625d5de2e760b2d3a6a9e316 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 11:30:04 +0200 Subject: [PATCH 56/61] clippy again --- src/spaces/simple.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spaces/simple.rs b/src/spaces/simple.rs index b880f293..fbf4fc21 100644 --- a/src/spaces/simple.rs +++ b/src/spaces/simple.rs @@ -101,9 +101,9 @@ pub fn dot_product_non_optimized(u: &UnalignedVector, v: &UnalignedVector -2 /// 01 => 0 From af598ca76e900dba23b0966e8078eeb40b0e9da2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 11:33:48 +0200 Subject: [PATCH 57/61] improve the display implementation of the splitnode --- src/node.rs | 3 +- ...ite_and_update_lot_of_random_points-2.snap | 98 +++++++++---------- ...write_and_update_lot_of_random_points.snap | 88 ++++++++--------- src/tests/writer.rs | 92 ++++++++--------- 4 files changed, 141 insertions(+), 140 deletions(-) diff --git a/src/node.rs b/src/node.rs index b2bc1cc8..9ae866a4 100644 --- a/src/node.rs +++ b/src/node.rs @@ -120,7 +120,8 @@ pub struct SplitPlaneNormal<'a, D: Distance> { impl fmt::Debug for SplitPlaneNormal<'_, D> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("SplitPlaneNormal") + let name = format!("SplitPlaneNormal<{}>", D::name()); + f.debug_struct(&name) .field("left", &self.left) .field("right", &self.right) .field("normal", &self.normal) diff --git a/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points-2.snap b/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points-2.snap index da91c990..1df2243b 100644 --- a/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points-2.snap +++ b/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points-2.snap @@ -106,110 +106,110 @@ Item 98: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5324, Item 99: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8320, 0.5156, 0.6172, 0.6617, 0.4999, 0.2464, 0.4536, 0.3265, 0.2163, 0.5406, "other ..."] }) Tree 0: Descendants(Descendants { descendants: [6, 14, 18, 21, 22, 23, 26, 28, 40, 42, 43, 44, 47, 51, 54, 59, 61, 62, 68, 73, 80, 82, 83, 87, 90] }) Tree 1: Descendants(Descendants { descendants: [8, 10, 15, 17, 20, 24, 34, 37, 46, 49, 53, 55, 57, 66, 71, 75, 77, 79, 92, 95, 98] }) -Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(1), normal: [-0.2169, 0.0505, 0.0138, 0.1637, -0.1566, -0.2702, 0.1215, 0.0399, 0.3132, 0.3827, "other ..."] }) +Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(1), normal: [-0.2169, 0.0505, 0.0138, 0.1637, -0.1566, -0.2702, 0.1215, 0.0399, 0.3132, 0.3827, "other ..."] }) Tree 3: Descendants(Descendants { descendants: [9, 12, 29, 31, 32, 35, 36, 64, 89, 93, 96] }) Tree 4: Descendants(Descendants { descendants: [3, 5, 7, 11, 25, 30, 39, 48, 52, 58, 60, 67, 69, 70, 81, 86, 91, 97, 99] }) Tree 5: Descendants(Descendants { descendants: [0, 1, 2, 4, 13, 16, 19, 27, 33, 38, 41, 45, 50, 56, 63, 65, 72, 74, 76, 78, 84, 85, 88, 94] }) -Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Tree(5), normal: [0.0550, -0.0317, -0.1815, 0.1748, 0.2592, 0.1276, 0.0648, -0.3539, -0.1585, 0.0911, "other ..."] }) -Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: 
Tree(3), right: Tree(6), normal: [-0.0346, 0.4691, 0.1765, -0.0148, 0.0259, 0.1307, -0.1240, -0.1665, 0.0875, 0.1291, "other ..."] }) -Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Tree(7), normal: [-0.0833, -0.0190, -0.0905, 0.1619, -0.0567, 0.2069, -0.2559, -0.1137, 0.0169, -0.0864, "other ..."] }) +Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Tree(5), normal: [0.0550, -0.0317, -0.1815, 0.1748, 0.2592, 0.1276, 0.0648, -0.3539, -0.1585, 0.0911, "other ..."] }) +Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: Tree(3), right: Tree(6), normal: [-0.0346, 0.4691, 0.1765, -0.0148, 0.0259, 0.1307, -0.1240, -0.1665, 0.0875, 0.1291, "other ..."] }) +Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Tree(7), normal: [-0.0833, -0.0190, -0.0905, 0.1619, -0.0567, 0.2069, -0.2559, -0.1137, 0.0169, -0.0864, "other ..."] }) Tree 11: Descendants(Descendants { descendants: [0, 1, 3, 4, 7, 9, 10, 11, 13, 27, 29, 30, 31, 39, 45, 50, 56, 57, 60, 64, 65, 67, 73, 84, 85, 88, 91, 97] }) Tree 14: Descendants(Descendants { descendants: [25, 32, 33, 49, 81, 99] }) -Tree 15: SplitPlaneNormal(SplitPlaneNormal { left: Tree(103), right: Tree(14), normal: [-0.3533, -0.1026, 0.1114, 0.1360, -0.1732, 0.1633, -0.0302, -0.1458, -0.0030, -0.2101, "other ..."] }) -Tree 16: SplitPlaneNormal(SplitPlaneNormal { left: Tree(100), right: Tree(15), normal: [0.2420, -0.1007, 0.3792, 0.0349, -0.0702, 0.0947, -0.3028, -0.1386, -0.2889, 0.0402, "other ..."] }) -Tree 17: SplitPlaneNormal(SplitPlaneNormal { left: Tree(11), right: Tree(16), normal: [-0.0014, -0.0866, -0.0081, 0.0327, -0.1021, -0.1147, 0.0955, 0.0205, 0.2358, -0.1408, "other ..."] }) +Tree 15: SplitPlaneNormal(SplitPlaneNormal { left: Tree(103), right: Tree(14), normal: [-0.3533, -0.1026, 0.1114, 0.1360, -0.1732, 0.1633, -0.0302, -0.1458, -0.0030, -0.2101, "other ..."] }) +Tree 16: SplitPlaneNormal(SplitPlaneNormal { left: Tree(100), right: Tree(15), normal: [0.2420, -0.1007, 0.3792, 0.0349, -0.0702, 0.0947, -0.3028, -0.1386, -0.2889, 0.0402, "other ..."] }) +Tree 17: SplitPlaneNormal(SplitPlaneNormal { left: Tree(11), right: Tree(16), normal: [-0.0014, -0.0866, -0.0081, 0.0327, -0.1021, -0.1147, 0.0955, 0.0205, 0.2358, -0.1408, "other ..."] }) Tree 18: Descendants(Descendants { descendants: [4, 7, 9, 11, 25, 27, 29, 33, 48, 52, 56, 58, 60, 67, 76, 78, 81, 91, 97] }) Tree 19: Descendants(Descendants { descendants: [0, 1, 6, 12, 13, 14, 18, 19, 31, 35, 38, 39, 42, 45, 50, 51, 54, 64, 85, 86, 89, 93, 94, 96] }) -Tree 20: SplitPlaneNormal(SplitPlaneNormal { left: Tree(18), right: Tree(19), normal: [-0.0557, -0.0514, -0.1661, -0.1599, 0.0364, -0.1539, 0.0511, -0.1720, -0.0989, -0.0885, "other ..."] }) +Tree 20: SplitPlaneNormal(SplitPlaneNormal { left: Tree(18), right: Tree(19), normal: [-0.0557, -0.0514, -0.1661, -0.1599, 0.0364, -0.1539, 0.0511, -0.1720, -0.0989, -0.0885, "other ..."] }) Tree 21: Descendants(Descendants { descendants: [2, 3, 5, 8, 15, 16, 17, 20, 24, 32, 36, 49, 59, 62, 63, 65, 66, 69, 70, 75, 79, 80, 83, 84, 87, 92] }) -Tree 23: SplitPlaneNormal(SplitPlaneNormal { left: Tree(21), right: Tree(106), normal: [0.1147, 0.1494, -0.3442, -0.2764, 0.1638, -0.3154, 0.0552, 0.1184, 0.1290, 0.0611, "other ..."] }) -Tree 24: SplitPlaneNormal(SplitPlaneNormal { left: Tree(20), right: Tree(23), normal: [-0.0617, -0.0186, -0.1350, -0.3128, -0.0542, 0.0524, 0.2907, 0.1259, 0.0908, 0.2518, "other ..."] }) +Tree 23: SplitPlaneNormal(SplitPlaneNormal { left: Tree(21), right: Tree(106), normal: [0.1147, 0.1494, -0.3442, 
-0.2764, 0.1638, -0.3154, 0.0552, 0.1184, 0.1290, 0.0611, "other ..."] }) +Tree 24: SplitPlaneNormal(SplitPlaneNormal { left: Tree(20), right: Tree(23), normal: [-0.0617, -0.0186, -0.1350, -0.3128, -0.0542, 0.0524, 0.2907, 0.1259, 0.0908, 0.2518, "other ..."] }) Tree 25: Descendants(Descendants { descendants: [] }) Tree 26: Descendants(Descendants { descendants: [5, 16, 18, 22, 36, 43, 44, 49, 59, 83, 84, 96] }) -Tree 28: SplitPlaneNormal(SplitPlaneNormal { left: Tree(26), right: Tree(111), normal: [0.0098, 0.2629, -0.1633, -0.1082, -0.0680, -0.1540, 0.0015, 0.1634, 0.2796, 0.1046, "other ..."] }) +Tree 28: SplitPlaneNormal(SplitPlaneNormal { left: Tree(26), right: Tree(111), normal: [0.0098, 0.2629, -0.1633, -0.1082, -0.0680, -0.1540, 0.0015, 0.1634, 0.2796, 0.1046, "other ..."] }) Tree 29: Descendants(Descendants { descendants: [7, 9, 11, 12, 31, 32, 33, 35, 39, 52, 58, 69, 74, 81, 86, 89, 92, 97, 99] }) -Tree 30: SplitPlaneNormal(SplitPlaneNormal { left: Tree(28), right: Tree(29), normal: [-0.1005, -0.0667, 0.1734, 0.2457, 0.1330, 0.0145, -0.1141, -0.0260, -0.1916, -0.1144, "other ..."] }) -Tree 31: SplitPlaneNormal(SplitPlaneNormal { left: Tree(25), right: Tree(30), normal: [0.0357, 0.2985, -0.2477, -0.0879, 0.2653, 0.2303, 0.1487, 0.0825, -0.0937, 0.1187, "other ..."] }) +Tree 30: SplitPlaneNormal(SplitPlaneNormal { left: Tree(28), right: Tree(29), normal: [-0.1005, -0.0667, 0.1734, 0.2457, 0.1330, 0.0145, -0.1141, -0.0260, -0.1916, -0.1144, "other ..."] }) +Tree 31: SplitPlaneNormal(SplitPlaneNormal { left: Tree(25), right: Tree(30), normal: [0.0357, 0.2985, -0.2477, -0.0879, 0.2653, 0.2303, 0.1487, 0.0825, -0.0937, 0.1187, "other ..."] }) Tree 34: Descendants(Descendants { descendants: [0, 1, 2, 4, 13, 19, 24, 25, 27, 29, 41, 42, 45, 51, 56, 57, 60, 63, 64, 65, 77, 85, 91, 93, 94] }) -Tree 35: SplitPlaneNormal(SplitPlaneNormal { left: Tree(31), right: Tree(34), normal: [-0.1861, -0.2141, -0.1520, 0.0974, 0.0900, 0.2616, -0.0688, -0.2960, -0.0378, -0.0949, "other ..."] }) +Tree 35: SplitPlaneNormal(SplitPlaneNormal { left: Tree(31), right: Tree(34), normal: [-0.1861, -0.2141, -0.1520, 0.0974, 0.0900, 0.2616, -0.0688, -0.2960, -0.0378, -0.0949, "other ..."] }) Tree 36: Descendants(Descendants { descendants: [8, 15, 18, 21, 36, 42, 53, 59, 71, 87, 89, 93, 98] }) Tree 38: Descendants(Descendants { descendants: [4, 11, 23, 33, 48, 52, 54, 63, 64, 66, 69, 74, 75, 88, 90, 99] }) -Tree 39: SplitPlaneNormal(SplitPlaneNormal { left: Tree(114), right: Tree(38), normal: [0.2761, -0.2159, -0.3421, 0.2165, 0.3175, -0.0799, 0.0880, 0.0680, 0.0497, -0.0448, "other ..."] }) -Tree 40: SplitPlaneNormal(SplitPlaneNormal { left: Tree(36), right: Tree(39), normal: [-0.0582, 0.0824, 0.2122, 0.1920, 0.2041, -0.0451, -0.0324, 0.0349, 0.0748, 0.2791, "other ..."] }) +Tree 39: SplitPlaneNormal(SplitPlaneNormal { left: Tree(114), right: Tree(38), normal: [0.2761, -0.2159, -0.3421, 0.2165, 0.3175, -0.0799, 0.0880, 0.0680, 0.0497, -0.0448, "other ..."] }) +Tree 40: SplitPlaneNormal(SplitPlaneNormal { left: Tree(36), right: Tree(39), normal: [-0.0582, 0.0824, 0.2122, 0.1920, 0.2041, -0.0451, -0.0324, 0.0349, 0.0748, 0.2791, "other ..."] }) Tree 41: Descendants(Descendants { descendants: [1, 2, 3, 6, 22, 29, 40, 41, 43, 44, 45, 47, 49, 50, 65, 72, 73, 77, 80, 82, 83, 85, 92, 94] }) Tree 42: Descendants(Descendants { descendants: [13, 14, 16, 24, 30, 32, 35, 37, 38, 51, 57, 61, 78, 79, 95, 96] }) -Tree 43: SplitPlaneNormal(SplitPlaneNormal { left: Tree(41), right: Tree(42), normal: [-0.2362, 0.0074, -0.1011, 
0.1902, -0.2410, -0.2179, 0.1000, -0.2451, 0.0583, 0.2037, "other ..."] }) -Tree 44: SplitPlaneNormal(SplitPlaneNormal { left: Tree(40), right: Tree(43), normal: [-0.0723, -0.1644, -0.4771, -0.0662, 0.1894, 0.0474, 0.2151, 0.0954, -0.0123, 0.1914, "other ..."] }) +Tree 43: SplitPlaneNormal(SplitPlaneNormal { left: Tree(41), right: Tree(42), normal: [-0.2362, 0.0074, -0.1011, 0.1902, -0.2410, -0.2179, 0.1000, -0.2451, 0.0583, 0.2037, "other ..."] }) +Tree 44: SplitPlaneNormal(SplitPlaneNormal { left: Tree(40), right: Tree(43), normal: [-0.0723, -0.1644, -0.4771, -0.0662, 0.1894, 0.0474, 0.2151, 0.0954, -0.0123, 0.1914, "other ..."] }) Tree 45: Descendants(Descendants { descendants: [23, 43, 46, 49, 53, 55, 66, 75, 82] }) Tree 46: Descendants(Descendants { descendants: [0, 6, 8, 18, 21, 22, 28, 31, 39, 41, 42, 44, 45, 47, 50, 51, 52, 54, 58, 59, 72, 76, 77, 80, 85, 88, 91, 93, 98] }) Tree 47: Descendants(Descendants { descendants: [10, 13, 19, 27, 37, 40, 67, 71, 73, 86, 87, 90] }) -Tree 48: SplitPlaneNormal(SplitPlaneNormal { left: Tree(46), right: Tree(47), normal: [-0.0763, 0.0885, -0.2724, 0.0145, 0.0947, -0.2347, 0.1067, -0.0488, 0.0689, 0.0441, "other ..."] }) +Tree 48: SplitPlaneNormal(SplitPlaneNormal { left: Tree(46), right: Tree(47), normal: [-0.0763, 0.0885, -0.2724, 0.0145, 0.0947, -0.2347, 0.1067, -0.0488, 0.0689, 0.0441, "other ..."] }) Tree 49: Descendants(Descendants { descendants: [] }) -Tree 50: SplitPlaneNormal(SplitPlaneNormal { left: Tree(48), right: Tree(49), normal: [-0.0493, 0.0077, 0.1071, 0.2488, -0.0877, -0.1022, -0.1081, -0.0104, 0.0486, -0.0089, "other ..."] }) +Tree 50: SplitPlaneNormal(SplitPlaneNormal { left: Tree(48), right: Tree(49), normal: [-0.0493, 0.0077, 0.1071, 0.2488, -0.0877, -0.1022, -0.1081, -0.0104, 0.0486, -0.0089, "other ..."] }) Tree 51: Descendants(Descendants { descendants: [1, 14, 29, 30, 35, 38, 57, 63, 64, 65, 78, 83, 89, 94, 95, 97] }) -Tree 53: SplitPlaneNormal(SplitPlaneNormal { left: Tree(51), right: Tree(117), normal: [-0.2008, 0.1922, 0.3079, -0.0453, -0.3244, 0.1055, -0.0466, -0.1035, 0.1446, -0.0438, "other ..."] }) -Tree 54: SplitPlaneNormal(SplitPlaneNormal { left: Tree(50), right: Tree(53), normal: [-0.2175, -0.1158, -0.0781, 0.0622, -0.2080, 0.0082, 0.1641, 0.0493, -0.0815, 0.2456, "other ..."] }) -Tree 55: SplitPlaneNormal(SplitPlaneNormal { left: Tree(45), right: Tree(54), normal: [-0.0297, 0.0635, -0.0090, 0.0742, 0.0765, 0.2930, -0.2378, -0.0111, -0.1069, -0.1084, "other ..."] }) +Tree 53: SplitPlaneNormal(SplitPlaneNormal { left: Tree(51), right: Tree(117), normal: [-0.2008, 0.1922, 0.3079, -0.0453, -0.3244, 0.1055, -0.0466, -0.1035, 0.1446, -0.0438, "other ..."] }) +Tree 54: SplitPlaneNormal(SplitPlaneNormal { left: Tree(50), right: Tree(53), normal: [-0.2175, -0.1158, -0.0781, 0.0622, -0.2080, 0.0082, 0.1641, 0.0493, -0.0815, 0.2456, "other ..."] }) +Tree 55: SplitPlaneNormal(SplitPlaneNormal { left: Tree(45), right: Tree(54), normal: [-0.0297, 0.0635, -0.0090, 0.0742, 0.0765, 0.2930, -0.2378, -0.0111, -0.1069, -0.1084, "other ..."] }) Tree 58: Descendants(Descendants { descendants: [1, 3, 4, 7, 11, 14, 27, 29, 30, 33, 39, 50, 52, 56, 57, 67, 68, 81, 86, 88, 93, 94, 95, 97, 99] }) Tree 59: Descendants(Descendants { descendants: [5, 9, 15, 23, 31, 32, 45, 63, 64, 65, 69, 77, 83, 89, 92] }) -Tree 60: SplitPlaneNormal(SplitPlaneNormal { left: Tree(58), right: Tree(59), normal: [0.1786, -0.1600, -0.2775, -0.0898, 0.1079, 0.1067, -0.0025, 0.0785, -0.0136, -0.0227, "other ..."] }) +Tree 60: SplitPlaneNormal(SplitPlaneNormal { 
left: Tree(58), right: Tree(59), normal: [0.1786, -0.1600, -0.2775, -0.0898, 0.1079, 0.1067, -0.0025, 0.0785, -0.0136, -0.0227, "other ..."] }) Tree 61: Descendants(Descendants { descendants: [12, 16, 17, 19, 24, 25, 26, 34, 35, 36, 37, 41, 49, 53, 55, 60, 62, 70, 71, 74, 75, 78, 79, 91, 96, 98] }) -Tree 63: SplitPlaneNormal(SplitPlaneNormal { left: Tree(61), right: Tree(120), normal: [0.0237, 0.2316, 0.0207, 0.0743, 0.2916, 0.2200, 0.0594, 0.0390, -0.4108, -0.0552, "other ..."] }) -Tree 64: SplitPlaneNormal(SplitPlaneNormal { left: Tree(60), right: Tree(63), normal: [-0.0179, -0.0168, -0.2421, -0.1799, -0.1356, 0.0168, 0.1303, -0.2701, 0.3890, -0.1482, "other ..."] }) +Tree 63: SplitPlaneNormal(SplitPlaneNormal { left: Tree(61), right: Tree(120), normal: [0.0237, 0.2316, 0.0207, 0.0743, 0.2916, 0.2200, 0.0594, 0.0390, -0.4108, -0.0552, "other ..."] }) +Tree 64: SplitPlaneNormal(SplitPlaneNormal { left: Tree(60), right: Tree(63), normal: [-0.0179, -0.0168, -0.2421, -0.1799, -0.1356, 0.0168, 0.1303, -0.2701, 0.3890, -0.1482, "other ..."] }) Tree 65: Descendants(Descendants { descendants: [3, 7, 9, 10, 17, 25, 30, 37, 38, 39, 55, 56, 60, 65, 67, 77, 78, 86, 88, 94, 97] }) Tree 66: Descendants(Descendants { descendants: [1, 4, 11, 13, 19, 27, 42, 54, 58, 73, 76, 85, 87, 91] }) -Tree 67: SplitPlaneNormal(SplitPlaneNormal { left: Tree(65), right: Tree(66), normal: [0.0741, 0.1397, -0.1747, 0.1955, 0.0976, -0.0066, -0.2642, -0.4876, -0.0256, -0.1423, "other ..."] }) +Tree 67: SplitPlaneNormal(SplitPlaneNormal { left: Tree(65), right: Tree(66), normal: [0.0741, 0.1397, -0.1747, 0.1955, 0.0976, -0.0066, -0.2642, -0.4876, -0.0256, -0.1423, "other ..."] }) Tree 68: Descendants(Descendants { descendants: [12, 32, 35, 75, 99] }) Tree 70: Descendants(Descendants { descendants: [0, 5, 6, 18, 22, 31, 33, 44, 45, 51, 64, 68, 89, 93] }) -Tree 71: SplitPlaneNormal(SplitPlaneNormal { left: Tree(123), right: Tree(70), normal: [0.0908, -0.0903, 0.3735, 0.0708, -0.0676, -0.1918, -0.1890, -0.2189, -0.3541, -0.3300, "other ..."] }) +Tree 71: SplitPlaneNormal(SplitPlaneNormal { left: Tree(123), right: Tree(70), normal: [0.0908, -0.0903, 0.3735, 0.0708, -0.0676, -0.1918, -0.1890, -0.2189, -0.3541, -0.3300, "other ..."] }) Tree 72: Descendants(Descendants { descendants: [21, 23, 41, 43, 46, 49, 53, 59, 71, 80] }) -Tree 73: SplitPlaneNormal(SplitPlaneNormal { left: Tree(71), right: Tree(72), normal: [-0.0215, 0.0861, 0.0260, -0.2042, -0.1901, -0.1088, 0.0154, -0.2001, 0.3676, -0.0737, "other ..."] }) -Tree 74: SplitPlaneNormal(SplitPlaneNormal { left: Tree(68), right: Tree(73), normal: [0.1338, 0.3609, -0.1010, -0.1138, 0.0836, 0.0482, -0.0207, 0.0315, 0.1349, 0.1157, "other ..."] }) -Tree 75: SplitPlaneNormal(SplitPlaneNormal { left: Tree(67), right: Tree(74), normal: [0.0874, -0.1312, 0.0401, 0.1433, -0.0151, -0.0312, 0.0888, 0.0681, -0.2158, -0.0702, "other ..."] }) +Tree 73: SplitPlaneNormal(SplitPlaneNormal { left: Tree(71), right: Tree(72), normal: [-0.0215, 0.0861, 0.0260, -0.2042, -0.1901, -0.1088, 0.0154, -0.2001, 0.3676, -0.0737, "other ..."] }) +Tree 74: SplitPlaneNormal(SplitPlaneNormal { left: Tree(68), right: Tree(73), normal: [0.1338, 0.3609, -0.1010, -0.1138, 0.0836, 0.0482, -0.0207, 0.0315, 0.1349, 0.1157, "other ..."] }) +Tree 75: SplitPlaneNormal(SplitPlaneNormal { left: Tree(67), right: Tree(74), normal: [0.0874, -0.1312, 0.0401, 0.1433, -0.0151, -0.0312, 0.0888, 0.0681, -0.2158, -0.0702, "other ..."] }) Tree 80: Descendants(Descendants { descendants: [0, 1, 4, 7, 11, 12, 17, 19, 25, 31, 32, 33, 
35, 36, 48, 49, 56, 58, 60, 63, 66, 70, 74, 75, 81, 91, 93, 96, 99] }) Tree 81: Descendants(Descendants { descendants: [27, 85] }) Tree 83: Descendants(Descendants { descendants: [3, 5, 6, 8, 9, 13, 15, 18, 20, 24, 40, 45, 46, 57, 61, 65, 73, 77, 79, 82, 83, 87, 89, 92, 95] }) -Tree 84: SplitPlaneNormal(SplitPlaneNormal { left: Tree(126), right: Tree(83), normal: [-0.0973, 0.2024, -0.0486, 0.1536, -0.0050, 0.0761, -0.0099, -0.1343, -0.3098, 0.2276, "other ..."] }) -Tree 85: SplitPlaneNormal(SplitPlaneNormal { left: Tree(81), right: Tree(84), normal: [0.1433, -0.2044, -0.0819, -0.1305, 0.0272, 0.1805, 0.0805, 0.1128, 0.0279, -0.0258, "other ..."] }) -Tree 86: SplitPlaneNormal(SplitPlaneNormal { left: Tree(80), right: Tree(85), normal: [0.0240, 0.1785, 0.0265, -0.3440, 0.1702, 0.0470, 0.0107, 0.2175, -0.0317, 0.1600, "other ..."] }) +Tree 84: SplitPlaneNormal(SplitPlaneNormal { left: Tree(126), right: Tree(83), normal: [-0.0973, 0.2024, -0.0486, 0.1536, -0.0050, 0.0761, -0.0099, -0.1343, -0.3098, 0.2276, "other ..."] }) +Tree 85: SplitPlaneNormal(SplitPlaneNormal { left: Tree(81), right: Tree(84), normal: [0.1433, -0.2044, -0.0819, -0.1305, 0.0272, 0.1805, 0.0805, 0.1128, 0.0279, -0.0258, "other ..."] }) +Tree 86: SplitPlaneNormal(SplitPlaneNormal { left: Tree(80), right: Tree(85), normal: [0.0240, 0.1785, 0.0265, -0.3440, 0.1702, 0.0470, 0.0107, 0.2175, -0.0317, 0.1600, "other ..."] }) Tree 87: Descendants(Descendants { descendants: [0, 4, 13, 21, 27, 28, 29, 37, 38, 40, 41, 47, 50, 72, 73, 76, 80, 82, 85, 91] }) Tree 88: Descendants(Descendants { descendants: [1, 3, 7, 10, 17, 30, 56, 67, 77, 78, 86, 88] }) Tree 89: Descendants(Descendants { descendants: [8, 20, 24, 46, 49, 61, 62, 66, 70, 75, 79, 81, 92] }) Tree 90: Descendants(Descendants { descendants: [6, 14, 18, 31, 33, 39, 42, 45, 48, 51, 57, 58, 63, 64, 89, 93, 95, 97] }) Tree 91: Descendants(Descendants { descendants: [2, 5, 9, 15, 16, 22, 23, 26, 32, 34, 35, 36, 43, 44, 52, 53, 54, 55, 68, 69, 71, 74, 83, 84, 87, 90, 94, 96, 98, 99] }) -Tree 92: SplitPlaneNormal(SplitPlaneNormal { left: Tree(90), right: Tree(91), normal: [0.1432, -0.2381, -0.0722, -0.2146, -0.0888, -0.0131, 0.0621, 0.2812, 0.2690, -0.0300, "other ..."] }) +Tree 92: SplitPlaneNormal(SplitPlaneNormal { left: Tree(90), right: Tree(91), normal: [0.1432, -0.2381, -0.0722, -0.2146, -0.0888, -0.0131, 0.0621, 0.2812, 0.2690, -0.0300, "other ..."] }) Tree 93: Descendants(Descendants { descendants: [11, 12, 19, 25, 59, 60, 65] }) -Tree 94: SplitPlaneNormal(SplitPlaneNormal { left: Tree(92), right: Tree(93), normal: [-0.3120, -0.2834, -0.0907, 0.0227, 0.0090, 0.3812, 0.1933, -0.4350, -0.0939, -0.1180, "other ..."] }) -Tree 95: SplitPlaneNormal(SplitPlaneNormal { left: Tree(89), right: Tree(94), normal: [-0.0200, -0.2200, 0.1948, -0.1418, 0.1112, 0.0083, -0.0510, -0.0801, -0.3187, -0.1000, "other ..."] }) -Tree 96: SplitPlaneNormal(SplitPlaneNormal { left: Tree(88), right: Tree(95), normal: [0.0684, -0.2434, -0.1543, 0.0337, 0.1784, 0.1295, 0.3061, -0.0175, -0.0679, -0.1419, "other ..."] }) -Tree 97: SplitPlaneNormal(SplitPlaneNormal { left: Tree(87), right: Tree(96), normal: [-0.0316, -0.1036, 0.3252, -0.0985, -0.1286, -0.4867, -0.0286, 0.1159, -0.0177, 0.2205, "other ..."] }) +Tree 94: SplitPlaneNormal(SplitPlaneNormal { left: Tree(92), right: Tree(93), normal: [-0.3120, -0.2834, -0.0907, 0.0227, 0.0090, 0.3812, 0.1933, -0.4350, -0.0939, -0.1180, "other ..."] }) +Tree 95: SplitPlaneNormal(SplitPlaneNormal { left: Tree(89), right: Tree(94), normal: [-0.0200, -0.2200, 0.1948, 
-0.1418, 0.1112, 0.0083, -0.0510, -0.0801, -0.3187, -0.1000, "other ..."] }) +Tree 96: SplitPlaneNormal(SplitPlaneNormal { left: Tree(88), right: Tree(95), normal: [0.0684, -0.2434, -0.1543, 0.0337, 0.1784, 0.1295, 0.3061, -0.0175, -0.0679, -0.1419, "other ..."] }) +Tree 97: SplitPlaneNormal(SplitPlaneNormal { left: Tree(87), right: Tree(96), normal: [-0.0316, -0.1036, 0.3252, -0.0985, -0.1286, -0.4867, -0.0286, 0.1159, -0.0177, 0.2205, "other ..."] }) Tree 98: Descendants(Descendants { descendants: [12, 17, 19, 28, 35, 38, 41, 42, 53, 55, 58, 75, 77, 78, 96, 98] }) Tree 99: Descendants(Descendants { descendants: [14, 22, 37, 40, 47, 51, 61, 66, 70, 72, 76, 79, 82, 87, 90, 95] }) -Tree 100: SplitPlaneNormal(SplitPlaneNormal { left: Tree(98), right: Tree(99), normal: [-0.0078, 0.2489, -0.2359, 0.0425, -0.0624, 0.0527, 0.2042, 0.2301, -0.3498, -0.0920, "other ..."] }) +Tree 100: SplitPlaneNormal(SplitPlaneNormal { left: Tree(98), right: Tree(99), normal: [-0.0078, 0.2489, -0.2359, 0.0425, -0.0624, 0.0527, 0.2042, 0.2301, -0.3498, -0.0920, "other ..."] }) Tree 101: Descendants(Descendants { descendants: [8, 15, 24, 83, 92, 94] }) Tree 102: Descendants(Descendants { descendants: [2, 5, 6, 16, 18, 20, 21, 23, 26, 34, 36, 43, 44, 46, 48, 52, 54, 59, 62, 63, 68, 69, 71, 74, 80, 86, 89, 93] }) -Tree 103: SplitPlaneNormal(SplitPlaneNormal { left: Tree(101), right: Tree(102), normal: [0.0940, 0.1325, 0.0627, 0.0098, -0.2045, -0.0698, 0.1022, 0.0678, -0.0750, -0.3038, "other ..."] }) +Tree 103: SplitPlaneNormal(SplitPlaneNormal { left: Tree(101), right: Tree(102), normal: [0.0940, 0.1325, 0.0627, 0.0098, -0.2045, -0.0698, 0.1022, 0.0678, -0.0750, -0.3038, "other ..."] }) Tree 104: Descendants(Descendants { descendants: [10, 21, 28, 30, 37, 41, 47, 71, 72, 77, 88, 98] }) Tree 105: Descendants(Descendants { descendants: [22, 23, 26, 34, 40, 43, 44, 46, 53, 55, 57, 61, 68, 73, 74, 82, 90, 95, 99] }) -Tree 106: SplitPlaneNormal(SplitPlaneNormal { left: Tree(104), right: Tree(105), normal: [-0.1170, 0.1446, 0.0602, -0.0688, 0.2407, -0.1008, 0.1718, -0.1779, -0.1381, 0.0348, "other ..."] }) +Tree 106: SplitPlaneNormal(SplitPlaneNormal { left: Tree(104), right: Tree(105), normal: [-0.1170, 0.1446, 0.0602, -0.0688, 0.2407, -0.1008, 0.1718, -0.1779, -0.1381, 0.0348, "other ..."] }) Tree 107: Descendants(Descendants { descendants: [14, 20, 26, 40, 61, 67, 80, 87] }) Tree 108: Descendants(Descendants { descendants: [3, 6, 8, 10, 21, 23, 28, 30, 34, 37, 38, 47, 48, 50, 54, 55, 66, 68, 71, 72, 73, 76, 78, 79, 82, 88, 95, 98] }) -Tree 109: SplitPlaneNormal(SplitPlaneNormal { left: Tree(107), right: Tree(108), normal: [0.2514, -0.1588, -0.0499, 0.1317, -0.2218, -0.1911, 0.0804, 0.1173, 0.2414, 0.2416, "other ..."] }) +Tree 109: SplitPlaneNormal(SplitPlaneNormal { left: Tree(107), right: Tree(108), normal: [0.2514, -0.1588, -0.0499, 0.1317, -0.2218, -0.1911, 0.0804, 0.1173, 0.2414, 0.2416, "other ..."] }) Tree 110: Descendants(Descendants { descendants: [15, 17, 46, 53, 62, 70, 75, 90] }) -Tree 111: SplitPlaneNormal(SplitPlaneNormal { left: Tree(109), right: Tree(110), normal: [-0.2031, -0.2079, 0.3738, -0.0426, -0.0746, -0.1377, 0.0392, 0.0958, 0.1932, -0.0802, "other ..."] }) +Tree 111: SplitPlaneNormal(SplitPlaneNormal { left: Tree(109), right: Tree(110), normal: [-0.2031, -0.2079, 0.3738, -0.0426, -0.0746, -0.1377, 0.0392, 0.0958, 0.1932, -0.0802, "other ..."] }) Tree 112: Descendants(Descendants { descendants: [0, 5, 7, 9, 10, 12, 17, 19, 20, 25, 27, 31, 34, 39, 46, 55, 56, 58, 60, 67, 68, 76, 81, 84, 86, 91, 
97] }) Tree 113: Descendants(Descendants { descendants: [26, 28, 62, 70] }) -Tree 114: SplitPlaneNormal(SplitPlaneNormal { left: Tree(112), right: Tree(113), normal: [-0.1107, 0.0316, 0.0135, -0.1941, -0.1442, 0.1133, -0.1829, 0.0071, 0.2601, -0.1618, "other ..."] }) +Tree 114: SplitPlaneNormal(SplitPlaneNormal { left: Tree(112), right: Tree(113), normal: [-0.1107, 0.0316, 0.0135, -0.1941, -0.1442, 0.1133, -0.1829, 0.0071, 0.2601, -0.1618, "other ..."] }) Tree 115: Descendants(Descendants { descendants: [2, 3, 4, 7, 11, 12, 16, 17, 20, 25, 32, 33, 34, 36, 48, 56, 60, 68, 69, 70, 74, 79, 81, 84, 96, 99] }) Tree 116: Descendants(Descendants { descendants: [5, 9, 15, 24, 26, 61, 62, 92] }) -Tree 117: SplitPlaneNormal(SplitPlaneNormal { left: Tree(115), right: Tree(116), normal: [-0.0406, 0.1263, 0.0654, -0.2637, -0.0650, 0.0877, -0.2022, 0.0139, -0.0035, -0.1877, "other ..."] }) +Tree 117: SplitPlaneNormal(SplitPlaneNormal { left: Tree(115), right: Tree(116), normal: [-0.0406, 0.1263, 0.0654, -0.2637, -0.0650, 0.0877, -0.2022, 0.0139, -0.0035, -0.1877, "other ..."] }) Tree 118: Descendants(Descendants { descendants: [2, 10, 13, 20, 21, 28, 40, 46, 47, 48, 61, 66, 72, 73, 76, 80, 87, 90] }) Tree 119: Descendants(Descendants { descendants: [0, 6, 8, 18, 22, 38, 42, 43, 44, 51, 54, 58, 59, 82, 84, 85] }) -Tree 120: SplitPlaneNormal(SplitPlaneNormal { left: Tree(118), right: Tree(119), normal: [0.2462, -0.2241, 0.3383, -0.2016, 0.0430, -0.0574, 0.1455, -0.2949, -0.0625, 0.0844, "other ..."] }) +Tree 120: SplitPlaneNormal(SplitPlaneNormal { left: Tree(118), right: Tree(119), normal: [0.2462, -0.2241, 0.3383, -0.2016, 0.0430, -0.0574, 0.1455, -0.2949, -0.0625, 0.0844, "other ..."] }) Tree 121: Descendants(Descendants { descendants: [8, 20, 34, 36, 47, 48, 50, 52, 63, 66, 69, 72, 74, 79, 81, 82, 84, 95, 96, 98] }) Tree 122: Descendants(Descendants { descendants: [2, 14, 15, 16, 24, 26, 28, 29, 40, 57, 61, 62, 70, 83, 90, 92] }) -Tree 123: SplitPlaneNormal(SplitPlaneNormal { left: Tree(121), right: Tree(122), normal: [-0.1843, -0.0871, -0.0603, -0.1272, 0.2035, 0.1519, -0.1754, 0.2246, 0.0560, 0.0408, "other ..."] }) +Tree 123: SplitPlaneNormal(SplitPlaneNormal { left: Tree(121), right: Tree(122), normal: [-0.1843, -0.0871, -0.0603, -0.1272, 0.2035, 0.1519, -0.1754, 0.2246, 0.0560, 0.0408, "other ..."] }) Tree 124: Descendants(Descendants { descendants: [14, 16, 22, 38, 41, 42, 43, 44, 50, 51, 53, 59, 62, 64, 80, 84] }) Tree 125: Descendants(Descendants { descendants: [2, 10, 21, 23, 26, 28, 29, 30, 34, 37, 39, 47, 52, 54, 55, 67, 68, 69, 71, 72, 76, 78, 86, 88, 90, 94, 97, 98] }) -Tree 126: SplitPlaneNormal(SplitPlaneNormal { left: Tree(124), right: Tree(125), normal: [-0.2156, 0.0925, 0.0242, 0.1246, -0.0758, -0.0672, -0.1816, 0.4853, 0.1643, 0.1418, "other ..."] }) +Tree 126: SplitPlaneNormal(SplitPlaneNormal { left: Tree(124), right: Tree(125), normal: [-0.2156, 0.0925, 0.0242, 0.1246, -0.0758, -0.0672, -0.1816, 0.4853, 0.1643, 0.1418, "other ..."] }) Root: Metadata { dimensions: 30, items: RoaringBitmap<100 values between 0 and 99>, roots: [8, 17, 24, 35, 44, 55, 64, 75, 86, 97], distance: "euclidean" } diff --git a/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points.snap b/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points.snap index f7b0ddec..774440e5 100644 --- a/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points.snap +++ b/src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points.snap 
@@ -106,100 +106,100 @@ Item 98: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.5644, Item 99: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.8320, 0.5156, 0.6172, 0.6617, 0.4999, 0.2464, 0.4536, 0.3265, 0.2163, 0.5406, "other ..."] }) Tree 0: Descendants(Descendants { descendants: [4, 8, 12, 18, 21, 22, 23, 24, 43, 47, 51, 56, 58, 59, 61, 64, 73, 76, 78, 80, 82, 83, 87] }) Tree 1: Descendants(Descendants { descendants: [15, 17, 20, 32, 37, 38, 49, 52, 53, 54, 55, 57, 60, 68, 71, 75, 77, 79, 84, 88, 94, 95] }) -Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(1), normal: [-0.2169, 0.0505, 0.0138, 0.1637, -0.1566, -0.2702, 0.1215, 0.0399, 0.3132, 0.3827, "other ..."] }) +Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(1), normal: [-0.2169, 0.0505, 0.0138, 0.1637, -0.1566, -0.2702, 0.1215, 0.0399, 0.3132, 0.3827, "other ..."] }) Tree 3: Descendants(Descendants { descendants: [0, 9, 10, 29, 31, 34, 35, 42, 48, 50, 70, 74, 89, 92, 93] }) Tree 4: Descendants(Descendants { descendants: [2, 3, 5, 6, 7, 11, 14, 25, 39, 40, 44, 62, 67, 69, 81, 86, 91, 96, 97, 98, 99] }) Tree 5: Descendants(Descendants { descendants: [1, 13, 16, 19, 26, 27, 28, 30, 33, 36, 41, 45, 46, 63, 65, 66, 72, 85, 90] }) -Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Tree(5), normal: [0.0550, -0.0317, -0.1815, 0.1748, 0.2592, 0.1276, 0.0648, -0.3539, -0.1585, 0.0911, "other ..."] }) -Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: Tree(3), right: Tree(6), normal: [-0.0346, 0.4691, 0.1765, -0.0148, 0.0259, 0.1307, -0.1240, -0.1665, 0.0875, 0.1291, "other ..."] }) -Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Tree(7), normal: [-0.0833, -0.0190, -0.0905, 0.1619, -0.0567, 0.2069, -0.2559, -0.1137, 0.0169, -0.0864, "other ..."] }) +Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Tree(5), normal: [0.0550, -0.0317, -0.1815, 0.1748, 0.2592, 0.1276, 0.0648, -0.3539, -0.1585, 0.0911, "other ..."] }) +Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: Tree(3), right: Tree(6), normal: [-0.0346, 0.4691, 0.1765, -0.0148, 0.0259, 0.1307, -0.1240, -0.1665, 0.0875, 0.1291, "other ..."] }) +Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Tree(7), normal: [-0.0833, -0.0190, -0.0905, 0.1619, -0.0567, 0.2069, -0.2559, -0.1137, 0.0169, -0.0864, "other ..."] }) Tree 9: Descendants(Descendants { descendants: [1, 6, 7, 10, 13, 26, 28, 31, 36, 39, 42, 48, 50, 62, 70, 86, 96, 97] }) Tree 10: Descendants(Descendants { descendants: [2, 3, 9, 11, 27, 29, 30, 45, 46, 57, 65, 66, 67, 72, 73, 85, 91] }) -Tree 11: SplitPlaneNormal(SplitPlaneNormal { left: Tree(9), right: Tree(10), normal: [0.0163, 0.0439, -0.0411, -0.3070, 0.1504, 0.3722, 0.0400, 0.0252, 0.0917, -0.0341, "other ..."] }) +Tree 11: SplitPlaneNormal(SplitPlaneNormal { left: Tree(9), right: Tree(10), normal: [0.0163, 0.0439, -0.0411, -0.3070, 0.1504, 0.3722, 0.0400, 0.0252, 0.0917, -0.0341, "other ..."] }) Tree 12: Descendants(Descendants { descendants: [0, 4, 8, 17, 18, 19, 32, 35, 37, 41, 47, 51, 52, 53, 54, 55, 60, 61, 64, 68, 75, 77, 78, 79, 87, 90, 95, 98] }) Tree 13: Descendants(Descendants { descendants: [5, 12, 15, 16, 20, 21, 22, 23, 24, 38, 43, 56, 58, 59, 63, 69, 71, 74, 76, 80, 82, 83, 89, 93, 94] }) Tree 14: Descendants(Descendants { descendants: [14, 25, 33, 34, 40, 44, 49, 81, 84, 88, 92, 99] }) -Tree 15: SplitPlaneNormal(SplitPlaneNormal { left: Tree(13), right: Tree(14), normal: [-0.3533, -0.1026, 0.1114, 0.1360, -0.1732, 
0.1633, -0.0302, -0.1458, -0.0030, -0.2101, "other ..."] }) -Tree 16: SplitPlaneNormal(SplitPlaneNormal { left: Tree(12), right: Tree(15), normal: [0.2420, -0.1007, 0.3792, 0.0349, -0.0702, 0.0947, -0.3028, -0.1386, -0.2889, 0.0402, "other ..."] }) -Tree 17: SplitPlaneNormal(SplitPlaneNormal { left: Tree(11), right: Tree(16), normal: [-0.0014, -0.0866, -0.0081, 0.0327, -0.1021, -0.1147, 0.0955, 0.0205, 0.2358, -0.1408, "other ..."] }) +Tree 15: SplitPlaneNormal(SplitPlaneNormal { left: Tree(13), right: Tree(14), normal: [-0.3533, -0.1026, 0.1114, 0.1360, -0.1732, 0.1633, -0.0302, -0.1458, -0.0030, -0.2101, "other ..."] }) +Tree 16: SplitPlaneNormal(SplitPlaneNormal { left: Tree(12), right: Tree(15), normal: [0.2420, -0.1007, 0.3792, 0.0349, -0.0702, 0.0947, -0.3028, -0.1386, -0.2889, 0.0402, "other ..."] }) +Tree 17: SplitPlaneNormal(SplitPlaneNormal { left: Tree(11), right: Tree(16), normal: [-0.0014, -0.0866, -0.0081, 0.0327, -0.1021, -0.1147, 0.0955, 0.0205, 0.2358, -0.1408, "other ..."] }) Tree 18: Descendants(Descendants { descendants: [0, 2, 4, 6, 7, 9, 11, 14, 25, 26, 27, 28, 29, 33, 34, 40, 42, 62, 67, 81, 86, 91, 96, 97, 98] }) Tree 19: Descendants(Descendants { descendants: [1, 8, 10, 13, 19, 30, 31, 35, 36, 39, 44, 45, 46, 50, 51, 52, 54, 56, 66, 72, 80, 85, 89, 90, 92, 93] }) -Tree 20: SplitPlaneNormal(SplitPlaneNormal { left: Tree(18), right: Tree(19), normal: [-0.0557, -0.0514, -0.1661, -0.1599, 0.0364, -0.1539, 0.0511, -0.1720, -0.0989, -0.0885, "other ..."] }) +Tree 20: SplitPlaneNormal(SplitPlaneNormal { left: Tree(18), right: Tree(19), normal: [-0.0557, -0.0514, -0.1661, -0.1599, 0.0364, -0.1539, 0.0511, -0.1720, -0.0989, -0.0885, "other ..."] }) Tree 21: Descendants(Descendants { descendants: [3, 5, 15, 16, 17, 18, 20, 22, 24, 32, 38, 48, 49, 58, 59, 60, 63, 65, 69, 74, 75, 76, 79, 82, 83, 84, 87, 88, 94] }) Tree 22: Descendants(Descendants { descendants: [12, 21, 23, 37, 41, 43, 47, 53, 55, 57, 61, 64, 68, 70, 71, 73, 77, 78, 95, 99] }) -Tree 23: SplitPlaneNormal(SplitPlaneNormal { left: Tree(21), right: Tree(22), normal: [0.1147, 0.1494, -0.3442, -0.2764, 0.1638, -0.3154, 0.0552, 0.1184, 0.1290, 0.0611, "other ..."] }) -Tree 24: SplitPlaneNormal(SplitPlaneNormal { left: Tree(20), right: Tree(23), normal: [-0.0617, -0.0186, -0.1350, -0.3128, -0.0542, 0.0524, 0.2907, 0.1259, 0.0908, 0.2518, "other ..."] }) +Tree 23: SplitPlaneNormal(SplitPlaneNormal { left: Tree(21), right: Tree(22), normal: [0.1147, 0.1494, -0.3442, -0.2764, 0.1638, -0.3154, 0.0552, 0.1184, 0.1290, 0.0611, "other ..."] }) +Tree 24: SplitPlaneNormal(SplitPlaneNormal { left: Tree(20), right: Tree(23), normal: [-0.0617, -0.0186, -0.1350, -0.3128, -0.0542, 0.0524, 0.2907, 0.1259, 0.0908, 0.2518, "other ..."] }) Tree 25: Descendants(Descendants { descendants: [0, 14, 32, 34, 74, 84, 92] }) Tree 26: Descendants(Descendants { descendants: [5, 12, 20, 22, 24, 38, 43, 49, 58, 59, 76, 82, 83, 88, 94] }) Tree 27: Descendants(Descendants { descendants: [3, 4, 6, 8, 15, 17, 18, 21, 23, 37, 47, 50, 52, 53, 54, 55, 61, 64, 67, 68, 71, 73, 75, 78, 79, 80, 87, 95] }) -Tree 28: SplitPlaneNormal(SplitPlaneNormal { left: Tree(26), right: Tree(27), normal: [0.0098, 0.2629, -0.1633, -0.1082, -0.0680, -0.1540, 0.0015, 0.1634, 0.2796, 0.1046, "other ..."] }) +Tree 28: SplitPlaneNormal(SplitPlaneNormal { left: Tree(26), right: Tree(27), normal: [0.0098, 0.2629, -0.1633, -0.1082, -0.0680, -0.1540, 0.0015, 0.1634, 0.2796, 0.1046, "other ..."] }) Tree 29: Descendants(Descendants { descendants: [2, 7, 9, 11, 31, 33, 35, 39, 44, 46, 
66, 69, 72, 81, 86, 89, 97, 98, 99] }) -Tree 30: SplitPlaneNormal(SplitPlaneNormal { left: Tree(28), right: Tree(29), normal: [-0.1005, -0.0667, 0.1734, 0.2457, 0.1330, 0.0145, -0.1141, -0.0260, -0.1916, -0.1144, "other ..."] }) -Tree 31: SplitPlaneNormal(SplitPlaneNormal { left: Tree(25), right: Tree(30), normal: [0.0357, 0.2985, -0.2477, -0.0879, 0.2653, 0.2303, 0.1487, 0.0825, -0.0937, 0.1187, "other ..."] }) +Tree 30: SplitPlaneNormal(SplitPlaneNormal { left: Tree(28), right: Tree(29), normal: [-0.1005, -0.0667, 0.1734, 0.2457, 0.1330, 0.0145, -0.1141, -0.0260, -0.1916, -0.1144, "other ..."] }) +Tree 31: SplitPlaneNormal(SplitPlaneNormal { left: Tree(25), right: Tree(30), normal: [0.0357, 0.2985, -0.2477, -0.0879, 0.2653, 0.2303, 0.1487, 0.0825, -0.0937, 0.1187, "other ..."] }) Tree 32: Descendants(Descendants { descendants: [13, 16, 19, 25, 26, 27, 30, 40, 41, 42, 56, 60, 62, 77, 85, 90, 91] }) Tree 33: Descendants(Descendants { descendants: [1, 10, 28, 29, 36, 45, 48, 51, 57, 63, 65, 70, 93, 96] }) -Tree 34: SplitPlaneNormal(SplitPlaneNormal { left: Tree(32), right: Tree(33), normal: [0.1342, -0.2475, -0.0482, 0.0488, 0.0620, -0.3127, 0.1362, 0.1854, -0.4856, 0.1147, "other ..."] }) -Tree 35: SplitPlaneNormal(SplitPlaneNormal { left: Tree(31), right: Tree(34), normal: [-0.1861, -0.2141, -0.1520, 0.0974, 0.0900, 0.2616, -0.0688, -0.2960, -0.0378, -0.0949, "other ..."] }) +Tree 34: SplitPlaneNormal(SplitPlaneNormal { left: Tree(32), right: Tree(33), normal: [0.1342, -0.2475, -0.0482, 0.0488, 0.0620, -0.3127, 0.1362, 0.1854, -0.4856, 0.1147, "other ..."] }) +Tree 35: SplitPlaneNormal(SplitPlaneNormal { left: Tree(31), right: Tree(34), normal: [-0.1861, -0.2141, -0.1520, 0.0974, 0.0900, 0.2616, -0.0688, -0.2960, -0.0378, -0.0949, "other ..."] }) Tree 36: Descendants(Descendants { descendants: [8, 12, 15, 16, 21, 22, 24, 30, 34, 46, 52, 53, 56, 59, 60, 71, 74, 87, 89, 92, 93] }) Tree 37: Descendants(Descendants { descendants: [2, 5, 7, 9, 14, 17, 19, 25, 27, 31, 36, 39, 40, 42, 55, 62, 66, 67, 72, 80, 81, 84, 86, 88, 91, 96, 97] }) Tree 38: Descendants(Descendants { descendants: [0, 4, 6, 11, 23, 28, 33, 44, 54, 63, 69, 75, 94, 98, 99] }) -Tree 39: SplitPlaneNormal(SplitPlaneNormal { left: Tree(37), right: Tree(38), normal: [0.2761, -0.2159, -0.3421, 0.2165, 0.3175, -0.0799, 0.0880, 0.0680, 0.0497, -0.0448, "other ..."] }) -Tree 40: SplitPlaneNormal(SplitPlaneNormal { left: Tree(36), right: Tree(39), normal: [-0.0582, 0.0824, 0.2122, 0.1920, 0.2041, -0.0451, -0.0324, 0.0349, 0.0748, 0.2791, "other ..."] }) +Tree 39: SplitPlaneNormal(SplitPlaneNormal { left: Tree(37), right: Tree(38), normal: [0.2761, -0.2159, -0.3421, 0.2165, 0.3175, -0.0799, 0.0880, 0.0680, 0.0497, -0.0448, "other ..."] }) +Tree 40: SplitPlaneNormal(SplitPlaneNormal { left: Tree(36), right: Tree(39), normal: [-0.0582, 0.0824, 0.2122, 0.1920, 0.2041, -0.0451, -0.0324, 0.0349, 0.0748, 0.2791, "other ..."] }) Tree 41: Descendants(Descendants { descendants: [1, 3, 18, 29, 41, 43, 45, 47, 48, 49, 50, 58, 64, 65, 73, 76, 77, 78, 82, 83, 85] }) Tree 42: Descendants(Descendants { descendants: [10, 13, 20, 26, 32, 35, 37, 38, 51, 57, 61, 68, 70, 79, 90, 95] }) -Tree 43: SplitPlaneNormal(SplitPlaneNormal { left: Tree(41), right: Tree(42), normal: [-0.2362, 0.0074, -0.1011, 0.1902, -0.2410, -0.2179, 0.1000, -0.2451, 0.0583, 0.2037, "other ..."] }) -Tree 44: SplitPlaneNormal(SplitPlaneNormal { left: Tree(40), right: Tree(43), normal: [-0.0723, -0.1644, -0.4771, -0.0662, 0.1894, 0.0474, 0.2151, 0.0954, -0.0123, 0.1914, "other ..."] 
}) +Tree 43: SplitPlaneNormal(SplitPlaneNormal { left: Tree(41), right: Tree(42), normal: [-0.2362, 0.0074, -0.1011, 0.1902, -0.2410, -0.2179, 0.1000, -0.2451, 0.0583, 0.2037, "other ..."] }) +Tree 44: SplitPlaneNormal(SplitPlaneNormal { left: Tree(40), right: Tree(43), normal: [-0.0723, -0.1644, -0.4771, -0.0662, 0.1894, 0.0474, 0.2151, 0.0954, -0.0123, 0.1914, "other ..."] }) Tree 45: Descendants(Descendants { descendants: [20, 22, 23, 43, 49, 53, 55, 58, 75, 82, 88, 94] }) Tree 46: Descendants(Descendants { descendants: [12, 16, 21, 30, 31, 39, 41, 45, 46, 47, 51, 56, 59, 66, 72, 77, 78, 80, 85, 91, 93, 98] }) Tree 47: Descendants(Descendants { descendants: [8, 13, 19, 27, 37, 40, 42, 52, 64, 67, 68, 71, 73, 87] }) -Tree 48: SplitPlaneNormal(SplitPlaneNormal { left: Tree(46), right: Tree(47), normal: [-0.0763, 0.0885, -0.2724, 0.0145, 0.0947, -0.2347, 0.1067, -0.0488, 0.0689, 0.0441, "other ..."] }) +Tree 48: SplitPlaneNormal(SplitPlaneNormal { left: Tree(46), right: Tree(47), normal: [-0.0763, 0.0885, -0.2724, 0.0145, 0.0947, -0.2347, 0.1067, -0.0488, 0.0689, 0.0441, "other ..."] }) Tree 49: Descendants(Descendants { descendants: [6, 28, 54, 90, 96] }) -Tree 50: SplitPlaneNormal(SplitPlaneNormal { left: Tree(48), right: Tree(49), normal: [-0.0493, 0.0077, 0.1071, 0.2488, -0.0877, -0.1022, -0.1081, -0.0104, 0.0486, -0.0089, "other ..."] }) +Tree 50: SplitPlaneNormal(SplitPlaneNormal { left: Tree(48), right: Tree(49), normal: [-0.0493, 0.0077, 0.1071, 0.2488, -0.0877, -0.1022, -0.1081, -0.0104, 0.0486, -0.0089, "other ..."] }) Tree 51: Descendants(Descendants { descendants: [0, 1, 4, 10, 29, 35, 48, 50, 57, 63, 65, 70, 76, 83, 89, 95, 97] }) Tree 52: Descendants(Descendants { descendants: [2, 3, 5, 7, 9, 11, 14, 15, 17, 18, 24, 25, 26, 32, 33, 34, 36, 38, 44, 60, 61, 62, 69, 74, 79, 81, 84, 86, 92, 99] }) -Tree 53: SplitPlaneNormal(SplitPlaneNormal { left: Tree(51), right: Tree(52), normal: [-0.2008, 0.1922, 0.3079, -0.0453, -0.3244, 0.1055, -0.0466, -0.1035, 0.1446, -0.0438, "other ..."] }) -Tree 54: SplitPlaneNormal(SplitPlaneNormal { left: Tree(50), right: Tree(53), normal: [-0.2175, -0.1158, -0.0781, 0.0622, -0.2080, 0.0082, 0.1641, 0.0493, -0.0815, 0.2456, "other ..."] }) -Tree 55: SplitPlaneNormal(SplitPlaneNormal { left: Tree(45), right: Tree(54), normal: [-0.0297, 0.0635, -0.0090, 0.0742, 0.0765, 0.2930, -0.2378, -0.0111, -0.1069, -0.1084, "other ..."] }) +Tree 53: SplitPlaneNormal(SplitPlaneNormal { left: Tree(51), right: Tree(52), normal: [-0.2008, 0.1922, 0.3079, -0.0453, -0.3244, 0.1055, -0.0466, -0.1035, 0.1446, -0.0438, "other ..."] }) +Tree 54: SplitPlaneNormal(SplitPlaneNormal { left: Tree(50), right: Tree(53), normal: [-0.2175, -0.1158, -0.0781, 0.0622, -0.2080, 0.0082, 0.1641, 0.0493, -0.0815, 0.2456, "other ..."] }) +Tree 55: SplitPlaneNormal(SplitPlaneNormal { left: Tree(45), right: Tree(54), normal: [-0.0297, 0.0635, -0.0090, 0.0742, 0.0765, 0.2930, -0.2378, -0.0111, -0.1069, -0.1084, "other ..."] }) Tree 56: Descendants(Descendants { descendants: [1, 2, 3, 6, 7, 14, 26, 27, 33, 36, 62, 81, 86, 93, 96, 99] }) Tree 57: Descendants(Descendants { descendants: [0, 4, 10, 11, 28, 29, 39, 42, 44, 50, 57, 67, 70, 74, 95, 97, 98] }) -Tree 58: SplitPlaneNormal(SplitPlaneNormal { left: Tree(56), right: Tree(57), normal: [0.0169, -0.4385, -0.4607, -0.2781, 0.3243, 0.0145, 0.1026, 0.2875, 0.0069, -0.1195, "other ..."] }) +Tree 58: SplitPlaneNormal(SplitPlaneNormal { left: Tree(56), right: Tree(57), normal: [0.0169, -0.4385, -0.4607, -0.2781, 0.3243, 0.0145, 0.1026, 0.2875, 
0.0069, -0.1195, "other ..."] }) Tree 59: Descendants(Descendants { descendants: [5, 9, 15, 23, 31, 34, 38, 45, 48, 58, 63, 65, 69, 77, 80, 83, 89] }) -Tree 60: SplitPlaneNormal(SplitPlaneNormal { left: Tree(58), right: Tree(59), normal: [0.1786, -0.1600, -0.2775, -0.0898, 0.1079, 0.1067, -0.0025, 0.0785, -0.0136, -0.0227, "other ..."] }) +Tree 60: SplitPlaneNormal(SplitPlaneNormal { left: Tree(58), right: Tree(59), normal: [0.1786, -0.1600, -0.2775, -0.0898, 0.1079, 0.1067, -0.0025, 0.0785, -0.0136, -0.0227, "other ..."] }) Tree 61: Descendants(Descendants { descendants: [17, 19, 20, 25, 32, 35, 37, 40, 41, 49, 52, 53, 55, 60, 71, 75, 79, 82, 84, 88, 90, 91, 92] }) Tree 62: Descendants(Descendants { descendants: [8, 12, 13, 16, 18, 21, 22, 24, 30, 43, 46, 47, 51, 54, 56, 59, 61, 64, 66, 68, 72, 73, 76, 78, 85, 87, 94] }) -Tree 63: SplitPlaneNormal(SplitPlaneNormal { left: Tree(61), right: Tree(62), normal: [0.0237, 0.2316, 0.0207, 0.0743, 0.2916, 0.2200, 0.0594, 0.0390, -0.4108, -0.0552, "other ..."] }) -Tree 64: SplitPlaneNormal(SplitPlaneNormal { left: Tree(60), right: Tree(63), normal: [-0.0179, -0.0168, -0.2421, -0.1799, -0.1356, 0.0168, 0.1303, -0.2701, 0.3890, -0.1482, "other ..."] }) +Tree 63: SplitPlaneNormal(SplitPlaneNormal { left: Tree(61), right: Tree(62), normal: [0.0237, 0.2316, 0.0207, 0.0743, 0.2916, 0.2200, 0.0594, 0.0390, -0.4108, -0.0552, "other ..."] }) +Tree 64: SplitPlaneNormal(SplitPlaneNormal { left: Tree(60), right: Tree(63), normal: [-0.0179, -0.0168, -0.2421, -0.1799, -0.1356, 0.0168, 0.1303, -0.2701, 0.3890, -0.1482, "other ..."] }) Tree 65: Descendants(Descendants { descendants: [0, 2, 3, 7, 9, 14, 17, 25, 37, 39, 40, 55, 60, 65, 67, 70, 77, 96, 97, 98] }) Tree 66: Descendants(Descendants { descendants: [1, 6, 8, 10, 11, 13, 19, 26, 27, 28, 30, 42, 46, 52, 62, 66, 72, 73, 85, 87, 90, 91] }) -Tree 67: SplitPlaneNormal(SplitPlaneNormal { left: Tree(65), right: Tree(66), normal: [0.0741, 0.1397, -0.1747, 0.1955, 0.0976, -0.0066, -0.2642, -0.4876, -0.0256, -0.1423, "other ..."] }) +Tree 67: SplitPlaneNormal(SplitPlaneNormal { left: Tree(65), right: Tree(66), normal: [0.0741, 0.1397, -0.1747, 0.1955, 0.0976, -0.0066, -0.2642, -0.4876, -0.0256, -0.1423, "other ..."] }) Tree 68: Descendants(Descendants { descendants: [32, 34, 35, 44, 74, 75, 84, 92, 99] }) Tree 69: Descendants(Descendants { descendants: [4, 15, 18, 20, 24, 29, 38, 47, 57, 58, 61, 63, 64, 68, 69, 76, 79, 81, 83, 86, 94, 95] }) Tree 70: Descendants(Descendants { descendants: [5, 22, 31, 33, 36, 45, 48, 50, 51, 54, 56, 80, 89, 93] }) -Tree 71: SplitPlaneNormal(SplitPlaneNormal { left: Tree(69), right: Tree(70), normal: [0.0908, -0.0903, 0.3735, 0.0708, -0.0676, -0.1918, -0.1890, -0.2189, -0.3541, -0.3300, "other ..."] }) +Tree 71: SplitPlaneNormal(SplitPlaneNormal { left: Tree(69), right: Tree(70), normal: [0.0908, -0.0903, 0.3735, 0.0708, -0.0676, -0.1918, -0.1890, -0.2189, -0.3541, -0.3300, "other ..."] }) Tree 72: Descendants(Descendants { descendants: [12, 16, 21, 23, 41, 43, 49, 53, 59, 71, 78, 82, 88] }) -Tree 73: SplitPlaneNormal(SplitPlaneNormal { left: Tree(71), right: Tree(72), normal: [-0.0215, 0.0861, 0.0260, -0.2042, -0.1901, -0.1088, 0.0154, -0.2001, 0.3676, -0.0737, "other ..."] }) -Tree 74: SplitPlaneNormal(SplitPlaneNormal { left: Tree(68), right: Tree(73), normal: [0.1338, 0.3609, -0.1010, -0.1138, 0.0836, 0.0482, -0.0207, 0.0315, 0.1349, 0.1157, "other ..."] }) -Tree 75: SplitPlaneNormal(SplitPlaneNormal { left: Tree(67), right: Tree(74), normal: [0.0874, -0.1312, 0.0401, 0.1433, 
-0.0151, -0.0312, 0.0888, 0.0681, -0.2158, -0.0702, "other ..."] }) +Tree 73: SplitPlaneNormal(SplitPlaneNormal { left: Tree(71), right: Tree(72), normal: [-0.0215, 0.0861, 0.0260, -0.2042, -0.1901, -0.1088, 0.0154, -0.2001, 0.3676, -0.0737, "other ..."] }) +Tree 74: SplitPlaneNormal(SplitPlaneNormal { left: Tree(68), right: Tree(73), normal: [0.1338, 0.3609, -0.1010, -0.1138, 0.0836, 0.0482, -0.0207, 0.0315, 0.1349, 0.1157, "other ..."] }) +Tree 75: SplitPlaneNormal(SplitPlaneNormal { left: Tree(67), right: Tree(74), normal: [0.0874, -0.1312, 0.0401, 0.1433, -0.0151, -0.0312, 0.0888, 0.0681, -0.2158, -0.0702, "other ..."] }) Tree 76: Descendants(Descendants { descendants: [0, 1, 6, 19, 28, 40, 42, 48, 63, 81, 91] }) Tree 77: Descendants(Descendants { descendants: [2, 4, 7, 10, 11, 14, 17, 20, 25, 26, 31, 32, 33, 34, 36, 62, 70, 74, 86, 93, 99] }) -Tree 78: SplitPlaneNormal(SplitPlaneNormal { left: Tree(76), right: Tree(77), normal: [-0.2023, 0.0033, 0.1400, -0.2031, -0.1474, -0.0398, 0.4147, 0.3168, -0.3442, 0.1913, "other ..."] }) +Tree 78: SplitPlaneNormal(SplitPlaneNormal { left: Tree(76), right: Tree(77), normal: [-0.2023, 0.0033, 0.1400, -0.2031, -0.1474, -0.0398, 0.4147, 0.3168, -0.3442, 0.1913, "other ..."] }) Tree 79: Descendants(Descendants { descendants: [35, 38, 49, 60, 75, 76, 84, 88, 92] }) -Tree 80: SplitPlaneNormal(SplitPlaneNormal { left: Tree(78), right: Tree(79), normal: [-0.2920, -0.3421, -0.2821, -0.1193, -0.0368, 0.1317, 0.2692, -0.1174, -0.0436, -0.0298, "other ..."] }) +Tree 80: SplitPlaneNormal(SplitPlaneNormal { left: Tree(78), right: Tree(79), normal: [-0.2920, -0.3421, -0.2821, -0.1193, -0.0368, 0.1317, 0.2692, -0.1174, -0.0436, -0.0298, "other ..."] }) Tree 81: Descendants(Descendants { descendants: [8, 27, 30, 85] }) Tree 82: Descendants(Descendants { descendants: [12, 18, 21, 23, 29, 37, 39, 41, 43, 44, 46, 47, 51, 52, 53, 55, 56, 59, 64, 66, 67, 68, 69, 71, 72, 78, 82, 96, 97, 98] }) Tree 83: Descendants(Descendants { descendants: [3, 5, 9, 13, 15, 16, 22, 24, 45, 50, 54, 57, 58, 61, 65, 73, 77, 79, 80, 83, 87, 89, 90, 94, 95] }) -Tree 84: SplitPlaneNormal(SplitPlaneNormal { left: Tree(82), right: Tree(83), normal: [-0.0973, 0.2024, -0.0486, 0.1536, -0.0050, 0.0761, -0.0099, -0.1343, -0.3098, 0.2276, "other ..."] }) -Tree 85: SplitPlaneNormal(SplitPlaneNormal { left: Tree(81), right: Tree(84), normal: [0.1433, -0.2044, -0.0819, -0.1305, 0.0272, 0.1805, 0.0805, 0.1128, 0.0279, -0.0258, "other ..."] }) -Tree 86: SplitPlaneNormal(SplitPlaneNormal { left: Tree(80), right: Tree(85), normal: [0.0240, 0.1785, 0.0265, -0.3440, 0.1702, 0.0470, 0.0107, 0.2175, -0.0317, 0.1600, "other ..."] }) +Tree 84: SplitPlaneNormal(SplitPlaneNormal { left: Tree(82), right: Tree(83), normal: [-0.0973, 0.2024, -0.0486, 0.1536, -0.0050, 0.0761, -0.0099, -0.1343, -0.3098, 0.2276, "other ..."] }) +Tree 85: SplitPlaneNormal(SplitPlaneNormal { left: Tree(81), right: Tree(84), normal: [0.1433, -0.2044, -0.0819, -0.1305, 0.0272, 0.1805, 0.0805, 0.1128, 0.0279, -0.0258, "other ..."] }) +Tree 86: SplitPlaneNormal(SplitPlaneNormal { left: Tree(80), right: Tree(85), normal: [0.0240, 0.1785, 0.0265, -0.3440, 0.1702, 0.0470, 0.0107, 0.2175, -0.0317, 0.1600, "other ..."] }) Tree 87: Descendants(Descendants { descendants: [8, 13, 16, 21, 27, 28, 29, 30, 37, 41, 46, 47, 48, 56, 60, 64, 73, 78, 85, 91] }) Tree 88: Descendants(Descendants { descendants: [1, 2, 3, 6, 7, 14, 17, 40, 42, 52, 62, 67, 77, 96, 98] }) Tree 89: Descendants(Descendants { descendants: [20, 26, 32, 38, 49, 58, 61, 68, 75, 76, 
79, 81, 86, 88, 94] }) Tree 90: Descendants(Descendants { descendants: [24, 31, 33, 36, 39, 45, 50, 51, 54, 57, 63, 66, 72, 80, 89, 90, 93, 95, 97] }) Tree 91: Descendants(Descendants { descendants: [0, 4, 5, 9, 12, 15, 18, 22, 23, 34, 35, 43, 44, 53, 55, 69, 71, 74, 82, 83, 84, 87, 92, 99] }) -Tree 92: SplitPlaneNormal(SplitPlaneNormal { left: Tree(90), right: Tree(91), normal: [0.1432, -0.2381, -0.0722, -0.2146, -0.0888, -0.0131, 0.0621, 0.2812, 0.2690, -0.0300, "other ..."] }) +Tree 92: SplitPlaneNormal(SplitPlaneNormal { left: Tree(90), right: Tree(91), normal: [0.1432, -0.2381, -0.0722, -0.2146, -0.0888, -0.0131, 0.0621, 0.2812, 0.2690, -0.0300, "other ..."] }) Tree 93: Descendants(Descendants { descendants: [10, 11, 19, 25, 59, 65, 70] }) -Tree 94: SplitPlaneNormal(SplitPlaneNormal { left: Tree(92), right: Tree(93), normal: [-0.3120, -0.2834, -0.0907, 0.0227, 0.0090, 0.3812, 0.1933, -0.4350, -0.0939, -0.1180, "other ..."] }) -Tree 95: SplitPlaneNormal(SplitPlaneNormal { left: Tree(89), right: Tree(94), normal: [-0.0200, -0.2200, 0.1948, -0.1418, 0.1112, 0.0083, -0.0510, -0.0801, -0.3187, -0.1000, "other ..."] }) -Tree 96: SplitPlaneNormal(SplitPlaneNormal { left: Tree(88), right: Tree(95), normal: [0.0684, -0.2434, -0.1543, 0.0337, 0.1784, 0.1295, 0.3061, -0.0175, -0.0679, -0.1419, "other ..."] }) -Tree 97: SplitPlaneNormal(SplitPlaneNormal { left: Tree(87), right: Tree(96), normal: [-0.0316, -0.1036, 0.3252, -0.0985, -0.1286, -0.4867, -0.0286, 0.1159, -0.0177, 0.2205, "other ..."] }) +Tree 94: SplitPlaneNormal(SplitPlaneNormal { left: Tree(92), right: Tree(93), normal: [-0.3120, -0.2834, -0.0907, 0.0227, 0.0090, 0.3812, 0.1933, -0.4350, -0.0939, -0.1180, "other ..."] }) +Tree 95: SplitPlaneNormal(SplitPlaneNormal { left: Tree(89), right: Tree(94), normal: [-0.0200, -0.2200, 0.1948, -0.1418, 0.1112, 0.0083, -0.0510, -0.0801, -0.3187, -0.1000, "other ..."] }) +Tree 96: SplitPlaneNormal(SplitPlaneNormal { left: Tree(88), right: Tree(95), normal: [0.0684, -0.2434, -0.1543, 0.0337, 0.1784, 0.1295, 0.3061, -0.0175, -0.0679, -0.1419, "other ..."] }) +Tree 97: SplitPlaneNormal(SplitPlaneNormal { left: Tree(87), right: Tree(96), normal: [-0.0316, -0.1036, 0.3252, -0.0985, -0.1286, -0.4867, -0.0286, 0.1159, -0.0177, 0.2205, "other ..."] }) Root: Metadata { dimensions: 30, items: RoaringBitmap<100 values between 0 and 99>, roots: [8, 17, 24, 35, 44, 55, 64, 75, 86, 97], distance: "euclidean" } diff --git a/src/tests/writer.rs b/src/tests/writer.rs index 6a9d6d17..aa8b5511 100644 --- a/src/tests/writer.rs +++ b/src/tests/writer.rs @@ -178,7 +178,7 @@ fn write_vectors_until_there_is_a_split() { Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 2.0000, 2.0000] }) Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 3.0000, 3.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 2, 3] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(0), normal: [-0.5774, -0.5774, -0.5774] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(0), normal: [-0.5774, -0.5774, -0.5774] }) Root: Metadata { dimensions: 3, items: RoaringBitmap<[0, 1, 2, 3]>, roots: [1], distance: "euclidean" } "###); } @@ -296,10 +296,10 @@ fn overwrite_one_item_incremental() { Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] }) Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 3] }) - Tree 1: 
SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [4], distance: "euclidean" } "###); @@ -321,10 +321,10 @@ fn overwrite_one_item_incremental() { Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [6.0000, 0.0000] }) Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] }) Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Item(1), right: Tree(5), normal: [0.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Item(1), right: Tree(5), normal: [0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Tree 5: Descendants(Descendants { descendants: [2, 3] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [4], distance: "euclidean" } "###); @@ -482,7 +482,7 @@ fn delete_one_leaf_in_a_split() { Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 2] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(0), normal: [1.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(0), normal: [1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2]>, roots: [1], distance: "euclidean" } "###); @@ -564,10 +564,10 @@ fn delete_one_item() { Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] }) Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 3] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: 
[1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [4], distance: "euclidean" } "###); @@ -589,8 +589,8 @@ fn delete_one_item() { Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Tree 1: Descendants(Descendants { descendants: [1, 2] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 4, 5]>, roots: [4], distance: "euclidean" } "###); @@ -611,8 +611,8 @@ fn delete_one_item() { Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] }) Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Item(2), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Item(2), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 2, 4, 5]>, roots: [4], distance: "euclidean" } "###); } @@ -714,7 +714,7 @@ fn add_one_item_incrementally_to_create_a_split_node() { Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] }) Tree 1: Descendants(Descendants { descendants: [1, 2] }) - Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(1), normal: [1.0000, 0.0000] }) + Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(1), normal: [1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2]>, roots: [2], distance: "euclidean" } "###); } @@ -742,10 +742,10 @@ fn add_one_item_incrementally() { Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] }) Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 3] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [4], distance: "euclidean" } "###); @@ -768,10 +768,10 @@ fn add_one_item_incrementally() { Item 5: Leaf(Leaf { header: 
NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Item 25: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [25.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 3] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(5), normal: [0.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(5), normal: [0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Tree 5: Descendants(Descendants { descendants: [2, 25] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5, 25]>, roots: [4], distance: "euclidean" } "###); @@ -796,12 +796,12 @@ fn add_one_item_incrementally() { Item 8: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [8.0000, 0.0000] }) Item 25: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [25.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 3] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(7), normal: [0.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Tree(7), normal: [0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Tree 6: Descendants(Descendants { descendants: [8, 25] }) - Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: Tree(6), right: Item(2), normal: [0.0000, 0.0000] }) + Tree 7: SplitPlaneNormal(SplitPlaneNormal { left: Tree(6), right: Item(2), normal: [0.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5, 8, 25]>, roots: [4], distance: "euclidean" } "###); } @@ -829,11 +829,11 @@ fn delete_extraneous_tree() { Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 0.0000, 0.0000, 0.0000] }) Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000, 0.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 2, 3, 4] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(0), normal: [1.0000, 0.0000, 0.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(0), normal: [1.0000, 0.0000, 0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [1, 2, 3, 4] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) Tree 4: Descendants(Descendants { descendants: [1, 2, 3, 4] }) - Tree 5: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) + Tree 5: 
SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) Root: Metadata { dimensions: 4, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [1, 3, 5], distance: "euclidean" } "###); @@ -851,9 +851,9 @@ fn delete_extraneous_tree() { Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 0.0000, 0.0000, 0.0000] }) Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000, 0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [1, 2, 3, 4] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(2), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) Tree 4: Descendants(Descendants { descendants: [1, 2, 3, 4] }) - Tree 5: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) + Tree 5: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [3, 5], distance: "euclidean" } "###); @@ -871,7 +871,7 @@ fn delete_extraneous_tree() { Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 0.0000, 0.0000, 0.0000] }) Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000, 0.0000, 0.0000] }) Tree 4: Descendants(Descendants { descendants: [1, 2, 3, 4] }) - Tree 5: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) + Tree 5: SplitPlaneNormal(SplitPlaneNormal { left: Tree(4), right: Item(0), normal: [-1.0000, 0.0000, 0.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4]>, roots: [5], distance: "euclidean" } "###); } @@ -899,10 +899,10 @@ fn reuse_node_id() { Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] }) Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [1, 3] }) - Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) + Tree 1: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(2), normal: [0.0000, 0.0000] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [4], distance: "euclidean" } "###); @@ -924,8 +924,8 @@ fn reuse_node_id() { Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Tree 1: Descendants(Descendants { descendants: [2, 3] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Tree(2), normal: [0.0000, 
0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 2, 3, 4, 5]>, roots: [4], distance: "euclidean" } "###); @@ -948,9 +948,9 @@ fn reuse_node_id() { Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] }) Tree 0: Descendants(Descendants { descendants: [2, 3] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(5), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) - Tree 5: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(1), normal: [0.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(5), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 5: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(1), normal: [0.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [4], distance: "euclidean" } "###); @@ -973,13 +973,13 @@ fn reuse_node_id() { Tree 0: Descendants(Descendants { descendants: [2, 3] }) Tree 1: Descendants(Descendants { descendants: [1, 3] }) Tree 2: Descendants(Descendants { descendants: [4, 5] }) - Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(5), right: Tree(2), normal: [0.0000, 0.0000] }) - Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) - Tree 5: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(1), normal: [0.0000, 0.0000] }) - Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Item(5), normal: [0.0000, 0.0000] }) + Tree 3: SplitPlaneNormal(SplitPlaneNormal { left: Tree(5), right: Tree(2), normal: [0.0000, 0.0000] }) + Tree 4: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] }) + Tree 5: SplitPlaneNormal(SplitPlaneNormal { left: Tree(0), right: Item(1), normal: [0.0000, 0.0000] }) + Tree 6: SplitPlaneNormal(SplitPlaneNormal { left: Tree(1), right: Item(5), normal: [0.0000, 0.0000] }) Tree 7: Descendants(Descendants { descendants: [2, 4] }) - Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(6), right: Tree(7), normal: [0.0000, 0.0000] }) - Tree 9: SplitPlaneNormal(SplitPlaneNormal { left: Tree(8), right: Item(0), normal: [-1.0000, 0.0000] }) + Tree 8: SplitPlaneNormal(SplitPlaneNormal { left: Tree(6), right: Tree(7), normal: [0.0000, 0.0000] }) + Tree 9: SplitPlaneNormal(SplitPlaneNormal { left: Tree(8), right: Item(0), normal: [-1.0000, 0.0000] }) Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [4, 9], distance: "euclidean" } "###); } From 71f6659bb4aeed3ea64d536ba566034f66c0ad4d Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 11:56:03 +0200 Subject: [PATCH 58/61] making the constants in unaligned_vector::binary_quantized more explicits --- src/unaligned_vector/binary_quantized.rs | 34 +++++++++++++----------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index d087f1a3..27839956 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -1,5 +1,5 @@ use std::borrow::Cow; -use std::mem::{size_of, transmute}; +use std::mem::transmute; use 
std::slice::ChunksExact; use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; @@ -7,14 +7,16 @@ use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; /// The type of the words used to quantize a vector type QuantizedWord = u64; /// The size of the words used to quantize a vector -const QUANTIZED_WORD_SIZE: usize = QuantizedWord::BITS as usize; +const QUANTIZED_WORD_BITS: usize = QuantizedWord::BITS as usize; +/// The number of bytes composing a Word +const QUANTIZED_WORD_BYTES: usize = std::mem::size_of::(); #[derive(Clone, Debug, PartialEq, Eq)] pub enum BinaryQuantized {} impl UnalignedVectorCodec for BinaryQuantized { fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { - let rem = bytes.len() % size_of::(); + let rem = bytes.len() % QUANTIZED_WORD_BYTES; if rem == 0 { // safety: `UnalignedVector` is transparent Ok(Cow::Borrowed(unsafe { transmute::<&[u8], &UnalignedVector>(bytes) })) @@ -57,13 +59,13 @@ impl UnalignedVectorCodec for BinaryQuantized { BinaryQuantizedIterator { current_element: 0, // Force the pulling of the first word - current_iteration: QUANTIZED_WORD_SIZE, - iter: vec.vector.chunks_exact(size_of::()), + current_iteration: QUANTIZED_WORD_BITS, + iter: vec.vector.chunks_exact(QUANTIZED_WORD_BYTES), } } fn len(vec: &UnalignedVector) -> usize { - (vec.vector.len() / size_of::()) * QUANTIZED_WORD_SIZE + (vec.vector.len() / QUANTIZED_WORD_BYTES) * QUANTIZED_WORD_BITS } fn is_zero(vec: &UnalignedVector) -> bool { @@ -72,8 +74,8 @@ impl UnalignedVectorCodec for BinaryQuantized { } pub(super) fn from_slice_non_optimized(slice: &[f32]) -> Vec { - let mut output = Vec::with_capacity(slice.len() / QUANTIZED_WORD_SIZE); - for chunk in slice.chunks(QUANTIZED_WORD_SIZE) { + let mut output = Vec::with_capacity(slice.len() / QUANTIZED_WORD_BITS); + for chunk in slice.chunks(QUANTIZED_WORD_BITS) { let mut word: QuantizedWord = 0; for scalar in chunk.iter().rev() { word <<= 1; @@ -88,16 +90,16 @@ pub(super) fn from_slice_non_optimized(slice: &[f32]) -> Vec { unsafe fn from_slice_neon(slice: &[f32]) -> Vec { use core::arch::aarch64::*; - let iterations = slice.len() / size_of::(); + let iterations = slice.len() / QUANTIZED_WORD_BYTES; // The size of the returned vector must be a multiple of a word - let remaining = slice.len() % size_of::(); + let remaining = slice.len() % QUANTIZED_WORD_BYTES; let mut len = iterations; - if len % size_of::() != 0 { - len += size_of::() - len % size_of::(); + if len % QUANTIZED_WORD_BYTES != 0 { + len += QUANTIZED_WORD_BYTES - len % QUANTIZED_WORD_BYTES; } else if remaining != 0 { // if we generated a valid number of Word but we're missing a few bits // then we need to add a full Word at the end. 
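// In concrete terms (QuantizedWord = u64, so QUANTIZED_WORD_BYTES = 8): each
// iteration below packs 8 f32 lanes into a single output byte, and `len` is
// rounded up to the next multiple of 8 bytes so that the returned buffer always
// contains whole u64 words, even when the number of dimensions is not a
// multiple of 64.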
- len += size_of::(); + len += QUANTIZED_WORD_BYTES; } let mut ret = vec![0; len]; let ptr = slice.as_ptr(); @@ -263,7 +265,7 @@ impl Iterator for BinaryQuantizedIterator<'_> { type Item = f32; fn next(&mut self) -> Option { - if self.current_iteration >= QUANTIZED_WORD_SIZE { + if self.current_iteration >= QUANTIZED_WORD_BITS { let bytes = self.iter.next()?; self.current_element = QuantizedWord::from_ne_bytes(bytes.try_into().unwrap()); self.current_iteration = 0; @@ -278,9 +280,9 @@ impl Iterator for BinaryQuantizedIterator<'_> { fn size_hint(&self) -> (usize, Option) { let (low, high) = self.iter.size_hint(); - let rem = QUANTIZED_WORD_SIZE - self.current_iteration; + let rem = QUANTIZED_WORD_BITS - self.current_iteration; - (low * QUANTIZED_WORD_SIZE + rem, high.map(|h| h * QUANTIZED_WORD_SIZE + rem)) + (low * QUANTIZED_WORD_BITS + rem, high.map(|h| h * QUANTIZED_WORD_BITS + rem)) } } From 777effb409fc0ab9f797f93ba8eded676b6bb0c8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 12:02:37 +0200 Subject: [PATCH 59/61] fix the cfg feature gates around the simd function --- src/unaligned_vector/binary_quantized.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index 27839956..d4f4f291 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -86,7 +86,7 @@ pub(super) fn from_slice_non_optimized(slice: &[f32]) -> Vec { output } -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] unsafe fn from_slice_neon(slice: &[f32]) -> Vec { use core::arch::aarch64::*; @@ -117,7 +117,6 @@ unsafe fn from_slice_neon(slice: &[f32]) -> Vec { 0b_00000000_00000000_00000000_10000000, ]; - #[allow(clippy::needless_range_loop)] for i in 0..iterations { unsafe { let mut byte = 0; @@ -154,7 +153,7 @@ pub(super) fn to_vec_non_optimized(vec: &UnalignedVector) -> Ve vec.iter().collect() } -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] unsafe fn to_vec_neon(vec: &UnalignedVector) -> Vec { use core::arch::aarch64::*; @@ -185,7 +184,7 @@ unsafe fn to_vec_neon(vec: &UnalignedVector) -> Vec { output } -#[cfg(not(any(target_arch = "aarch64", target_arch = "arm64ec")))] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] unsafe fn to_vec_sse(vec: &UnalignedVector) -> Vec { use core::arch::x86_64::*; From 393a5c578baee2de70f3a1e62a1f4794b302c3a2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 17 Sep 2024 12:16:43 +0200 Subject: [PATCH 60/61] remove the small relevancy benchmark in favor of the new repository --- README.md | 6 ++ examples/relevancy.rs | 209 ------------------------------------------ 2 files changed, 6 insertions(+), 209 deletions(-) delete mode 100644 examples/relevancy.rs diff --git a/README.md b/README.md index 857167d0..7558e6f2 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,12 @@ We do this k times so that we get a forest of trees. k has to be tuned to your n Dot Product distance (originally contributed by [@psobot](https://github.com/psobot) and [@pkorobov](https://github.com/pkorobov)) reduces the provided vectors from dot (or "inner-product") space to a more query-friendly cosine space using [a method by Bachrach et al., at Microsoft Research, published in 2014](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/XboxInnerProduct.pdf). 
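A minimal sketch of that reduction, for intuition only (the helper name and the batch-oriented signature are illustrative assumptions, not arroy's actual code):

// Illustrative sketch of the dot-product to cosine reduction described above.
fn augment_for_cosine(vectors: &[Vec<f32>]) -> Vec<Vec<f32>> {
    // Largest squared norm over the whole set of stored vectors.
    let max_sq_norm = vectors
        .iter()
        .map(|v| v.iter().map(|x| x * x).sum::<f32>())
        .fold(0.0_f32, f32::max);

    vectors
        .iter()
        .map(|v| {
            let sq_norm: f32 = v.iter().map(|x| x * x).sum();
            let mut augmented = v.clone();
            // Extra coordinate sqrt(max_norm^2 - |v|^2): every augmented vector
            // ends up with the same norm, so ordering by cosine similarity on
            // the augmented vectors matches ordering by dot product on the
            // original vectors.
            augmented.push((max_sq_norm - sq_norm).max(0.0).sqrt());
            augmented
        })
        .collect()
}

At query time the query vector is instead extended with a 0, so its inner product with every augmented vector is unchanged and the cosine ranking over the augmented set reproduces the dot-product ranking over the original vectors.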
+ +## Benchmarks + +The benchmarks are available [in another repository](https://github.com/meilisearch/vector-store-relevancy-benchmark). +It shows the performances of arroy in terms of recall, disk size usage, search and indexing performances with different parameters compared to other competitors. + ## Source code It's all written in Rust and based on LMDB without a handful of ugly optimizations for performance and memory usage. You have been warned :) diff --git a/examples/relevancy.rs b/examples/relevancy.rs deleted file mode 100644 index 294791e1..00000000 --- a/examples/relevancy.rs +++ /dev/null @@ -1,209 +0,0 @@ -use std::fmt; - -use rand::seq::SliceRandom; - -use arroy::distances::{ - Angular, BinaryQuantizedAngular, BinaryQuantizedEuclidean, BinaryQuantizedManhattan, - DotProduct, Euclidean, Manhattan, -}; -use arroy::internals::{self, Leaf, NodeCodec, UnalignedVector}; -use arroy::{Database, Distance, ItemId, Result, Writer}; -use heed::{EnvOpenOptions, RwTxn}; -use rand::rngs::StdRng; -use rand::{Rng, SeedableRng}; - -const TWENTY_HUNDRED_MIB: usize = 2 * 1024 * 1024 * 1024; -const NUMBER_VECTORS: usize = 4_000; -const OVERSAMPLING: usize = 3; - -fn main() { - let dimensions_tested = [256, 512, 1024, 1536, 3072]; - let recall_tested = [1, 10, 50, 100]; - let color: Vec<_> = (0..=100).step_by(10).map(|i| Recall(i as f32 / 100.0)).collect(); - - println!("Testing the following dimensions: {dimensions_tested:?}"); - println!("Testing the following recall: @{recall_tested:?}"); - println!("Oversampling of: x{OVERSAMPLING}"); - println!("With color code: {color:?}"); - println!("Starting..."); - println!(); - - for (distance_name, func) in &[ - ( - BinaryQuantizedAngular::name(), - &measure_distance:: as &dyn Fn(usize, usize) -> f32, - ), - (Angular::name(), &measure_distance:: as &dyn Fn(usize, usize) -> f32), - ( - BinaryQuantizedManhattan::name(), - &measure_distance:: - as &dyn Fn(usize, usize) -> f32, - ), - ( - Manhattan::name(), - &measure_distance:: as &dyn Fn(usize, usize) -> f32, - ), - ( - BinaryQuantizedEuclidean::name(), - &measure_distance:: - as &dyn Fn(usize, usize) -> f32, - ), - ( - Euclidean::name(), - &measure_distance:: as &dyn Fn(usize, usize) -> f32, - ), - ( - DotProduct::name(), - &measure_distance:: as &dyn Fn(usize, usize) -> f32, - ), - ] { - let now = std::time::Instant::now(); - println!("{distance_name}"); - // The openAI dimensions - for dimensions in [256, 512, 1024, 1536, 3072] { - let mut recall = Vec::new(); - for number_fetched in recall_tested { - let rec = (func)(number_fetched, dimensions); - recall.push(Recall(rec)); - } - println!("For {dimensions:4} dim, recall: {recall:3?}"); - } - println!("Took {:?}", now.elapsed()); - println!(); - } -} - -struct Recall(f32); - -impl fmt::Debug for Recall { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.0 { - // red - f32::NEG_INFINITY..=0.25 => write!(f, "\x1b[1;31m")?, - // yellow - 0.25..=0.5 => write!(f, "\x1b[1;33m")?, - // green - 0.5..=0.75 => write!(f, "\x1b[1;32m")?, - // blue - 0.75..=0.90 => write!(f, "\x1b[1;34m")?, - // cyan - 0.90..=0.999 => write!(f, "\x1b[1;36m")?, - // underlined cyan - 0.999..=f32::INFINITY => write!(f, "\x1b[1;4;36m")?, - _ => (), - } - write!(f, "{:.2}\x1b[0m", self.0) - } -} - -fn measure_distance( - number_fetched: usize, - dimensions: usize, -) -> f32 { - let dir = tempfile::tempdir().unwrap(); - let env = - unsafe { EnvOpenOptions::new().map_size(TWENTY_HUNDRED_MIB).open(dir.path()) }.unwrap(); - - let mut rng = 
StdRng::seed_from_u64(13); - let points = generate_points(&mut rng, NUMBER_VECTORS, dimensions); - let mut wtxn = env.write_txn().unwrap(); - - let database = env - .create_database::>(&mut wtxn, None) - .unwrap(); - load_into_arroy(&mut rng, &mut wtxn, database, dimensions, &points).unwrap(); - - let reader = arroy::Reader::open(&wtxn, 0, database).unwrap(); - - let mut correctly_retrieved = 0; - for _ in 0..100 { - let querying = points.choose(&mut rng).unwrap(); - - let relevant = partial_sort_by::( - points.iter().map(|(i, v)| (*i, v.as_slice())), - &querying.1, - number_fetched, - ); - - let mut arroy = reader - .nns_by_item(&wtxn, querying.0, number_fetched * OVERSAMPLING, None, None, None) - .unwrap() - .unwrap(); - arroy.truncate(number_fetched); - - for ret in arroy { - if relevant.iter().any(|(id, _, _)| *id == ret.0) { - correctly_retrieved += 1; - } - } - } - - // println!("recall@{number_fetched}: {}", correctly_retrieved as f32 / relevant.len() as f32); - correctly_retrieved as f32 / (number_fetched as f32 * 100.0) -} - -fn partial_sort_by<'a, D: Distance>( - mut vectors: impl Iterator, - sort_by: &[f32], - elements: usize, -) -> Vec<(ItemId, &'a [f32], f32)> { - let mut ret = Vec::with_capacity(elements); - ret.extend(vectors.by_ref().take(elements).map(|(i, v)| (i, v, distance::(sort_by, v)))); - ret.sort_by(|(_, _, left), (_, _, right)| left.total_cmp(right)); - - if ret.is_empty() { - return ret; - } - - for (item_id, vector) in vectors { - let distance = distance::(sort_by, vector); - if distance < ret.last().unwrap().2 { - match ret.binary_search_by(|(_, _, d)| d.total_cmp(&distance)) { - Ok(i) | Err(i) => { - ret.pop(); - ret.insert(i, (item_id, vector, distance)) - } - } - } - } - - ret -} - -fn distance(left: &[f32], right: &[f32]) -> f32 { - let left = UnalignedVector::from_slice(left); - let left = Leaf { header: D::new_header(&left), vector: left }; - let right = UnalignedVector::from_slice(right); - let right = Leaf { header: D::new_header(&right), vector: right }; - - D::built_distance(&left, &right) -} - -fn load_into_arroy( - rng: &mut StdRng, - wtxn: &mut RwTxn, - database: Database, - dimensions: usize, - points: &[(ItemId, Vec)], -) -> Result<()> { - let writer = Writer::::new(database, 0, dimensions); - for (i, vector) in points.iter() { - writer.add_item(wtxn, *i, &vector[..])?; - } - writer.build(wtxn, rng, None)?; - - Ok(()) -} - -fn generate_points(mut rng: R, count: usize, dimensions: usize) -> Vec<(ItemId, Vec)> { - let mut points = Vec::with_capacity(count); - for item_id in 0..count { - let mut vector = vec![0.0; dimensions]; - for scalar in &mut vector { - *scalar = rng.gen_range(-1.0..1.0); - } - // rng.try_fill(&mut vector[..]).unwrap(); - points.push((item_id.try_into().unwrap(), vector)); - } - points -} From ddaedd3d1ccc35889bb783669af2dbbc19167dc0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 18 Sep 2024 18:11:21 +0200 Subject: [PATCH 61/61] fix the way we change the distance --- src/tests/writer.rs | 41 ++++++++++++++++++++++++++++++++++++++++- src/writer.rs | 6 +++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/tests/writer.rs b/src/tests/writer.rs index aa8b5511..8811585e 100644 --- a/src/tests/writer.rs +++ b/src/tests/writer.rs @@ -3,7 +3,7 @@ use rand::seq::SliceRandom; use rand::Rng; use super::{create_database, rng}; -use crate::distance::{Angular, DotProduct, Euclidean}; +use crate::distance::{Angular, BinaryQuantizedAngular, DotProduct, Euclidean}; use crate::{Database, Reader, Writer}; #[test] @@ -1003,3 
+1003,42 @@ fn need_build() { writer.del_item(&mut wtxn, 0).unwrap(); assert!(writer.need_build(&wtxn).unwrap(), "because an item has been updated"); } + +#[test] +fn prepare_changing_distance() { + let handle = create_database::(); + let mut rng = rng(); + let mut wtxn = handle.env.write_txn().unwrap(); + let writer = Writer::new(handle.database, 0, 2); + writer.add_item(&mut wtxn, 0, &[0.0, 0.0]).unwrap(); + writer.add_item(&mut wtxn, 1, &[1.0, 1.0]).unwrap(); + writer.add_item(&mut wtxn, 3, &[3.0, 3.0]).unwrap(); + writer.build(&mut wtxn, &mut rng, None).unwrap(); + let writer = Writer::new(handle.database, 1, 2); + writer.add_item(&mut wtxn, 0, &[0.0, 0.0]).unwrap(); + writer.add_item(&mut wtxn, 1, &[1.0, 1.0]).unwrap(); + writer.add_item(&mut wtxn, 3, &[3.0, 3.0]).unwrap(); + writer.build(&mut wtxn, &mut rng, None).unwrap(); + let writer = Writer::new(handle.database, 2, 2); + writer.add_item(&mut wtxn, 0, &[0.0, 0.0]).unwrap(); + writer.add_item(&mut wtxn, 1, &[1.0, 1.0]).unwrap(); + writer.add_item(&mut wtxn, 3, &[3.0, 3.0]).unwrap(); + writer.build(&mut wtxn, &mut rng, None).unwrap(); + wtxn.commit().unwrap(); + + let mut wtxn = handle.env.write_txn().unwrap(); + let writer = Writer::new(handle.database, 1, 2); + + let writer = writer.prepare_changing_distance::(&mut wtxn).unwrap(); + assert!(writer.need_build(&wtxn).unwrap(), "after changing the distance"); + + writer.build(&mut wtxn, &mut rng, None).unwrap(); + wtxn.commit().unwrap(); + + // TODO: this should not works, see https://github.com/meilisearch/arroy/issues/92 + let mut wtxn = handle.env.write_txn().unwrap(); + let writer = Writer::new(handle.database, 1, 2); + writer.del_item(&mut wtxn, 0).unwrap(); + assert!(writer.need_build(&wtxn).unwrap(), "because an item has been updated"); + writer.build(&mut wtxn, &mut rng, None).unwrap(); +} diff --git a/src/writer.rs b/src/writer.rs index 1892b488..1bcedeaa 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -52,7 +52,11 @@ impl Writer { if TypeId::of::() != TypeId::of::() { clear_tree_nodes(wtxn, self.database, self.index)?; - let mut cursor = self.database.iter_mut(wtxn)?; + let mut cursor = self + .database + .remap_key_type::() + .prefix_iter_mut(wtxn, &Prefix::item(self.index))? + .remap_key_type::(); while let Some((item_id, node)) = cursor.next().transpose()? { match node { Node::Leaf(Leaf { header: _, vector }) => {
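// Scoping note: the cursor above now walks only the item nodes of this index
// (via `Prefix::item(self.index)`) instead of every entry in the database, so
// preparing a distance change no longer touches items that belong to other
// indexes stored in the same database.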