Skip to content

Commit

Permalink
provide a function to upgrade arroy from v0.4 to v0.5
Browse files Browse the repository at this point in the history
  • Loading branch information
irevoire committed Oct 29, 2024
1 parent 68ed432 commit 3908c9e
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ pub enum Error {
/// The item ID queried
item: ItemId,
},

/// Cannot decode the key mode
#[error("Cannot decode key mode: `{mode:?}`")]
CannotDecodeKeyMode {
/// The mode that couldn't be decoded.
mode: NodeMode,
},
}

impl Error {
Expand Down
113 changes: 113 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,19 @@ mod tests;
mod unaligned_vector;

pub use distance::Distance;
use distances::Cosine;
pub use error::Error;
use heed::{
types::{Bytes, LazyDecode, Unit},
RoTxn, RwTxn, Unspecified,
};
use internals::KeyCodec;
use key::{Key, Prefix, PrefixCodec};
use metadata::{Metadata, MetadataCodec};
use node::{Node, NodeCodec};
use node_id::{NodeId, NodeMode};
pub use reader::{QueryBuilder, Reader};
use roaring::RoaringBitmapCodec;
pub use stats::{Stats, TreeStats};
pub use writer::{ArroyBuilder, Writer};

Expand Down Expand Up @@ -150,3 +157,109 @@ pub type Database<D> = heed::Database<internals::KeyCodec, NodeCodec<D>>;

/// An identifier for the items stored in the database.
pub type ItemId = u32;

// ################ The updating code ################

/// Node modes as they were numbered before the v0.4 -> v0.5 upgrade.
///
/// The explicit discriminants are the raw byte values this enum is converted
/// from, so they must never be changed.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum OldNodeMode {
    /// Entries that the upgrade copies over without touching their value.
    Item = 0,
    /// Entries holding a tree node.
    Tree = 1,
    /// Entries holding either the metadata or the bitmap of updated items.
    Metadata = 2,
}

/// Decodes a raw mode byte using the *old* numbering.
impl TryFrom<u8> for OldNodeMode {
    type Error = String;

    /// Returns the `OldNodeMode` whose discriminant equals `v`, or an error
    /// message if `v` matches none of them.
    fn try_from(v: u8) -> std::result::Result<Self, Self::Error> {
        // The byte must be compared against the discriminants of `OldNodeMode`
        // itself: comparing against the current `NodeMode` would decode the old
        // entries with the new numbering and misclassify them.
        match v {
            v if v == OldNodeMode::Item as u8 => Ok(OldNodeMode::Item),
            v if v == OldNodeMode::Tree as u8 => Ok(OldNodeMode::Tree),
            v if v == OldNodeMode::Metadata as u8 => Ok(OldNodeMode::Metadata),
            v => Err(format!("Could not convert {v} as a `NodeMode`.")),
        }
    }
}

/// Upgrade arroy from v0.4 to v0.5 without rebuilding the trees
/// /!\ This is only valid if the arroy database was generated by Meilisearch. Do not try to use it yourself.
pub fn ugrade_from_prev_version(
rtxn: &RoTxn,
wtxn: &mut RwTxn,
database: heed::Database<Unspecified, Unspecified>,
) -> Result<()> {
// We need to update EVERY single nodes, thus we can clear the whole DB initially
database.clear(wtxn)?;

// Then we **must** iterate over everything in the database to be sure we don't miss anything.
for ret in database.remap_types::<internals::KeyCodec, LazyDecode<Bytes>>().iter(rtxn)? {
let (mut key, value) = ret?;
let old_mode = OldNodeMode::try_from(key.node.mode as u8)
.map_err(|_| Error::CannotDecodeKeyMode { mode: key.node.mode })?;

// All the modes have been remapped
match old_mode {
OldNodeMode::Item => {
key.node.mode = NodeMode::Item;
// In case of an item there is nothing else to do
database.remap_types::<KeyCodec, Bytes>().put(
wtxn,
&key,
value.remap::<Bytes>().decode().unwrap(),
)?;
}
OldNodeMode::Tree => {
key.node.mode = NodeMode::Tree;
// Meilisearch is only using Cosine distance at this point
let mut tree_node = value.remap::<NodeCodec<Cosine>>().decode().unwrap();
// The leaf and descendants tree node don't contains any node mode
if let Node::SplitPlaneNormal(split) = &mut tree_node {
let left_old_mode = OldNodeMode::try_from(split.left.mode as u8)
.map_err(|_| Error::CannotDecodeKeyMode { mode: split.left.mode })?;
split.left.mode = match left_old_mode {
OldNodeMode::Item => NodeMode::Item,
OldNodeMode::Tree => NodeMode::Tree,
OldNodeMode::Metadata => NodeMode::Metadata,
};

let right_old_mode = OldNodeMode::try_from(split.right.mode as u8)
.map_err(|_| Error::CannotDecodeKeyMode { mode: split.right.mode })?;
split.right.mode = match right_old_mode {
OldNodeMode::Item => NodeMode::Item,
OldNodeMode::Tree => NodeMode::Tree,
OldNodeMode::Metadata => NodeMode::Metadata,
};
}
database
.remap_types::<KeyCodec, NodeCodec<Cosine>>()
.put(wtxn, &key, &tree_node)?;
}
OldNodeMode::Metadata => {
match key.index {
0 => {
key.node.mode = NodeMode::Metadata;
// The distance has been renamed
let mut metadata = value.remap::<MetadataCodec>().decode().unwrap();
metadata.distance = Cosine::name();
database
.remap_types::<KeyCodec, MetadataCodec>()
.put(wtxn, &key, &metadata)?;
}
1 => {
key.node.mode = NodeMode::Updated;
// In this case we have a roaring bitmap of document id
// that we must re-insert as multiple values
let updated = value.remap::<RoaringBitmapCodec>().decode().unwrap();
for item in updated {
key.node.item = item;
database.remap_types::<KeyCodec, Unit>().put(wtxn, &key, &())?;
}
}
_ => unreachable!(),
}
}
};
}

Ok(())
}

0 comments on commit 3908c9e

Please sign in to comment.