-
-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implemented SortedSet key-value structure
Need to design an API to expose it, but this functionality should be all that's needed for the initial implementation.
- Loading branch information
Showing
5 changed files
with
273 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
use std::{cmp::Ordering, collections::HashMap, ops::Deref}; | ||
|
||
use arc_bytes::{ArcBytes, OwnedBytes}; | ||
use serde::{ser::SerializeMap, Deserialize, Serialize}; | ||
|
||
#[derive(Default, Clone, Debug)] | ||
pub struct SortedSet { | ||
members: HashMap<OwnedBytes, Score>, | ||
sorted_members: Vec<Entry>, | ||
} | ||
|
||
impl SortedSet { | ||
pub fn insert(&mut self, value: OwnedBytes, score: Score) -> Option<Score> { | ||
let entry = Entry { value, score }; | ||
let existing_score = self | ||
.members | ||
.insert(entry.value.clone(), entry.score.clone()); | ||
|
||
if existing_score.is_some() { | ||
let remove_index = self | ||
.sorted_members | ||
.binary_search_by(|member| member.value.cmp(&entry.value)) | ||
.unwrap(); | ||
self.sorted_members.remove(remove_index); | ||
} | ||
|
||
let insert_at = self | ||
.sorted_members | ||
.binary_search_by(|member| member.score.cmp(&entry.score)) | ||
.unwrap_or_else(|i| i); | ||
self.sorted_members.insert(insert_at, entry); | ||
|
||
existing_score | ||
} | ||
|
||
pub fn score(&self, value: &[u8]) -> Option<&Score> { | ||
self.members.get(value) | ||
} | ||
|
||
pub fn remove(&mut self, value: &[u8]) -> Option<Score> { | ||
let existing_score = self.members.remove(value); | ||
if existing_score.is_some() { | ||
let (remove_index, _) = self | ||
.sorted_members | ||
.iter() | ||
.enumerate() | ||
.find(|(_index, member)| member.value == value) | ||
.unwrap(); | ||
self.sorted_members.remove(remove_index); | ||
} | ||
existing_score | ||
} | ||
} | ||
|
||
impl Deref for SortedSet { | ||
type Target = Vec<Entry>; | ||
|
||
fn deref(&self) -> &Self::Target { | ||
&self.sorted_members | ||
} | ||
} | ||
|
||
#[derive(Clone, Debug)] | ||
pub struct Entry { | ||
value: OwnedBytes, | ||
score: Score, | ||
} | ||
|
||
#[derive(Clone, Debug, Serialize, Deserialize)] | ||
pub enum Score { | ||
Signed(i64), | ||
Unsigned(u64), | ||
Float(f64), | ||
Bytes(OwnedBytes), | ||
} | ||
|
||
// We check that the float value on input is not a NaN. | ||
impl Eq for Score {} | ||
|
||
impl PartialEq for Score { | ||
fn eq(&self, other: &Self) -> bool { | ||
self.cmp(other) == Ordering::Equal | ||
} | ||
} | ||
|
||
#[allow(clippy::cast_precision_loss)] | ||
impl Ord for Score { | ||
fn cmp(&self, other: &Self) -> Ordering { | ||
match (self, other) { | ||
(Self::Signed(a), Self::Signed(b)) => a.cmp(b), | ||
(Self::Signed(a), Self::Unsigned(b)) => { | ||
if let Ok(a) = u64::try_from(*a) { | ||
a.cmp(b) | ||
} else { | ||
Ordering::Less | ||
} | ||
} | ||
(Self::Unsigned(a), Self::Signed(b)) => { | ||
if let Ok(b) = u64::try_from(*b) { | ||
a.cmp(&b) | ||
} else { | ||
Ordering::Greater | ||
} | ||
} | ||
(Self::Unsigned(a), Self::Unsigned(b)) => a.cmp(b), | ||
(Self::Float(a), Self::Float(b)) => a.partial_cmp(b).unwrap(), | ||
(Self::Float(a), Self::Signed(b)) => a.partial_cmp(&(*b as f64)).unwrap(), | ||
(Self::Float(a), Self::Unsigned(b)) => a.partial_cmp(&(*b as f64)).unwrap(), | ||
(Self::Signed(a), Self::Float(b)) => (*a as f64).partial_cmp(b).unwrap(), | ||
(Self::Unsigned(a), Self::Float(b)) => (*a as f64).partial_cmp(b).unwrap(), | ||
(Self::Bytes(a), Self::Bytes(b)) => a.cmp(b), | ||
(_, Self::Bytes(_)) => Ordering::Less, | ||
(Self::Bytes(_), _) => Ordering::Greater, | ||
} | ||
} | ||
} | ||
|
||
impl PartialOrd for Score { | ||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { | ||
Some(self.cmp(other)) | ||
} | ||
} | ||
|
||
impl Serialize for SortedSet { | ||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||
where | ||
S: serde::Serializer, | ||
{ | ||
let mut map = serializer.serialize_map(Some(self.members.len()))?; | ||
for member in &self.sorted_members { | ||
map.serialize_entry(&member.value, &member.score)?; | ||
} | ||
map.end() | ||
} | ||
} | ||
|
||
impl<'de> Deserialize<'de> for SortedSet { | ||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> | ||
where | ||
D: serde::Deserializer<'de>, | ||
{ | ||
deserializer.deserialize_map(Visitor) | ||
} | ||
} | ||
|
||
/// Stateless serde visitor that builds a `SortedSet` from a serialized map.
struct Visitor;
|
||
impl<'de> serde::de::Visitor<'de> for Visitor { | ||
type Value = SortedSet; | ||
|
||
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
formatter.write_str("sorted set entries") | ||
} | ||
|
||
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> | ||
where | ||
A: serde::de::MapAccess<'de>, | ||
{ | ||
let (mut members, mut sorted_members) = if let Some(size) = map.size_hint() { | ||
(HashMap::with_capacity(size), Vec::with_capacity(size)) | ||
} else { | ||
(HashMap::default(), Vec::default()) | ||
}; | ||
|
||
while let Some((value, score)) = map.next_entry::<ArcBytes<'_>, Score>()? { | ||
let entry = Entry { | ||
value: OwnedBytes(value.into_owned()), | ||
score, | ||
}; | ||
members.insert(entry.value.clone(), entry.score.clone()); | ||
sorted_members.push(entry); | ||
} | ||
|
||
sorted_members.sort_by(|a, b| a.score.cmp(&b.score)); | ||
|
||
Ok(SortedSet { | ||
members, | ||
sorted_members, | ||
}) | ||
} | ||
} | ||
|
||
/// Exercises insertion, score replacement, ordering and removal.
#[test]
fn basics() {
    let mut sorted = SortedSet::default();

    // The first insertion of a value reports no previous score.
    let previous = sorted.insert(OwnedBytes::from(b"first"), Score::Unsigned(2));
    assert_eq!(previous, None);
    assert_eq!(sorted.score(b"first"), Some(&Score::Unsigned(2)));

    // Re-inserting the same value replaces its score and returns the old one.
    let previous = sorted.insert(OwnedBytes::from(b"first"), Score::Unsigned(1));
    assert_eq!(previous, Some(Score::Unsigned(2)));
    assert_eq!(sorted.score(b"first"), Some(&Score::Unsigned(1)));

    // A second member with a higher score sorts after the first.
    let previous = sorted.insert(OwnedBytes::from(b"a"), Score::Unsigned(2));
    assert_eq!(previous, None);
    assert_eq!(sorted.len(), 2);
    assert_eq!(sorted.score(b"a"), Some(&Score::Unsigned(2)));
    assert_eq!(sorted[0].value, b"first");
    assert_eq!(sorted[1].value, b"a");

    // Removing returns the score once, then `None` for the absent value.
    assert_eq!(sorted.remove(b"first"), Some(Score::Unsigned(1)));
    assert_eq!(sorted.remove(b"first"), None);
}
|
||
/// Round-trips a set with mixed score types through `pot` and checks that
/// the scores and the score ordering survive.
#[test]
fn serialization() {
    let mut original = SortedSet::default();
    original.insert(OwnedBytes::from(b"a"), Score::Signed(2));
    original.insert(OwnedBytes::from(b"b"), Score::Unsigned(1));
    original.insert(OwnedBytes::from(b"c"), Score::Float(0.));

    let encoded = pot::to_vec(&original).unwrap();
    let restored = pot::from_slice::<SortedSet>(&encoded).unwrap();

    // Every member keeps its score.
    let keys: [&[u8]; 3] = [b"a", b"b", b"c"];
    for key in keys.iter() {
        assert_eq!(restored.score(key), original.score(key));
    }

    // Members come back ordered by ascending score: 0.0, 1, 2.
    assert_eq!(restored[0].value, b"c");
    assert_eq!(restored[1].value, b"b");
    assert_eq!(restored[2].value, b"a");
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.