Skip to content

Commit

Permalink
perf: optimize snapshot encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
zxch3n committed Oct 20, 2023
1 parent 6023c7f commit 638dba9
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 93 deletions.
74 changes: 7 additions & 67 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use append_only_bytes::BytesSlice;
use fxhash::{FxHashMap, FxHashSet};
use generic_btree::{
rle::{HasLength, Mergeable, Sliceable},
BTree, BTreeTrait, Cursor, LeafDirtyMap, Query,
BTree, BTreeTrait, Cursor, Query,
};
use loro_common::LoroValue;
use serde::{ser::SerializeStruct, Serialize};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ impl BTreeTrait for RangeNumMapTrait {
caches: &[generic_btree::Child<Self>],
) -> isize {
let new_cache = caches.iter().map(|c| c.cache).sum();
let diff = new_cache as isize - *cache as isize;
let diff = new_cache - *cache;
*cache = new_cache;
diff
}
Expand Down
7 changes: 4 additions & 3 deletions crates/loro-internal/src/snapshot_encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ use super::{
};

pub fn encode_app_snapshot(app: &LoroDoc) -> Vec<u8> {
let pre_encoded_state = preprocess_app_state(&app.app_state().lock().unwrap());
let state = app.app_state().lock().unwrap();
let pre_encoded_state = preprocess_app_state(&state);
let f = encode_oplog(&app.oplog().lock().unwrap(), Some(pre_encoded_state));
// f.diagnose_size();
f.encode()
Expand Down Expand Up @@ -503,13 +504,13 @@ struct DepsEncoding {
}

#[derive(Default)]
struct PreEncodedState {
struct PreEncodedState<'a> {
common: CommonArena<'static>,
arena: TempArena<'static>,
key_lookup: FxHashMap<InternalString, usize>,
value_lookup: FxHashMap<LoroValue, usize>,
peer_lookup: FxHashMap<PeerID, usize>,
app_state: EncodedAppState,
app_state: EncodedAppState<'a>,
}

fn preprocess_app_state(app_state: &DocState) -> PreEncodedState {
Expand Down
21 changes: 13 additions & 8 deletions crates/loro-internal/src/state/richtext_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{ops::Range, sync::Arc};
use fxhash::FxHashMap;
use generic_btree::rle::HasLength;
use loro_common::{Counter, LoroValue, PeerID, ID};
use loro_preload::{CommonArena, EncodedRichtextState, TempArena};
use loro_preload::{CommonArena, EncodedRichtextState, TempArena, TextRanges};

use crate::{
arena::SharedArena,
Expand Down Expand Up @@ -505,7 +505,7 @@ impl RichtextState {
&mut self,
EncodedRichtextState {
len,
text,
text_bytes,
styles,
is_style_start,
}: EncodedRichtextState,
Expand All @@ -519,13 +519,13 @@ impl RichtextState {
let mut is_style_start_iter = is_style_start.iter();
let mut loader = Self::get_loader();
let mut is_text = true;
let mut text_range_iter = text.iter();
let mut text_range_iter = TextRanges::decode_iter(&text_bytes).unwrap();
let mut style_iter = styles.iter();
for &len in len.iter() {
if is_text {
for _ in 0..len {
let &range = text_range_iter.next().unwrap();
let text = arena.slice_by_utf8(range.0 as usize..range.1 as usize);
let range = text_range_iter.next().unwrap();
let text = arena.slice_by_utf8(range.start..range.start + range.len);
loader.push(RichtextStateChunk::new_text(text));
}
} else {
Expand Down Expand Up @@ -562,7 +562,7 @@ impl RichtextState {
) -> EncodedRichtextState {
// lengths are interleaved [text_elem_len, style_elem_len, ..]
let mut lengths = Vec::new();
let mut text_ranges = Vec::new();
let mut text_ranges: TextRanges = Default::default();
let mut styles = Vec::new();
let mut is_style_start = BitMap::new();

Expand All @@ -577,7 +577,10 @@ impl RichtextState {
}

*lengths.last_mut().unwrap() += 1;
text_ranges.push((text.start() as u32, text.end() as u32));
text_ranges.ranges.push(loro_preload::TextRange {
start: text.start(),
len: text.len(),
});
}
RichtextStateChunk::Style { style, anchor_type } => {
if lengths.is_empty() {
Expand All @@ -603,9 +606,11 @@ impl RichtextState {
}
}

let text_bytes = text_ranges.encode();
// eprintln!("bytes len={}", text_bytes.len());
EncodedRichtextState {
len: lengths,
text: text_ranges,
text_bytes: std::borrow::Cow::Owned(text_bytes),
styles,
is_style_start: is_style_start.into_vec(),
}
Expand Down
2 changes: 1 addition & 1 deletion crates/loro-preload/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ edition = "2021"

[dependencies]
serde = {version="1", features=["derive"]}
serde_columnar = "0.2.5"
serde_columnar = { version = "0.3.2" }
loro-common = {path="../loro-common"}
bytes = "1.4.0"
59 changes: 47 additions & 12 deletions crates/loro-preload/src/encode.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use bytes::{BufMut, BytesMut};
use loro_common::{ContainerID, InternalString, LoroError, LoroValue, ID};
use serde_columnar::to_vec;
use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID};
use serde_columnar::{columnar, to_vec};
use std::borrow::Cow;

use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -109,48 +109,83 @@ impl<'a> CommonArena<'a> {
}

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EncodedAppState {
pub struct EncodedAppState<'a> {
pub frontiers: Vec<ID>,
/// container states
pub states: Vec<EncodedContainerState>,
#[serde(borrow)]
pub states: Vec<EncodedContainerState<'a>>,
/// containers' parents
pub parents: Vec<Option<u32>>,
}

impl EncodedAppState {
impl<'a> EncodedAppState<'a> {
pub fn encode(&self) -> Vec<u8> {
to_vec(self).unwrap()
}

pub fn decode(data: &FinalPhase) -> Result<Self, LoroError> {
pub fn decode(data: &'a FinalPhase) -> Result<EncodedAppState<'a>, LoroError> {
serde_columnar::from_bytes(&data.app_state)
.map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str()))
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum EncodedContainerState {
pub enum EncodedContainerState<'a> {
Map(Vec<MapEntry>),
List(Vec<usize>),
Richtext(EncodedRichtextState),
#[serde(borrow)]
Richtext(EncodedRichtextState<'a>),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncodedRichtextState {
pub struct EncodedRichtextState<'a> {
/// It's composed of interleaved:
///
/// - len of text ranges
/// - len of styles anchors
pub len: Vec<u32>,
/// Text ranges in the [`TempArena`] `richtext` field
pub text: Vec<(u32, u32)>,
/// This is encoded [TextRanges]
#[serde(borrow)]
pub text_bytes: Cow<'a, [u8]>,
/// Style anchor index in the style arena
// TODO: can be optimized
pub styles: Vec<CompactStyleOp>,
/// It is a start or end anchor. It's indexed by bit position.
pub is_style_start: Vec<u8>,
}

impl EncodedContainerState {
/// A single text span, stored as a start offset plus a length.
///
/// The `columnar` attribute requests `vec`, `ser`, `de` and `iterable`
/// support, so rows of this type can be stored as columns inside
/// [`TextRanges`] and read back one at a time. Both fields use the
/// `DeltaRle` column strategy.
#[columnar(vec, ser, de, iterable)]
#[derive(Debug, Clone, Copy)]
pub struct TextRange {
/// Offset at which the span starts.
// NOTE(review): presumably a UTF-8 offset into the richtext arena — confirm against callers.
#[columnar(strategy = "DeltaRle")]
pub start: usize,
/// Length of the span, in the same unit as `start`; the span covers
/// `start..start + len`.
#[columnar(strategy = "DeltaRle")]
pub len: usize,
}

/// Container for a list of [`TextRange`]s that is serialized in columnar form.
///
/// Use [`TextRanges::encode`] to turn it into bytes and
/// [`TextRanges::decode_iter`] to iterate over the ranges stored in such bytes.
#[columnar(ser, de)]
#[derive(Debug, Default)]
pub struct TextRanges {
/// The ranges, in insertion order. Declared as a columnar `vec` class whose
/// iterator item is `TextRange`.
#[columnar(class = "vec", iter = "TextRange")]
pub ranges: Vec<TextRange>,
}

impl TextRanges {
    /// Decodes `bytes` (as produced by [`TextRanges::encode`]) into an iterator
    /// over the stored [`TextRange`]s. The iterator borrows from `bytes`.
    ///
    /// # Errors
    ///
    /// Returns [`LoroError::DecodeError`] when `serde_columnar` rejects the input.
    #[inline]
    pub fn decode_iter(bytes: &[u8]) -> LoroResult<impl Iterator<Item = TextRange> + '_> {
        match serde_columnar::iter_from_bytes::<TextRanges>(bytes) {
            // Only the `ranges` column iterator is handed back to the caller.
            Ok(decoded) => Ok(decoded.ranges),
            Err(err) => {
                let message = format!("Failed to decode TextRange: {}", err);
                Err(LoroError::DecodeError(message.into_boxed_str()))
            }
        }
    }

    /// Serializes all ranges into their columnar byte representation.
    ///
    /// # Panics
    ///
    /// Panics if `serde_columnar` fails to serialize `self`.
    #[inline]
    pub fn encode(&self) -> Vec<u8> {
        let encoded = to_vec(self);
        encoded.unwrap()
    }
}

impl<'a> EncodedContainerState<'a> {
pub fn container_type(&self) -> loro_common::ContainerType {
match self {
EncodedContainerState::Map(_) => loro_common::ContainerType::Map,
Expand Down

0 comments on commit 638dba9

Please sign in to comment.