From 091a3d4d5b5c62e2f419a9e38b3b6a56e7fc4337 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Mon, 30 Oct 2023 21:44:36 +0800 Subject: [PATCH] perf: reduce mem usage when txn is large --- .../examples/encoding_refactored.rs | 19 +-- crates/loro-internal/src/handler.rs | 17 +- .../loro-internal/src/state/richtext_state.rs | 92 ++++++++--- crates/loro-internal/src/txn.rs | 149 ++++++++++++++---- 4 files changed, 208 insertions(+), 69 deletions(-) diff --git a/crates/loro-internal/examples/encoding_refactored.rs b/crates/loro-internal/examples/encoding_refactored.rs index 49cdfc04c..7082cfb25 100644 --- a/crates/loro-internal/examples/encoding_refactored.rs +++ b/crates/loro-internal/examples/encoding_refactored.rs @@ -3,8 +3,8 @@ use criterion::black_box; use loro_internal::loro::LoroDoc; fn main() { - log_size(); - // bench_decode(); + // log_size(); + bench_decode(); // bench_decode_updates(); } @@ -55,17 +55,18 @@ fn log_size() { #[allow(unused)] fn bench_decode() { + println!("Bench decode"); let actions = bench_utils::get_automerge_actions(); { - let loro = LoroDoc::default(); + let mut loro = LoroDoc::default(); let text = loro.get_text("text"); + loro.start_auto_commit(); - #[allow(warnings)] - for TextAction { pos, ins, del } in actions.iter() { - let mut txn = loro.txn().unwrap(); - text.delete(&mut txn, *pos, *del); - text.insert(&mut txn, *pos, ins); - txn.commit().unwrap(); + for _ in 0..10 { + for TextAction { pos, ins, del } in actions.iter() { + text.delete_(*pos, *del); + text.insert_(*pos, ins); + } } let snapshot = loro.export_snapshot(); // for _ in 0..100 { diff --git a/crates/loro-internal/src/handler.rs b/crates/loro-internal/src/handler.rs index 024633fda..85728ed18 100644 --- a/crates/loro-internal/src/handler.rs +++ b/crates/loro-internal/src/handler.rs @@ -265,19 +265,21 @@ impl TextHandler { .get_entity_index_for_text_insert_event_index(pos) }); + let unicode_len = s.chars().count(); txn.apply_local_op( self.container_idx, crate::op::RawOpContent::List(crate::container::list::list_op::ListOp::Insert { slice: ListSlice::RawStr { str: Cow::Borrowed(s), - unicode_len: s.chars().count(), + unicode_len, }, pos: entity_index, }), EventHint::InsertText { - pos, + pos: pos as u32, // FIXME: this is wrong styles: vec![], + len: unicode_len as u32, }, &self.state, ) @@ -333,7 +335,10 @@ impl TextHandler { signed_len: (range.end - range.start) as isize, })), if is_first { - EventHint::DeleteText { pos, len } + EventHint::DeleteText(DeleteSpan { + pos: pos as isize, + signed_len: len as isize, + }) } else { EventHint::None }, @@ -408,8 +413,8 @@ impl TextHandler { info: flag, }), EventHint::Mark { - start, - end, + start: start as u32, + end: end as u32, style: crate::container::richtext::Style { key: key.into(), // FIXME: style meta is incorrect @@ -541,7 +546,7 @@ impl ListHandler { pos: pos as isize, signed_len: len as isize, })), - EventHint::DeleteList { pos, len }, + EventHint::DeleteList(DeleteSpan::new(pos as isize, len as isize)), &self.state, ) } diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index a620d5064..ee9b8c020 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -1,7 +1,7 @@ use std::{ops::Range, sync::Arc}; use fxhash::FxHashMap; -use generic_btree::rle::HasLength; +use generic_btree::rle::{HasLength, Mergeable}; use loro_common::{Counter, LoroResult, LoroValue, PeerID, ID}; use loro_preload::{CommonArena, EncodedRichtextState, TempArena, TextRanges}; @@ -87,6 +87,49 @@ enum UndoItem { }, } +impl Mergeable for UndoItem { + fn can_merge(&self, rhs: &Self) -> bool { + match (self, rhs) { + (UndoItem::Insert { index, len }, UndoItem::Insert { index: r_index, .. }) => { + *index + *len == *r_index + } + ( + UndoItem::Delete { index, content }, + UndoItem::Delete { + index: r_i, + content: r_c, + }, + ) => *r_i + r_c.rle_len() as u32 == *index && r_c.can_merge(content), + _ => false, + } + } + + fn merge_right(&mut self, rhs: &Self) { + match (self, rhs) { + (UndoItem::Insert { len, .. }, UndoItem::Insert { len: r_len, .. }) => { + *len += *r_len; + } + ( + UndoItem::Delete { content, index }, + UndoItem::Delete { + content: r_c, + index: r_i, + }, + ) => { + if *r_i + r_c.rle_len() as u32 == *index { + content.merge_right(r_c); + *index = *r_i + } + } + _ => unreachable!(), + } + } + + fn merge_left(&mut self, _: &Self) { + unreachable!() + } +} + impl ContainerState for RichtextState { // TODO: refactor fn apply_diff_and_convert(&mut self, diff: InternalDiff, _arena: &SharedArena) -> Diff { @@ -179,22 +222,13 @@ impl ContainerState for RichtextState { } } } - self.undo_stack.push(UndoItem::Insert { - index: entity_index as u32, - len: value.rle_len() as u32, - }); entity_index += value.rle_len(); } crate::delta::DeltaItem::Delete { len, meta: _ } => { let (start, end) = self.state .get_mut() - .drain_by_entity_index(entity_index, *len, |span| { - self.undo_stack.push(UndoItem::Delete { - index: entity_index as u32, - content: span, - }) - }); + .drain_by_entity_index(entity_index, *len, |_| {}); if start > event_index { for (len, styles) in self .state @@ -287,21 +321,12 @@ impl ContainerState for RichtextState { } } } - self.undo_stack.push(UndoItem::Insert { - index: entity_index as u32, - len: value.rle_len() as u32, - }); entity_index += value.rle_len(); } crate::delta::DeltaItem::Delete { len, meta: _ } => { self.state .get_mut() - .drain_by_entity_index(entity_index, *len, |span| { - self.undo_stack.push(UndoItem::Delete { - index: entity_index as u32, - content: span, - }) - }); + .drain_by_entity_index(entity_index, *len, |_| {}); } } } @@ -324,7 +349,7 @@ impl ContainerState for RichtextState { .insert_at_entity_index(*pos as usize, slice.clone()); if self.in_txn { - self.undo_stack.push(UndoItem::Insert { + self.push_undo(UndoItem::Insert { index: *pos, len: *len, }) @@ -336,10 +361,18 @@ impl ContainerState for RichtextState { rle::HasLength::atom_len(&del), |span| { if self.in_txn { - self.undo_stack.push(UndoItem::Delete { + let item = UndoItem::Delete { index: del.start() as u32, content: span, - }) + }; + match self.undo_stack.last_mut() { + Some(last) if last.can_merge(&item) => { + last.merge_right(&item); + } + _ => { + self.undo_stack.push(item); + } + } } }, ); @@ -427,6 +460,17 @@ impl RichtextState { } } + fn push_undo(&mut self, item: UndoItem) { + match self.undo_stack.last_mut() { + Some(last) if last.can_merge(&item) => { + last.merge_right(&item); + } + _ => { + self.undo_stack.push(item); + } + } + } + #[inline(always)] pub fn len_utf8(&mut self) -> usize { self.state.get_mut().len_utf8() diff --git a/crates/loro-internal/src/txn.rs b/crates/loro-internal/src/txn.rs index 6de382de5..5d6228ca5 100644 --- a/crates/loro-internal/src/txn.rs +++ b/crates/loro-internal/src/txn.rs @@ -6,13 +6,13 @@ use std::{ use debug_log::debug_dbg; use enum_as_inner::EnumAsInner; -use fxhash::FxHashMap; +use generic_btree::rle::{HasLength as RleHasLength, Mergeable, Sliceable as GBSliceable}; use loro_common::{ContainerType, LoroResult}; -use rle::{HasLength, RleVec}; +use rle::{HasLength, Mergable, RleVec, Sliceable}; use crate::{ change::{get_sys_timestamp, Change, Lamport, Timestamp}, - container::{idx::ContainerIdx, richtext::Style, IntoContainerId}, + container::{idx::ContainerIdx, list::list_op::DeleteSpan, richtext::Style, IntoContainerId}, delta::{Delta, MapValue, TreeDelta, TreeDiff}, event::Diff, id::{Counter, PeerID, ID}, @@ -44,7 +44,7 @@ pub struct Transaction { oplog: Arc>, frontiers: Frontiers, local_ops: RleVec<[Op; 1]>, // TODO: use a more efficient data structure - event_hints: FxHashMap, + event_hints: Vec, pub(super) arena: SharedArena, finished: bool, on_commit: Option, @@ -54,29 +54,23 @@ pub struct Transaction { #[derive(Debug, Clone, EnumAsInner)] pub(super) enum EventHint { Mark { - start: usize, - end: usize, + start: u32, + end: u32, style: Style, }, InsertText { /// pos is a Unicode index. If wasm, it's a UTF-16 index. - pos: usize, + pos: u32, + len: u32, styles: Vec