Skip to content

Commit b90689c

Browse files
committed
perf: reduce conversion by introducing InsertText
1 parent 4a786df commit b90689c

File tree

9 files changed

+235
-90
lines changed

9 files changed

+235
-90
lines changed

crates/loro-internal/src/arena.rs

+16-9
Original file line number | Diff line number | Diff line change
@@ -197,12 +197,20 @@ impl SharedArena {
197197
lock.get(id.to_index() as usize).cloned()
198198
}
199199

200-
/// return utf16 len
201200
pub fn alloc_str(&self, str: &str) -> StrAllocResult {
202201
let mut text_lock = self.inner.str.lock().unwrap();
203202
_alloc_str(&mut text_lock, str)
204203
}
205204

205+
/// Allocate `str` in the arena; return the byte slice of the new text and the unicode index at which it starts
206+
pub fn alloc_str_with_slice(&self, str: &str) -> (BytesSlice, usize) {
207+
let mut text_lock = self.inner.str.lock().unwrap();
208+
let start = text_lock.len_bytes();
209+
let unicode_start = text_lock.len_unicode();
210+
text_lock.alloc(str);
211+
(text_lock.slice_bytes(start..), unicode_start)
212+
}
213+
206214
/// alloc str without extra info
207215
pub fn alloc_str_fast(&self, bytes: &[u8]) {
208216
let mut text_lock = self.inner.str.lock().unwrap();
@@ -364,17 +372,16 @@ impl SharedArena {
364372
}),
365373
}
366374
}
367-
ListSlice::RawStr {
368-
str,
369-
unicode_len: _,
370-
} => {
371-
let slice = self.alloc_str(&str);
375+
ListSlice::RawStr { str, unicode_len } => {
376+
let (slice, start) = self.alloc_str_with_slice(&str);
372377
Op {
373378
counter,
374379
container,
375-
content: crate::op::InnerContent::List(InnerListOp::Insert {
376-
slice: SliceRange::from(slice.start as u32..slice.end as u32),
377-
pos,
380+
content: crate::op::InnerContent::List(InnerListOp::InsertText {
381+
slice,
382+
unicode_start: start as u32,
383+
unicode_len: unicode_len as u32,
384+
pos: pos as u32,
378385
}),
379386
}
380387
}

crates/loro-internal/src/container/list/list_op.rs

+80-24
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,14 @@
11
use std::ops::Range;
22

3+
use append_only_bytes::BytesSlice;
34
use enum_as_inner::EnumAsInner;
45
use rle::{HasLength, Mergable, Sliceable};
56
use serde::{Deserialize, Serialize};
67

78
use crate::{
89
container::richtext::TextStyleInfoFlag,
910
op::{ListSlice, SliceRange},
11+
utils::string_slice::unicode_range_to_byte_range,
1012
InternalString,
1113
};
1214

@@ -37,6 +39,12 @@ pub enum InnerListOp {
3739
slice: SliceRange,
3840
pos: usize,
3941
},
42+
InsertText {
43+
slice: BytesSlice,
44+
unicode_start: u32,
45+
unicode_len: u32,
46+
pos: u32,
47+
},
4048
Delete(DeleteSpan),
4149
/// StyleStart and StyleEnd must be paired.
4250
StyleStart {
@@ -297,45 +305,76 @@ impl<'a> Sliceable for ListOp<'a> {
297305
}
298306

299307
impl Mergable for InnerListOp {
300-
fn is_mergable(&self, _other: &Self, _conf: &()) -> bool
308+
fn is_mergable(&self, other: &Self, _conf: &()) -> bool
301309
where
302310
Self: Sized,
303311
{
304-
match self {
305-
InnerListOp::Insert { pos, slice, .. } => match _other {
312+
match (self, other) {
313+
(
314+
InnerListOp::Insert { pos, slice, .. },
306315
InnerListOp::Insert {
307316
pos: other_pos,
308317
slice: other_slice,
309318
..
310-
} => pos + slice.content_len() == *other_pos && slice.is_mergable(other_slice, &()),
311-
_ => false,
312-
},
313-
&InnerListOp::Delete(span) => match _other {
314-
InnerListOp::Delete(other_span) => span.is_mergable(other_span, &()),
315-
_ => false,
316-
},
317-
InnerListOp::StyleStart { .. } | InnerListOp::StyleEnd { .. } => false,
319+
},
320+
) => pos + slice.content_len() == *other_pos && slice.is_mergable(other_slice, &()),
321+
(InnerListOp::Delete(span), InnerListOp::Delete(other_span)) => {
322+
span.is_mergable(other_span, &())
323+
}
324+
(
325+
InnerListOp::InsertText {
326+
unicode_start,
327+
slice,
328+
pos,
329+
unicode_len: len,
330+
},
331+
InnerListOp::InsertText {
332+
slice: other_slice,
333+
pos: other_pos,
334+
unicode_start: other_unicode_start,
335+
unicode_len: _,
336+
},
337+
) => {
338+
pos + len == *other_pos
339+
&& slice.can_merge(other_slice)
340+
&& unicode_start + len == *other_unicode_start
341+
}
342+
_ => false,
318343
}
319344
}
320345

321-
fn merge(&mut self, _other: &Self, _conf: &())
346+
fn merge(&mut self, other: &Self, _conf: &())
322347
where
323348
Self: Sized,
324349
{
325-
match self {
326-
InnerListOp::Insert { slice, .. } => match _other {
350+
match (self, other) {
351+
(
352+
InnerListOp::Insert { slice, .. },
327353
InnerListOp::Insert {
328354
slice: other_slice, ..
329-
} => {
330-
slice.merge(other_slice, &());
331-
}
332-
_ => unreachable!(),
333-
},
334-
InnerListOp::Delete(span) => match _other {
335-
InnerListOp::Delete(other_span) => span.merge(other_span, &()),
336-
_ => unreachable!(),
337-
},
338-
InnerListOp::StyleStart { .. } | InnerListOp::StyleEnd { .. } => unreachable!(),
355+
},
356+
) => {
357+
slice.merge(other_slice, &());
358+
}
359+
(InnerListOp::Delete(span), InnerListOp::Delete(other_span)) => {
360+
span.merge(other_span, &())
361+
}
362+
(
363+
InnerListOp::InsertText {
364+
slice,
365+
unicode_len: len,
366+
..
367+
},
368+
InnerListOp::InsertText {
369+
slice: other_slice,
370+
unicode_len: other_len,
371+
..
372+
},
373+
) => {
374+
slice.merge(other_slice, &());
375+
*len += *other_len;
376+
}
377+
_ => unreachable!(),
339378
}
340379
}
341380
}
@@ -344,6 +383,9 @@ impl HasLength for InnerListOp {
344383
fn content_len(&self) -> usize {
345384
match self {
346385
InnerListOp::Insert { slice, .. } => slice.content_len(),
386+
InnerListOp::InsertText {
387+
unicode_len: len, ..
388+
} => *len as usize,
347389
InnerListOp::Delete(span) => span.atom_len(),
348390
InnerListOp::StyleStart { .. } | InnerListOp::StyleEnd { .. } => 1,
349391
}
@@ -357,6 +399,20 @@ impl Sliceable for InnerListOp {
357399
slice: slice.slice(from, to),
358400
pos: *pos + from,
359401
},
402+
InnerListOp::InsertText {
403+
slice,
404+
unicode_start,
405+
unicode_len: _,
406+
pos,
407+
} => InnerListOp::InsertText {
408+
slice: {
409+
let (a, b) = unicode_range_to_byte_range(slice, from, to);
410+
slice.slice(a, b)
411+
},
412+
unicode_start: *unicode_start + from as u32,
413+
unicode_len: (to - from) as u32,
414+
pos: *pos + from as u32,
415+
},
360416
InnerListOp::Delete(span) => InnerListOp::Delete(span.slice(from, to)),
361417
InnerListOp::StyleStart { .. } | InnerListOp::StyleEnd { .. } => self.clone(),
362418
}

crates/loro-internal/src/container/richtext/richtext_state.rs

+8-16
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ use crate::{
1919
style_range_map::StyleValue,
2020
},
2121
delta::DeltaValue,
22-
utils::utf16::count_utf16_chars,
22+
utils::{string_slice::unicode_range_to_byte_range, utf16::count_utf16_chars},
2323
InternalString,
2424
};
2525

@@ -1182,21 +1182,13 @@ impl RichtextState {
11821182
let mut end_byte = text.len();
11831183
if cfg!(feature = "wasm") {
11841184
event_len = len;
1185-
let start_unicode_index = start_cursor.offset;
1186-
let end_unicode_index = start_cursor.offset + len;
1187-
let mut current_utf8_index = 0;
1188-
for (current_unicode_index, c) in s.chars().enumerate() {
1189-
if current_unicode_index == start_unicode_index {
1190-
start_byte = current_utf8_index;
1191-
}
1192-
1193-
if current_unicode_index == end_unicode_index {
1194-
end_byte = current_utf8_index;
1195-
break;
1196-
}
1197-
1198-
current_utf8_index += c.len_utf8();
1199-
}
1185+
let (s, e) = unicode_range_to_byte_range(
1186+
text,
1187+
start_cursor.offset,
1188+
start_cursor.offset + len,
1189+
);
1190+
start_byte = s;
1191+
end_byte = e;
12001192
} else {
12011193
event_len = 'e: {
12021194
let start_unicode_index = start_cursor.offset;

crates/loro-internal/src/diff_calc.rs

+13-1
Original file line number | Diff line number | Diff line change
@@ -446,10 +446,22 @@ impl DiffCalculatorTrait for RichtextDiffCalculator {
446446
crate::container::list::list_op::InnerListOp::Insert { slice, pos } => {
447447
self.tracker.insert(
448448
op.id_start(),
449-
*pos,
449+
*pos as usize,
450450
RichtextChunk::new_text(slice.0.clone()),
451451
);
452452
}
453+
crate::container::list::list_op::InnerListOp::InsertText {
454+
slice,
455+
unicode_start,
456+
unicode_len: len,
457+
pos,
458+
} => {
459+
self.tracker.insert(
460+
op.id_start(),
461+
*pos as usize,
462+
RichtextChunk::new_text(*unicode_start..*unicode_start + *len),
463+
);
464+
}
453465
crate::container::list::list_op::InnerListOp::Delete(del) => {
454466
self.tracker.delete(
455467
op.id_start(),

crates/loro-internal/src/oplog.rs

+19
Original file line number | Diff line number | Diff line change
@@ -389,6 +389,25 @@ impl OpLog {
389389
}
390390
loro_common::ContainerType::Map => unreachable!(),
391391
},
392+
list_op::InnerListOp::InsertText {
393+
slice,
394+
unicode_len: len,
395+
unicode_start,
396+
pos,
397+
} => match container.container_type() {
398+
loro_common::ContainerType::Text => {
399+
contents.push(RawOpContent::List(list_op::ListOp::Insert {
400+
slice: ListSlice::RawStr {
401+
unicode_len: *len as usize,
402+
str: Cow::Owned(std::str::from_utf8(slice).unwrap().to_owned()),
403+
},
404+
pos: *pos as usize,
405+
}));
406+
}
407+
loro_common::ContainerType::List | loro_common::ContainerType::Map => {
408+
unreachable!()
409+
}
410+
},
392411
list_op::InnerListOp::Delete(del) => {
393412
contents.push(RawOpContent::List(list_op::ListOp::Delete(*del)))
394413
}

crates/loro-internal/src/snapshot_encode.rs

+19
Original file line number | Diff line number | Diff line change
@@ -723,6 +723,25 @@ fn encode_oplog(oplog: &OpLog, state_ref: Option<PreEncodedState>) -> FinalPhase
723723
}
724724
loro_common::ContainerType::Map => unreachable!(),
725725
},
726+
InnerListOp::InsertText {
727+
slice,
728+
unicode_len: len,
729+
unicode_start,
730+
pos,
731+
} => match op.container.get_type() {
732+
loro_common::ContainerType::Text => {
733+
encoded_ops.push(EncodedSnapshotOp::from(
734+
SnapshotOp::RichtextInsert {
735+
pos: *pos as usize,
736+
start: slice.start(),
737+
len: *len as usize,
738+
},
739+
op.container.to_index(),
740+
))
741+
}
742+
loro_common::ContainerType::Map => unreachable!(),
743+
loro_common::ContainerType::List => unreachable!(),
744+
},
726745
InnerListOp::Delete(del) => {
727746
encoded_ops.push(EncodedSnapshotOp::from(
728747
SnapshotOp::TextOrListDelete {

crates/loro-internal/src/state/richtext_state.rs

+17
Original file line number | Diff line number | Diff line change
@@ -320,6 +320,23 @@ impl ContainerState for RichtextState {
320320
})
321321
}
322322
}
323+
list_op::InnerListOp::InsertText {
324+
slice,
325+
unicode_len: len,
326+
unicode_start: _,
327+
pos,
328+
} => {
329+
self.state
330+
.get_mut()
331+
.insert_at_entity_index(*pos as usize, slice.clone());
332+
333+
if self.in_txn {
334+
self.undo_stack.push(UndoItem::Insert {
335+
index: *pos,
336+
len: *len,
337+
})
338+
}
339+
}
323340
list_op::InnerListOp::Delete(del) => {
324341
self.state.get_mut().drain_by_entity_index(
325342
del.start() as usize,

0 commit comments

Comments
 (0)