From d4b3d75c079aef271f7b3217a11a1f6fc3e59b34 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Thu, 7 Sep 2023 21:34:15 +0800 Subject: [PATCH 001/111] feat: richtext wip --- .vscode/settings.json | 1 + Cargo.lock | 4 +- crates/compact-bytes/Cargo.toml | 2 +- crates/loro-internal/Cargo.toml | 14 +- .../loro-internal/examples/automerge_x100.rs | 9 +- crates/loro-internal/src/arena.rs | 34 +- crates/loro-internal/src/change.rs | 10 + crates/loro-internal/src/container.rs | 1 + .../src/container/list/list_op.rs | 47 +- .../loro-internal/src/container/richtext.rs | 222 ++++++++++ .../src/container/richtext/query_by_len.rs | 70 +++ .../src/container/richtext/richtext_state.rs | 419 ++++++++++++++++++ .../src/container/richtext/style_range_map.rs | 266 +++++++++++ .../src/container/richtext/tinyvec.rs | 88 ++++ .../src/container/text/tracker.rs | 1 + .../src/encoding/encode_changes.rs | 6 + .../src/encoding/encode_enhanced.rs | 6 + crates/loro-internal/src/op/content.rs | 11 + crates/loro-internal/src/oplog.rs | 11 + crates/loro-internal/src/snapshot_encode.rs | 6 + crates/loro-internal/src/state.rs | 10 +- crates/loro-internal/src/state/list_state.rs | 1 + .../loro-internal/src/state/richtext_state.rs | 1 + crates/loro-internal/src/state/text_state.rs | 6 + crates/loro-internal/src/txn.rs | 12 + crates/rle/Cargo.toml | 2 +- 26 files changed, 1239 insertions(+), 21 deletions(-) create mode 100644 crates/loro-internal/src/container/richtext.rs create mode 100644 crates/loro-internal/src/container/richtext/query_by_len.rs create mode 100644 crates/loro-internal/src/container/richtext/richtext_state.rs create mode 100644 crates/loro-internal/src/container/richtext/style_range_map.rs create mode 100644 crates/loro-internal/src/container/richtext/tinyvec.rs create mode 100644 crates/loro-internal/src/state/richtext_state.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index fb923ab24..fd0ff3cb1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ 
-16,6 +16,7 @@ "RUSTFLAGS", "smstring", "thiserror", + "tinyvec", "txns", "yspan" ], diff --git a/Cargo.lock b/Cargo.lock index 0f5c76ef4..642c559ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -52,9 +52,9 @@ checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" [[package]] name = "append-only-bytes" -version = "0.1.8" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd736657a12852ffb42ed309ac3409382d93f76f49ae0ad69fae4ca927e584d9" +checksum = "3c8f869514578421dd710b68b9db3e39ac9df0d9218a44641bbdcf5c99617780" [[package]] name = "arbitrary" diff --git a/crates/compact-bytes/Cargo.toml b/crates/compact-bytes/Cargo.toml index 4c7de5f40..ae26a62c8 100644 --- a/crates/compact-bytes/Cargo.toml +++ b/crates/compact-bytes/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -append-only-bytes = "0.1.8" +append-only-bytes = "0.1.11" fxhash = "0.2.1" linked-hash-map = "0.5.6" diff --git a/crates/loro-internal/Cargo.toml b/crates/loro-internal/Cargo.toml index 396e066b8..c30258715 100644 --- a/crates/loro-internal/Cargo.toml +++ b/crates/loro-internal/Cargo.toml @@ -11,25 +11,25 @@ rle = { path = "../rle" } loro-preload = { path = "../loro-preload" } loro-common = { path = "../loro-common" } smallvec = { version = "1.8.0", features = ["serde"] } -postcard = "1.0.2" +postcard = "1" fxhash = "0.2.1" serde = { version = "1", features = ["derive"] } -thiserror = "1.0.31" +thiserror = "1" enum-as-inner = "0.5.1" num = "0.4.0" crdt-list = { version = "0.4.0" } rand = { version = "0.8.5", optional = true } -arbitrary = { version = "1.1.7", optional = true } +arbitrary = { version = "1", optional = true } tabled = { version = "0.10.0", optional = true } wasm-bindgen = { version = "0.2.83", optional = true } serde-wasm-bindgen = { version = "0.5.0", optional = true } js-sys = { version = "0.3.60", optional = true 
} -serde_json = { version = "1.0.87", optional = true } +serde_json = { version = "1", optional = true } arref = "0.1.0" debug-log = { version = "0.1.4", features = [] } serde_columnar = { version = "0.2.5" } tracing = { version = "0.1.37" } -append-only-bytes = { version = "0.1.8", features = ["u32_range"] } +append-only-bytes = { version = "0.1.11", features = ["u32_range"] } itertools = "0.10.5" enum_dispatch = "0.3.11" im = "15.1.0" @@ -80,3 +80,7 @@ harness = false [[bench]] name = "encode" harness = false + + +[patch.crates-io] +generic-btree = { path = "../../../generic-btree" } diff --git a/crates/loro-internal/examples/automerge_x100.rs b/crates/loro-internal/examples/automerge_x100.rs index 428c0bd2c..da870faa1 100644 --- a/crates/loro-internal/examples/automerge_x100.rs +++ b/crates/loro-internal/examples/automerge_x100.rs @@ -1,21 +1,24 @@ -use loro_internal::LoroDoc; +use loro_common::ID; +use loro_internal::{version::Frontiers, LoroDoc}; fn main() { use bench_utils::TextAction; use std::time::Instant; let actions = bench_utils::get_automerge_actions(); - let loro = LoroDoc::default(); + let mut loro = LoroDoc::default(); let start = Instant::now(); // loro.subscribe_deep(Box::new(|_| ())); let text = loro.get_text("text"); for _ in 0..1 { + let mut txn = loro.txn().unwrap(); for TextAction { del, ins, pos } in actions.iter() { - let mut txn = loro.txn().unwrap(); text.delete_utf16(&mut txn, *pos, *del).unwrap(); text.insert_utf16(&mut txn, *pos, ins).unwrap(); } } + loro.checkout(&Frontiers::from(ID::new(loro.peer_id(), 100))) + .unwrap(); // loro.diagnose(); println!("{}", start.elapsed().as_millis()); } diff --git a/crates/loro-internal/src/arena.rs b/crates/loro-internal/src/arena.rs index 2eeb41e80..b88c52ecb 100644 --- a/crates/loro-internal/src/arena.rs +++ b/crates/loro-internal/src/arena.rs @@ -15,7 +15,7 @@ use crate::{ ContainerID, }, id::Counter, - op::{Op, RawOp, RawOpContent}, + op::{InnerContent, Op, RawOp, RawOpContent}, LoroValue, }; 
@@ -131,7 +131,22 @@ impl<'a> OpConverter<'a> { ListOp::Delete(span) => Op { counter, container, - content: crate::op::InnerContent::List(InnerListOp::Delete(span)), + content: InnerContent::List(InnerListOp::Delete(span)), + }, + ListOp::Style { + start, + end, + key, + info, + } => Op { + counter, + container, + content: InnerContent::List(InnerListOp::Style { + start, + end, + key, + info, + }), }, }, } @@ -351,6 +366,21 @@ impl SharedArena { container, content: crate::op::InnerContent::List(InnerListOp::Delete(span)), }, + ListOp::Style { + start, + end, + key, + info, + } => Op { + counter, + container, + content: InnerContent::List(InnerListOp::Style { + start, + end, + key, + info, + }), + }, }, } } diff --git a/crates/loro-internal/src/change.rs b/crates/loro-internal/src/change.rs index c822a309e..922335b17 100644 --- a/crates/loro-internal/src/change.rs +++ b/crates/loro-internal/src/change.rs @@ -166,3 +166,13 @@ pub fn get_sys_timestamp() -> Timestamp { now() as Timestamp } + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn size_of_change() { + let size = std::mem::size_of::<Change>(); + println!("{}", size); + } +} diff --git a/crates/loro-internal/src/container.rs b/crates/loro-internal/src/container.rs index 7cc167106..b52bc75ad 100644 --- a/crates/loro-internal/src/container.rs +++ b/crates/loro-internal/src/container.rs @@ -65,6 +65,7 @@ pub mod idx { pub mod list; pub mod map; +pub mod richtext; pub mod text; use idx::ContainerIdx; diff --git a/crates/loro-internal/src/container/list/list_op.rs b/crates/loro-internal/src/container/list/list_op.rs index 8c7bd1b1c..a9a64c850 100644 --- a/crates/loro-internal/src/container/list/list_op.rs +++ b/crates/loro-internal/src/container/list/list_op.rs @@ -4,22 +4,46 @@ use enum_as_inner::EnumAsInner; use rle::{HasLength, Mergable, Sliceable}; use serde::{Deserialize, Serialize}; -use crate::container::text::text_content::{ListSlice, SliceRange}; - -/// `pos` and `len` in [ListOp] are always measured in utf8 
bytes for text op. +use crate::{ + container::{ + richtext::TextStyleInfo, + text::text_content::{ListSlice, SliceRange}, + }, + InternalString, +}; + +/// `len` and `pos` are measured in unicode chars for text. // Note: It will be encoded into binary format, so the order of its fields should not be changed. #[derive(EnumAsInner, Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum ListOp<'a> { - Insert { slice: ListSlice<'a>, pos: usize }, + Insert { + slice: ListSlice<'a>, + pos: usize, + }, Delete(DeleteSpan), + Style { + start: u32, + end: u32, + key: InternalString, + info: TextStyleInfo, + }, } #[derive(EnumAsInner, Debug, Clone)] pub enum InnerListOp { // Note: len may not equal to slice.len() because for text len is unicode len while the slice // is utf8 bytes. - Insert { slice: SliceRange, pos: usize }, + Insert { + slice: SliceRange, + pos: usize, + }, Delete(DeleteSpan), + Style { + start: u32, + end: u32, + key: InternalString, + info: TextStyleInfo, + }, } /// `len` can be negative so that we can merge text deletions efficiently. @@ -222,6 +246,7 @@ impl<'a> Mergable for ListOp<'a> { ListOp::Delete(other_span) => span.is_mergable(other_span, &()), _ => false, }, + ListOp::Style { .. } => false, } } @@ -242,6 +267,12 @@ impl<'a> Mergable for ListOp<'a> { ListOp::Delete(other_span) => span.merge(other_span, &()), _ => unreachable!(), }, + ListOp::Style { + start, + end, + key, + info, + } => unreachable!(), } } } @@ -251,6 +282,7 @@ impl<'a> HasLength for ListOp<'a> { match self { ListOp::Insert { slice, .. } => slice.content_len(), ListOp::Delete(span) => span.atom_len(), + ListOp::Style { .. } => 1, } } } @@ -263,6 +295,7 @@ impl<'a> Sliceable for ListOp<'a> { pos: *pos + from, }, ListOp::Delete(span) => ListOp::Delete(span.slice(from, to)), + a @ ListOp::Style { .. } => a.clone(), } } } @@ -285,6 +318,7 @@ impl Mergable for InnerListOp { InnerListOp::Delete(other_span) => span.is_mergable(other_span, &()), _ => false, }, + InnerListOp::Style { .. 
} => false, } } @@ -305,6 +339,7 @@ impl Mergable for InnerListOp { InnerListOp::Delete(other_span) => span.merge(other_span, &()), _ => unreachable!(), }, + InnerListOp::Style { .. } => unreachable!(), } } } @@ -314,6 +349,7 @@ impl HasLength for InnerListOp { match self { InnerListOp::Insert { slice, .. } => slice.content_len(), InnerListOp::Delete(span) => span.atom_len(), + InnerListOp::Style { .. } => 1, } } } @@ -326,6 +362,7 @@ impl Sliceable for InnerListOp { pos: *pos + from, }, InnerListOp::Delete(span) => InnerListOp::Delete(span.slice(from, to)), + InnerListOp::Style { .. } => self.clone(), } } } diff --git a/crates/loro-internal/src/container/richtext.rs b/crates/loro-internal/src/container/richtext.rs new file mode 100644 index 000000000..a2c3c2a97 --- /dev/null +++ b/crates/loro-internal/src/container/richtext.rs @@ -0,0 +1,222 @@ +//! # Index +//! +//! There are several types of indexes: +//! +//! - Unicode index: the index of a unicode code point in the text. +//! - Entity index: unicode index + style anchor index. Each unicode code point or style anchor is an entity. +//! - Utf16 index +//! +//! In [crate::op::Op], we always use entity index to persist richtext ops. +//! +//! The users of this type can only operate on unicode index or utf16 index, but calculated entity index will be provided. + +mod query_by_len; +mod richtext_state; +mod style_range_map; +mod tinyvec; + +use loro_common::{Counter, LoroValue, PeerID, ID}; +use std::{ + borrow::Cow, + ops::{Range, RangeBounds}, +}; + +use crate::{change::Lamport, InternalString, VersionVector}; + +use super::list::list_op::ListOp; + +/// This is the data structure that represents a span of rich text. +/// It's used to communicate with the frontend. +#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct RichtextSpan<'a> { + pub text: Cow<'a, str>, + pub styles: Vec