Skip to content

Commit 402a59a

Browse files
committed
switch to asynchronous diffing with similar
1 parent 47d3a80 commit 402a59a

File tree

8 files changed

+375
-65
lines changed

8 files changed

+375
-65
lines changed

Cargo.lock

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

helix-vcs/Cargo.toml

+6
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,13 @@ homepage = "https://helix-editor.com"
1111
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1212

1313
[dependencies]
14+
1415
git-repository = {version = "0.23.1", default-features = false}
16+
tokio = { version = "1", features = ["rt", "rt-multi-thread", "time", "sync", "parking_lot", "macros"] }
17+
similar = "2.2"
18+
ropey = { version = "1.5", default-features = false, features = ["simd"] }
19+
arc-swap = "1"
20+
1521

1622
[dev-dependencies]
1723
tempfile = "3.3"

helix-vcs/src/differ.rs

+180
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
use std::mem::take;
2+
use std::ops::Deref;
3+
use std::sync::Arc;
4+
5+
use arc_swap::ArcSwap;
6+
use ropey::{Rope, RopeSlice};
7+
use similar::{capture_diff_slices_deadline, Algorithm, DiffTag};
8+
use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
9+
use tokio::task::JoinHandle;
10+
use tokio::time::{timeout_at, Duration, Instant};
11+
12+
use crate::rope_line_cache::RopeLineCache;
13+
use crate::{LineDiff, LineDiffs};
14+
15+
#[cfg(test)]
16+
mod test;
17+
18+
/// Handle to a diff worker task.
///
/// New documents and diff bases are sent to the worker through `channel`;
/// the result the worker published last is read from `line_diffs`.
#[derive(Clone, Debug)]
19+
pub struct Differ {
20+
/// Sending half of the worker's event queue.
channel: UnboundedSender<Event>,
21+
/// Line diffs shared with the worker, which swaps in a new value after every diff.
line_diffs: Arc<ArcSwap<LineDiffs>>,
22+
}
23+
24+
impl Differ {
25+
pub fn new(diff_base: Rope, doc: Rope) -> Differ {
26+
Differ::new_with_handle(diff_base, doc).0
27+
}
28+
29+
fn new_with_handle(diff_base: Rope, doc: Rope) -> (Differ, JoinHandle<()>) {
30+
let (sender, reciver) = unbounded_channel();
31+
let line_diffs: Arc<ArcSwap<LineDiffs>> = Arc::default();
32+
let worker = DiffWorker {
33+
channel: reciver,
34+
line_diffs: line_diffs.clone(),
35+
new_line_diffs: LineDiffs::default(),
36+
};
37+
let handle = tokio::spawn(worker.run(diff_base, doc));
38+
let differ = Differ {
39+
channel: sender,
40+
line_diffs,
41+
};
42+
(differ, handle)
43+
}
44+
pub fn get_line_diffs(&self) -> impl Deref<Target = impl Deref<Target = LineDiffs>> {
45+
self.line_diffs.load()
46+
}
47+
48+
pub fn update_document(&self, doc: Rope) -> bool {
49+
self.channel.send(Event::UpdateDocument(doc)).is_ok()
50+
}
51+
52+
pub fn update_diff_base(&self, diff_base: Rope) -> bool {
53+
self.channel.send(Event::UpdateDiffBase(diff_base)).is_ok()
54+
}
55+
}
56+
57+
// TODO configuration
58+
const DIFF_MAX_DEBOUNCE: u64 = 200;
59+
const DIFF_DEBOUNCE: u64 = 10;
60+
const DIFF_TIMEOUT: u64 = 200;
61+
const MAX_DIFF_LEN: usize = 40000;
62+
const ALGORITHM: Algorithm = Algorithm::Myers;
63+
64+
/// State of the task that computes line diffs for a `Differ`.
struct DiffWorker {
65+
/// Receiving half of the event queue fed by the `Differ` handles.
channel: UnboundedReceiver<Event>,
66+
/// Published line diffs, shared with the `Differ` handles.
line_diffs: Arc<ArcSwap<LineDiffs>>,
67+
/// Diff under construction; moved into `line_diffs` by `apply_line_diff`.
new_line_diffs: LineDiffs,
68+
}
69+
70+
impl DiffWorker {
71+
async fn run(mut self, diff_base: Rope, doc: Rope) {
72+
let mut diff_base = RopeLineCache::new(diff_base);
73+
let mut doc = RopeLineCache::new(doc);
74+
self.perform_diff(diff_base.lines(), doc.lines());
75+
self.apply_line_diff();
76+
while let Some(event) = self.channel.recv().await {
77+
let mut accumulator = EventAccumulator::new();
78+
accumulator.handle_event(event);
79+
accumulator
80+
.accumualte_debounced_events(&mut self.channel)
81+
.await;
82+
83+
if let Some(new_doc) = accumulator.doc {
84+
doc.update(new_doc)
85+
}
86+
if let Some(new_base) = accumulator.diff_base {
87+
diff_base.update(new_base)
88+
}
89+
90+
self.perform_diff(diff_base.lines(), doc.lines());
91+
self.apply_line_diff();
92+
}
93+
}
94+
95+
/// update the line diff (used by the gutter) by replacing it with `self.new_line_diffs`.
96+
/// `self.new_line_diffs` is always empty after this function runs.
97+
/// To improve performance this function trys to reuse the allocation of the old diff previously stored in `self.line_diffs`
98+
fn apply_line_diff(&mut self) {
99+
let diff_to_apply = take(&mut self.new_line_diffs);
100+
let old_line_diff = self.line_diffs.swap(Arc::new(diff_to_apply));
101+
if let Ok(mut cached_alloc) = Arc::try_unwrap(old_line_diff) {
102+
cached_alloc.clear();
103+
self.new_line_diffs = cached_alloc;
104+
}
105+
}
106+
107+
fn perform_diff(&mut self, diff_base: &[RopeSlice<'_>], doc: &[RopeSlice<'_>]) {
108+
if diff_base.len() > MAX_DIFF_LEN || doc.len() > MAX_DIFF_LEN {
109+
return;
110+
}
111+
// TODO allow configuration algorithm
112+
// TODO configure diff deadline
113+
114+
let diff = capture_diff_slices_deadline(
115+
ALGORITHM,
116+
diff_base,
117+
doc,
118+
Some(std::time::Instant::now() + std::time::Duration::from_millis(DIFF_TIMEOUT)),
119+
);
120+
for op in diff {
121+
let (tag, _, line_range) = op.as_tag_tuple();
122+
let op = match tag {
123+
DiffTag::Insert => LineDiff::Added,
124+
DiffTag::Replace => LineDiff::Modified,
125+
DiffTag::Delete => {
126+
self.add_line_diff(line_range.start, LineDiff::Deleted);
127+
continue;
128+
}
129+
DiffTag::Equal => continue,
130+
};
131+
132+
for line in line_range {
133+
self.add_line_diff(line, op)
134+
}
135+
}
136+
}
137+
138+
fn add_line_diff(&mut self, line: usize, op: LineDiff) {
139+
self.new_line_diffs.insert(line, op);
140+
}
141+
}
142+
143+
/// Collects the ropes carried by a batch of events, one slot per event kind.
struct EventAccumulator {
144+
/// Rope from the last `Event::UpdateDiffBase` handled, if any.
diff_base: Option<Rope>,
145+
/// Rope from the last `Event::UpdateDocument` handled, if any.
doc: Option<Rope>,
146+
}
147+
impl EventAccumulator {
148+
fn new() -> EventAccumulator {
149+
EventAccumulator {
150+
diff_base: None,
151+
doc: None,
152+
}
153+
}
154+
fn handle_event(&mut self, event: Event) {
155+
match event {
156+
Event::UpdateDocument(doc) => self.doc = Some(doc),
157+
Event::UpdateDiffBase(new_diff_base) => self.diff_base = Some(new_diff_base),
158+
}
159+
}
160+
async fn accumualte_debounced_events(&mut self, channel: &mut UnboundedReceiver<Event>) {
161+
let final_time = Instant::now() + Duration::from_millis(DIFF_MAX_DEBOUNCE);
162+
let debounce = Duration::from_millis(DIFF_DEBOUNCE);
163+
loop {
164+
let mut debounce = Instant::now() + debounce;
165+
if final_time < debounce {
166+
debounce = final_time;
167+
}
168+
if let Ok(Some(event)) = timeout_at(debounce, channel.recv()).await {
169+
self.handle_event(event)
170+
} else {
171+
break;
172+
}
173+
}
174+
}
175+
}
176+
177+
/// Message sent from a `Differ` handle to its worker.
enum Event {
178+
/// Carries the rope that becomes the new document side of the diff.
UpdateDocument(Rope),
179+
/// Carries the rope that becomes the new base side of the diff.
UpdateDiffBase(Rope),
180+
}

helix-vcs/src/differ/test.rs

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
use ropey::Rope;
2+
use tokio::task::JoinHandle;
3+
4+
use crate::{Differ, LineDiff};
5+
6+
impl Differ {
7+
fn new_test(diff_base: &str, doc: &str) -> (Differ, JoinHandle<()>) {
8+
Differ::new_with_handle(Rope::from_str(diff_base), Rope::from_str(doc))
9+
}
10+
async fn into_diff(self, handle: JoinHandle<()>) -> Vec<(usize, LineDiff)> {
11+
let line_diffs = self.line_diffs;
12+
// dropping th echannel terminates the task
13+
drop(self.channel);
14+
handle.await.unwrap();
15+
let diffs = line_diffs.load();
16+
let mut res: Vec<_> = diffs.iter().map(|(&line, &op)| (line, op)).collect();
17+
res.sort_unstable_by_key(|&(line, _)| line);
18+
res
19+
}
20+
}
21+
22+
#[tokio::test]
23+
async fn append_line() {
24+
let (differ, handle) = Differ::new_test("foo\n", "foo\nbar\n");
25+
let line_diffs = differ.into_diff(handle).await;
26+
assert_eq!(&line_diffs, &[(1, LineDiff::Added)])
27+
}
28+
29+
#[tokio::test]
30+
async fn prepend_line() {
31+
let (differ, handle) = Differ::new_test("foo\n", "bar\nfoo\n");
32+
let line_diffs = differ.into_diff(handle).await;
33+
assert_eq!(&line_diffs, &[(0, LineDiff::Added)])
34+
}
35+
36+
#[tokio::test]
37+
async fn modify() {
38+
let (differ, handle) = Differ::new_test("foo\nbar\n", "foo bar\nbar\n");
39+
let line_diffs = differ.into_diff(handle).await;
40+
assert_eq!(&line_diffs, &[(0, LineDiff::Modified)])
41+
}
42+
43+
#[tokio::test]
44+
async fn delete_line() {
45+
let (differ, handle) = Differ::new_test("foo\nfoo bar\nbar\n", "foo\nbar\n");
46+
let line_diffs = differ.into_diff(handle).await;
47+
assert_eq!(&line_diffs, &[(1, LineDiff::Deleted)])
48+
}
49+
50+
#[tokio::test]
51+
async fn delete_line_and_modify() {
52+
let (differ, handle) = Differ::new_test("foo\nbar\ntest\nfoo", "foo\ntest\nfoo bar");
53+
let line_diffs = differ.into_diff(handle).await;
54+
assert_eq!(
55+
&line_diffs,
56+
&[(1, LineDiff::Deleted), (2, LineDiff::Modified)]
57+
)
58+
}
59+
60+
#[tokio::test]
61+
async fn add_use() {
62+
let (differ, handle) = Differ::new_test(
63+
"use ropey::Rope;\nuse tokio::task::JoinHandle;\n",
64+
"use ropey::Rope;\nuse ropey::RopeSlice;\nuse tokio::task::JoinHandle;\n",
65+
);
66+
let line_diffs = differ.into_diff(handle).await;
67+
assert_eq!(&line_diffs, &[(1, LineDiff::Added)])
68+
}
69+
70+
#[tokio::test]
71+
async fn update_document() {
72+
let (differ, handle) = Differ::new_test("foo\nbar\ntest\nfoo", "foo\nbar\ntest\nfoo");
73+
differ.update_document(Rope::from_str("foo\ntest\nfoo bar"));
74+
let line_diffs = differ.into_diff(handle).await;
75+
assert_eq!(
76+
&line_diffs,
77+
&[(1, LineDiff::Deleted), (2, LineDiff::Modified)]
78+
)
79+
}
80+
81+
#[tokio::test]
82+
async fn update_base() {
83+
let (differ, handle) = Differ::new_test("foo\ntest\nfoo bar", "foo\ntest\nfoo bar");
84+
differ.update_diff_base(Rope::from_str("foo\nbar\ntest\nfoo"));
85+
let line_diffs = differ.into_diff(handle).await;
86+
assert_eq!(
87+
&line_diffs,
88+
&[(1, LineDiff::Deleted), (2, LineDiff::Modified)]
89+
)
90+
}

helix-vcs/src/lib.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
use std::{collections::HashMap, path::Path};
22

3+
pub use differ::Differ;
34
pub use git::Git;
45

6+
mod differ;
57
mod git;
8+
mod rope_line_cache;
69

710
// TODO: Move to helix_core once we have a generic diff mode
8-
#[derive(Copy, Clone, Debug)]
11+
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
912
pub enum LineDiff {
1013
Added,
1114
Deleted,

helix-vcs/src/rope_line_cache.rs

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//! This module encapsulates a tiny bit of unsafe code that
2+
//! makes diffing significantly faster and more ergonomic to implement.
3+
//! This code is necessary because diffing requires quick random
4+
//! access to the lines of the text that is being diffed.
5+
//!
6+
//! Therefore it is best to collect the `Rope::lines` iterator into a vec
7+
//! first because access to the vec is `O(1)` where `Rope::line` is `O(log N)`.
8+
//! However this process can allocate a (potentially quite large) vector.
9+
//!
10+
//! To avoid reallocation for every diff, the vector is reused.
11+
//! However the RopeSlice references the original rope and therefore forms a self-referential data structure.
12+
//! A transmute is used to change the lifetime of the slice to static to circumvent that problem.
13+
use std::mem::transmute;
14+
15+
use ropey::{Rope, RopeSlice};
16+
17+
/// A cache that stores the `lines` of a rope as a vector.
18+
/// It allows safely reusing the allocation of the vec when updating the rope
19+
pub(crate) struct RopeLineCache {
20+
rope: Rope,
21+
lines: Vec<RopeSlice<'static>>,
22+
}
23+
24+
impl RopeLineCache {
25+
pub fn new(rope: Rope) -> RopeLineCache {
26+
let mut res = RopeLineCache {
27+
rope,
28+
lines: Vec::new(),
29+
};
30+
res.update_lines();
31+
res
32+
}
33+
34+
pub fn update(&mut self, rope: Rope) {
35+
self.lines.clear();
36+
self.rope = rope;
37+
self.update_lines()
38+
}
39+
40+
fn update_lines(&mut self) {
41+
debug_assert_eq!(self.lines.len(), 0);
42+
// Safety: This transmute is save because it only transmutes a liftime which have no effect.
43+
// The backing storage for the RopeSlices referred to by the lifetime is stored in `self.rope`.
44+
// Therefore as long as `self.rope` is not dropped/replaced this memory remains valid.
45+
// `self.rope` is only changed `self.update`, which clear the generated slices.
46+
// Furthermore when these slices are exposed to consumer in `self.lines`, the lifetime is bounded to a reference to self.
47+
// That means that on calls to update there exist no references to the slices in `self.lines`.
48+
let lines = self
49+
.rope
50+
.lines()
51+
.map(|line: RopeSlice| -> RopeSlice<'static> { unsafe { transmute(line) } });
52+
self.lines.extend(lines);
53+
54+
// if self
55+
// .lines
56+
// .last()
57+
// .and_then(|last| last.as_str())
58+
// .map_or(false, |last| last.is_empty())
59+
// {
60+
// self.lines.pop();
61+
// }
62+
}
63+
64+
// pub fn rope(&self) -> &Rope {
65+
// &self.rope
66+
// }
67+
68+
pub fn lines(&self) -> &[RopeSlice] {
69+
&self.lines
70+
}
71+
}

0 commit comments

Comments
 (0)