Skip to content

Commit ab44f45

Browse files
authored
Merge pull request #2197 from cruessler/add-tests-for-slider-problem
Add script to generate tests cases for `gix-diff`
2 parents 385ab16 + fb2ee84 commit ab44f45

File tree

15 files changed

+535
-4
lines changed

15 files changed

+535
-4
lines changed

gix-diff/src/blob/unified_diff/impls.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,8 @@ where
260260
}
261261

262262
impl DiffLineKind {
263-
const fn to_prefix(self) -> char {
263+
/// Returns a one-character representation for use in unified diffs.
264+
pub const fn to_prefix(self) -> char {
264265
match self {
265266
DiffLineKind::Context => ' ',
266267
DiffLineKind::Add => '+',

gix-diff/tests/README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
## How to run diff-slider tests
2+
3+
The idea is to use https://github.com/mhagger/diff-slider-tools to create slider information for use to generate a test which invokes our own implementation and compares it to Git itself.
4+
Follow these instructions to set it up.
5+
6+
1. DIFF_SLIDER_TOOLS=/your/anticipated/path/to/diff-slider-tools
7+
2. `git clone https://github.com/mhagger/diff-slider-tools $DIFF_SLIDER_TOOLS`
8+
3. `pushd $DIFF_SLIDER_TOOLS`
9+
4. Follow [these instructions](https://github.com/mhagger/diff-slider-tools/blob/b59ed13d7a2a6cfe14a8f79d434b6221cc8b04dd/README.md?plain=1#L122-L146) to generate a file containing the slider information. Be sure to set the `repo` variable as it's used in later script invocations.
10+
- Note that `get-corpus` must be run with `./get-corpus`.
11+
- You can use an existing repository, for instance via `repo=git-human`, so there is no need to find your own repository to test.
12+
- The script suite is very slow, and it's recommended to only set a range of commits, or use a small repository for testing.
13+
14+
Finally, run the `internal-tools` program to turn that file into a fixture called `make_diff_for_sliders_repo.sh`.
15+
16+
```shell
17+
# run inside `gitoxide`
18+
popd
19+
cargo run --package internal-tools -- \
20+
create-diff-cases \
21+
--sliders-file $DIFF_SLIDER_TOOLS/corpus/$repo.sliders \
22+
--worktree-dir $DIFF_SLIDER_TOOLS/corpus/$repo.git/ \
23+
--destination-dir gix-diff/tests/fixtures/
24+
```
25+
26+
Finally, run `cargo test -p gix-diff-tests sliders -- --nocapture` to execute the actual tests to compare.

gix-diff/tests/diff/blob/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
pub(crate) mod pipeline;
22
mod platform;
3+
mod slider;
34
mod unified_diff;

gix-diff/tests/diff/blob/slider.rs

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
use gix_diff::blob::intern::TokenSource;
2+
use gix_diff::blob::unified_diff::ContextSize;
3+
use gix_diff::blob::{Algorithm, UnifiedDiff};
4+
use gix_testtools::bstr::{BString, ByteVec};
5+
6+
#[test]
7+
fn baseline() -> gix_testtools::Result {
8+
let worktree_path = gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")?;
9+
let asset_dir = worktree_path.join("assets");
10+
11+
let dir = std::fs::read_dir(&worktree_path)?;
12+
13+
let mut count = 0;
14+
for entry in dir {
15+
let entry = entry?;
16+
let file_name = entry.file_name().into_string().expect("to be string");
17+
18+
if !file_name.ends_with(".baseline") {
19+
continue;
20+
}
21+
count += 1;
22+
23+
let parts: Vec<_> = file_name.split('.').collect();
24+
let [name, algorithm, ..] = parts[..] else {
25+
unreachable!()
26+
};
27+
let algorithm = match algorithm {
28+
"myers" => Algorithm::Myers,
29+
"histogram" => Algorithm::Histogram,
30+
_ => unreachable!(),
31+
};
32+
33+
let parts: Vec<_> = name.split('-').collect();
34+
let [old_blob_id, new_blob_id] = parts[..] else {
35+
unreachable!();
36+
};
37+
38+
let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?;
39+
let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?;
40+
41+
let interner = gix_diff::blob::intern::InternedInput::new(
42+
tokens_for_diffing(old_data.as_slice()),
43+
tokens_for_diffing(new_data.as_slice()),
44+
);
45+
46+
let actual = gix_diff::blob::diff(
47+
algorithm,
48+
&interner,
49+
UnifiedDiff::new(
50+
&interner,
51+
baseline::DiffHunkRecorder::new(),
52+
ContextSize::symmetrical(3),
53+
),
54+
)?;
55+
56+
let baseline_path = worktree_path.join(file_name);
57+
let baseline = std::fs::read(baseline_path)?;
58+
let baseline = baseline::Baseline::new(&baseline);
59+
60+
let actual = actual
61+
.iter()
62+
.fold(BString::default(), |mut acc, diff_hunk| {
63+
acc.push_str(diff_hunk.header.to_string().as_str());
64+
acc.push(b'\n');
65+
66+
acc.extend_from_slice(&diff_hunk.lines);
67+
68+
acc
69+
})
70+
.to_string();
71+
72+
let baseline = baseline
73+
.fold(BString::default(), |mut acc, diff_hunk| {
74+
acc.push_str(diff_hunk.header.to_string().as_str());
75+
acc.push(b'\n');
76+
77+
acc.extend_from_slice(&diff_hunk.lines);
78+
79+
acc
80+
})
81+
.to_string();
82+
83+
pretty_assertions::assert_eq!(actual, baseline);
84+
}
85+
86+
if count == 0 {
87+
eprintln!("Slider baseline isn't setup - look at ./gix-diff/tests/README.md for instructions");
88+
}
89+
90+
Ok(())
91+
}
92+
93+
fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
94+
gix_diff::blob::sources::byte_lines(data)
95+
}
96+
97+
mod baseline {
98+
use gix_object::bstr::ByteSlice;
99+
use std::iter::Peekable;
100+
101+
use gix_diff::blob::unified_diff::{ConsumeHunk, HunkHeader};
102+
use gix_object::bstr::{self, BString};
103+
104+
static START_OF_HEADER: &[u8; 4] = b"@@ -";
105+
106+
#[derive(Debug, PartialEq)]
107+
pub struct DiffHunk {
108+
pub header: HunkHeader,
109+
pub lines: BString,
110+
}
111+
112+
pub struct DiffHunkRecorder {
113+
inner: Vec<DiffHunk>,
114+
}
115+
116+
impl DiffHunkRecorder {
117+
pub fn new() -> Self {
118+
Self { inner: Vec::new() }
119+
}
120+
}
121+
122+
impl ConsumeHunk for DiffHunkRecorder {
123+
type Out = Vec<DiffHunk>;
124+
125+
fn consume_hunk(
126+
&mut self,
127+
header: HunkHeader,
128+
lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])],
129+
) -> std::io::Result<()> {
130+
let mut buf = Vec::new();
131+
132+
for &(kind, line) in lines {
133+
buf.push(kind.to_prefix() as u8);
134+
buf.extend_from_slice(line);
135+
buf.push(b'\n');
136+
}
137+
138+
let diff_hunk = DiffHunk {
139+
header,
140+
lines: buf.into(),
141+
};
142+
143+
self.inner.push(diff_hunk);
144+
145+
Ok(())
146+
}
147+
148+
fn finish(self) -> Self::Out {
149+
self.inner
150+
}
151+
}
152+
153+
type Lines<'a> = Peekable<bstr::Lines<'a>>;
154+
155+
pub struct Baseline<'a> {
156+
lines: Lines<'a>,
157+
}
158+
159+
impl<'a> Baseline<'a> {
160+
pub fn new(content: &'a [u8]) -> Baseline<'a> {
161+
let mut lines = content.lines().peekable();
162+
skip_header(&mut lines);
163+
Baseline { lines }
164+
}
165+
}
166+
167+
impl Iterator for Baseline<'_> {
168+
type Item = DiffHunk;
169+
170+
fn next(&mut self) -> Option<Self::Item> {
171+
let mut hunk_header = None;
172+
let mut hunk_lines = Vec::new();
173+
174+
while let Some(line) = self.lines.next() {
175+
if line.starts_with(START_OF_HEADER) {
176+
assert!(hunk_header.is_none(), "should not overwrite existing hunk_header");
177+
hunk_header = parse_hunk_header(line).ok();
178+
179+
continue;
180+
}
181+
182+
match line[0] {
183+
b' ' | b'+' | b'-' => {
184+
hunk_lines.extend_from_slice(line);
185+
hunk_lines.push(b'\n');
186+
}
187+
_ => unreachable!("BUG: expecting unified diff format"),
188+
}
189+
190+
match self.lines.peek() {
191+
Some(next_line) if next_line.starts_with(START_OF_HEADER) => break,
192+
None => break,
193+
_ => {}
194+
}
195+
}
196+
197+
hunk_header.map(|hunk_header| DiffHunk {
198+
header: hunk_header,
199+
lines: hunk_lines.into(),
200+
})
201+
}
202+
}
203+
204+
fn skip_header(lines: &mut Lines) {
205+
// diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
206+
// index ccccccc..ddddddd 100644
207+
// --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
208+
// +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
209+
210+
let line = lines.next().expect("line to be present");
211+
assert!(line.starts_with(b"diff --git "));
212+
213+
let line = lines.next().expect("line to be present");
214+
assert!(line.starts_with(b"index "));
215+
216+
let line = lines.next().expect("line to be present");
217+
assert!(line.starts_with(b"--- "));
218+
219+
let line = lines.next().expect("line to be present");
220+
assert!(line.starts_with(b"+++ "));
221+
}
222+
223+
/// Parse diff hunk headers that conform to the unified diff hunk header format.
224+
///
225+
/// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This
226+
/// allows us to split the input on ` ` and `,` only.
227+
///
228+
/// @@ -18,6 +18,7 @@ abc def ghi
229+
/// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@
230+
fn parse_hunk_header(line: &[u8]) -> gix_testtools::Result<HunkHeader> {
231+
let line = line.strip_prefix(START_OF_HEADER).unwrap();
232+
233+
let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect();
234+
let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] else {
235+
unreachable!()
236+
};
237+
238+
Ok(HunkHeader {
239+
before_hunk_start: parse_number(before_hunk_start),
240+
before_hunk_len: parse_number(before_hunk_len),
241+
after_hunk_start: parse_number(after_hunk_start),
242+
after_hunk_len: parse_number(after_hunk_len),
243+
})
244+
}
245+
246+
fn parse_number(bytes: &[u8]) -> u32 {
247+
bytes
248+
.to_str()
249+
.expect("to be a valid UTF-8 string")
250+
.parse::<u32>()
251+
.expect("to be a number")
252+
}
253+
}

gix-diff/tests/fixtures/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# The slider assets that are needed for that slider fixture script to work and build the fixture.
2+
/assets/
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# The auto-generated sliders fixtures. For now it's experimental, but we may store it later once it's all working.
2+
/make_diff_for_sliders_repo.tar
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/usr/bin/env bash
2+
set -eu -o pipefail
3+
4+
# This script is a placeholder for a script generated by the
5+
# `create-diff-cases` subcommand of `internal-tools`. The placeholder does
6+
# nothing except making the associated test trivially pass.
7+
#
8+
# See the `gix-diff/tests/README.md` file for more information.

gix/src/repository/location.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,36 @@ impl crate::Repository {
5858
self.work_tree.as_deref()
5959
}
6060

61+
/// Forcefully set the given `workdir` to be the worktree of this repository, *in memory*,
62+
/// no matter if it had one or not, or unset it with `None`.
63+
/// Return the previous working directory if one existed.
64+
///
65+
/// Fail if the `workdir`, if not `None`, isn't accessible or isn't a directory.
66+
/// No change is performed on error.
67+
///
68+
/// ### About Worktrees
69+
///
70+
/// * When setting a main worktree to a linked worktree directory, this repository instance
71+
/// will still claim that it is the [main worktree](crate::Worktree::is_main()) as that depends
72+
/// on the `git_dir`, not the worktree dir.
73+
/// * When setting a linked worktree to a main worktree directory, this repository instance
74+
/// will still claim that it is *not* a [main worktree](crate::Worktree::is_main()) as that depends
75+
/// on the `git_dir`, not the worktree dir.
76+
#[doc(alias = "git2")]
77+
pub fn set_workdir(&mut self, workdir: impl Into<Option<PathBuf>>) -> Result<Option<PathBuf>, std::io::Error> {
78+
let workdir = workdir.into();
79+
Ok(match workdir {
80+
None => self.work_tree.take(),
81+
Some(new_workdir) => {
82+
_ = std::fs::read_dir(&new_workdir)?;
83+
84+
let old = self.work_tree.take();
85+
self.work_tree = Some(new_workdir);
86+
old
87+
}
88+
})
89+
}
90+
6191
/// Return the work tree containing all checked out files, if there is one.
6292
pub fn workdir(&self) -> Option<&std::path::Path> {
6393
self.work_tree.as_deref()

gix/src/repository/worktree.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ impl crate::Repository {
3030
res.sort_by(|a, b| a.git_dir.cmp(&b.git_dir));
3131
Ok(res)
3232
}
33+
3334
/// Return the repository owning the main worktree, typically from a linked worktree.
3435
///
3536
/// Note that it might be the one that is currently open if this repository doesn't point to a linked worktree.
@@ -41,7 +42,7 @@ impl crate::Repository {
4142

4243
/// Return the currently set worktree if there is one, acting as platform providing a validated worktree base path.
4344
///
44-
/// Note that there would be `None` if this repository is `bare` and the parent [`Repository`][crate::Repository] was instantiated without
45+
/// Note that there would be `None` if this repository is `bare` and the parent [`Repository`](crate::Repository) was instantiated without
4546
/// registered worktree in the current working dir, even if no `.git` file or directory exists.
4647
/// It's merely based on configuration, see [Worktree::dot_git_exists()] for a way to perform more validation.
4748
pub fn worktree(&self) -> Option<Worktree<'_>> {
@@ -50,7 +51,7 @@ impl crate::Repository {
5051

5152
/// Return true if this repository is bare, and has no main work tree.
5253
///
53-
/// This is not to be confused with the [`worktree()`][crate::Repository::worktree()] worktree, which may exists if this instance
54+
/// This is not to be confused with the [`worktree()`](crate::Repository::worktree()) method, which may exist if this instance
5455
/// was opened in a worktree that was created separately.
5556
pub fn is_bare(&self) -> bool {
5657
self.config.is_bare && self.workdir().is_none()

0 commit comments

Comments
 (0)