|
| 1 | +use gix_diff::blob::intern::TokenSource; |
| 2 | +use gix_diff::blob::unified_diff::ContextSize; |
| 3 | +use gix_diff::blob::{Algorithm, UnifiedDiff}; |
| 4 | +use gix_testtools::bstr::{BString, ByteVec}; |
| 5 | + |
| 6 | +#[test] |
| 7 | +fn baseline() -> gix_testtools::Result { |
| 8 | + let worktree_path = gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")?; |
| 9 | + let asset_dir = worktree_path.join("assets"); |
| 10 | + |
| 11 | + let dir = std::fs::read_dir(&worktree_path)?; |
| 12 | + |
| 13 | + let mut count = 0; |
| 14 | + for entry in dir { |
| 15 | + let entry = entry?; |
| 16 | + let file_name = entry.file_name().into_string().expect("to be string"); |
| 17 | + |
| 18 | + if !file_name.ends_with(".baseline") { |
| 19 | + continue; |
| 20 | + } |
| 21 | + count += 1; |
| 22 | + |
| 23 | + let parts: Vec<_> = file_name.split('.').collect(); |
| 24 | + let [name, algorithm, ..] = parts[..] else { |
| 25 | + unreachable!() |
| 26 | + }; |
| 27 | + let algorithm = match algorithm { |
| 28 | + "myers" => Algorithm::Myers, |
| 29 | + "histogram" => Algorithm::Histogram, |
| 30 | + _ => unreachable!(), |
| 31 | + }; |
| 32 | + |
| 33 | + let parts: Vec<_> = name.split('-').collect(); |
| 34 | + let [old_blob_id, new_blob_id] = parts[..] else { |
| 35 | + unreachable!(); |
| 36 | + }; |
| 37 | + |
| 38 | + let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?; |
| 39 | + let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?; |
| 40 | + |
| 41 | + let interner = gix_diff::blob::intern::InternedInput::new( |
| 42 | + tokens_for_diffing(old_data.as_slice()), |
| 43 | + tokens_for_diffing(new_data.as_slice()), |
| 44 | + ); |
| 45 | + |
| 46 | + let actual = gix_diff::blob::diff( |
| 47 | + algorithm, |
| 48 | + &interner, |
| 49 | + UnifiedDiff::new( |
| 50 | + &interner, |
| 51 | + baseline::DiffHunkRecorder::new(), |
| 52 | + ContextSize::symmetrical(3), |
| 53 | + ), |
| 54 | + )?; |
| 55 | + |
| 56 | + let baseline_path = worktree_path.join(file_name); |
| 57 | + let baseline = std::fs::read(baseline_path)?; |
| 58 | + let baseline = baseline::Baseline::new(&baseline); |
| 59 | + |
| 60 | + let actual = actual |
| 61 | + .iter() |
| 62 | + .fold(BString::default(), |mut acc, diff_hunk| { |
| 63 | + acc.push_str(diff_hunk.header.to_string().as_str()); |
| 64 | + acc.push(b'\n'); |
| 65 | + |
| 66 | + acc.extend_from_slice(&diff_hunk.lines); |
| 67 | + |
| 68 | + acc |
| 69 | + }) |
| 70 | + .to_string(); |
| 71 | + |
| 72 | + let baseline = baseline |
| 73 | + .fold(BString::default(), |mut acc, diff_hunk| { |
| 74 | + acc.push_str(diff_hunk.header.to_string().as_str()); |
| 75 | + acc.push(b'\n'); |
| 76 | + |
| 77 | + acc.extend_from_slice(&diff_hunk.lines); |
| 78 | + |
| 79 | + acc |
| 80 | + }) |
| 81 | + .to_string(); |
| 82 | + |
| 83 | + pretty_assertions::assert_eq!(actual, baseline); |
| 84 | + } |
| 85 | + |
| 86 | + if count == 0 { |
| 87 | + eprintln!("Slider baseline isn't setup - look at ./gix-diff/tests/README.md for instructions"); |
| 88 | + } |
| 89 | + |
| 90 | + Ok(()) |
| 91 | +} |
| 92 | + |
| 93 | +fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> { |
| 94 | + gix_diff::blob::sources::byte_lines(data) |
| 95 | +} |
| 96 | + |
| 97 | +mod baseline { |
| 98 | + use gix_object::bstr::ByteSlice; |
| 99 | + use std::iter::Peekable; |
| 100 | + |
| 101 | + use gix_diff::blob::unified_diff::{ConsumeHunk, HunkHeader}; |
| 102 | + use gix_object::bstr::{self, BString}; |
| 103 | + |
| 104 | + static START_OF_HEADER: &[u8; 4] = b"@@ -"; |
| 105 | + |
| 106 | + #[derive(Debug, PartialEq)] |
| 107 | + pub struct DiffHunk { |
| 108 | + pub header: HunkHeader, |
| 109 | + pub lines: BString, |
| 110 | + } |
| 111 | + |
| 112 | + pub struct DiffHunkRecorder { |
| 113 | + inner: Vec<DiffHunk>, |
| 114 | + } |
| 115 | + |
| 116 | + impl DiffHunkRecorder { |
| 117 | + pub fn new() -> Self { |
| 118 | + Self { inner: Vec::new() } |
| 119 | + } |
| 120 | + } |
| 121 | + |
| 122 | + impl ConsumeHunk for DiffHunkRecorder { |
| 123 | + type Out = Vec<DiffHunk>; |
| 124 | + |
| 125 | + fn consume_hunk( |
| 126 | + &mut self, |
| 127 | + header: HunkHeader, |
| 128 | + lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])], |
| 129 | + ) -> std::io::Result<()> { |
| 130 | + let mut buf = Vec::new(); |
| 131 | + |
| 132 | + for &(kind, line) in lines { |
| 133 | + buf.push(kind.to_prefix() as u8); |
| 134 | + buf.extend_from_slice(line); |
| 135 | + buf.push(b'\n'); |
| 136 | + } |
| 137 | + |
| 138 | + let diff_hunk = DiffHunk { |
| 139 | + header, |
| 140 | + lines: buf.into(), |
| 141 | + }; |
| 142 | + |
| 143 | + self.inner.push(diff_hunk); |
| 144 | + |
| 145 | + Ok(()) |
| 146 | + } |
| 147 | + |
| 148 | + fn finish(self) -> Self::Out { |
| 149 | + self.inner |
| 150 | + } |
| 151 | + } |
| 152 | + |
| 153 | + type Lines<'a> = Peekable<bstr::Lines<'a>>; |
| 154 | + |
| 155 | + pub struct Baseline<'a> { |
| 156 | + lines: Lines<'a>, |
| 157 | + } |
| 158 | + |
| 159 | + impl<'a> Baseline<'a> { |
| 160 | + pub fn new(content: &'a [u8]) -> Baseline<'a> { |
| 161 | + let mut lines = content.lines().peekable(); |
| 162 | + skip_header(&mut lines); |
| 163 | + Baseline { lines } |
| 164 | + } |
| 165 | + } |
| 166 | + |
| 167 | + impl Iterator for Baseline<'_> { |
| 168 | + type Item = DiffHunk; |
| 169 | + |
| 170 | + fn next(&mut self) -> Option<Self::Item> { |
| 171 | + let mut hunk_header = None; |
| 172 | + let mut hunk_lines = Vec::new(); |
| 173 | + |
| 174 | + while let Some(line) = self.lines.next() { |
| 175 | + if line.starts_with(START_OF_HEADER) { |
| 176 | + assert!(hunk_header.is_none(), "should not overwrite existing hunk_header"); |
| 177 | + hunk_header = parse_hunk_header(line).ok(); |
| 178 | + |
| 179 | + continue; |
| 180 | + } |
| 181 | + |
| 182 | + match line[0] { |
| 183 | + b' ' | b'+' | b'-' => { |
| 184 | + hunk_lines.extend_from_slice(line); |
| 185 | + hunk_lines.push(b'\n'); |
| 186 | + } |
| 187 | + _ => unreachable!("BUG: expecting unified diff format"), |
| 188 | + } |
| 189 | + |
| 190 | + match self.lines.peek() { |
| 191 | + Some(next_line) if next_line.starts_with(START_OF_HEADER) => break, |
| 192 | + None => break, |
| 193 | + _ => {} |
| 194 | + } |
| 195 | + } |
| 196 | + |
| 197 | + hunk_header.map(|hunk_header| DiffHunk { |
| 198 | + header: hunk_header, |
| 199 | + lines: hunk_lines.into(), |
| 200 | + }) |
| 201 | + } |
| 202 | + } |
| 203 | + |
| 204 | + fn skip_header(lines: &mut Lines) { |
| 205 | + // diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb |
| 206 | + // index ccccccc..ddddddd 100644 |
| 207 | + // --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa |
| 208 | + // +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb |
| 209 | + |
| 210 | + let line = lines.next().expect("line to be present"); |
| 211 | + assert!(line.starts_with(b"diff --git ")); |
| 212 | + |
| 213 | + let line = lines.next().expect("line to be present"); |
| 214 | + assert!(line.starts_with(b"index ")); |
| 215 | + |
| 216 | + let line = lines.next().expect("line to be present"); |
| 217 | + assert!(line.starts_with(b"--- ")); |
| 218 | + |
| 219 | + let line = lines.next().expect("line to be present"); |
| 220 | + assert!(line.starts_with(b"+++ ")); |
| 221 | + } |
| 222 | + |
| 223 | + /// Parse diff hunk headers that conform to the unified diff hunk header format. |
| 224 | + /// |
| 225 | + /// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This |
| 226 | + /// allows us to split the input on ` ` and `,` only. |
| 227 | + /// |
| 228 | + /// @@ -18,6 +18,7 @@ abc def ghi |
| 229 | + /// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@ |
| 230 | + fn parse_hunk_header(line: &[u8]) -> gix_testtools::Result<HunkHeader> { |
| 231 | + let line = line.strip_prefix(START_OF_HEADER).unwrap(); |
| 232 | + |
| 233 | + let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect(); |
| 234 | + let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] else { |
| 235 | + unreachable!() |
| 236 | + }; |
| 237 | + |
| 238 | + Ok(HunkHeader { |
| 239 | + before_hunk_start: parse_number(before_hunk_start), |
| 240 | + before_hunk_len: parse_number(before_hunk_len), |
| 241 | + after_hunk_start: parse_number(after_hunk_start), |
| 242 | + after_hunk_len: parse_number(after_hunk_len), |
| 243 | + }) |
| 244 | + } |
| 245 | + |
| 246 | + fn parse_number(bytes: &[u8]) -> u32 { |
| 247 | + bytes |
| 248 | + .to_str() |
| 249 | + .expect("to be a valid UTF-8 string") |
| 250 | + .parse::<u32>() |
| 251 | + .expect("to be a number") |
| 252 | + } |
| 253 | +} |
0 commit comments