|
| 1 | +//! Facilities to produce the unified diff format. |
| 2 | +//! |
| 3 | +//! Originally based on <https://github.com/pascalkuthe/imara-diff/pull/14>. |
| 4 | +
|
/// The number of unchanged context lines shown around each change.
///
/// This corresponds to the `-U` option of `git diff` and GNU `diff`. Where the context of
/// one change would overlap with the previous or next change, it is shortened accordingly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ContextSize {
    /// Number of context lines printed both before and after each change.
    symmetrical: u32,
}
| 14 | + |
| 15 | +impl Default for ContextSize { |
| 16 | + fn default() -> Self { |
| 17 | + ContextSize::symmetrical(3) |
| 18 | + } |
| 19 | +} |
| 20 | + |
| 21 | +/// Instantiation |
| 22 | +impl ContextSize { |
| 23 | + /// Create a symmetrical context with `n` lines before and after a changed hunk. |
| 24 | + pub fn symmetrical(n: u32) -> Self { |
| 25 | + ContextSize { symmetrical: n } |
| 26 | + } |
| 27 | +} |
| 28 | + |
/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff): it receives every finished
/// hunk and finally turns whatever it collected into an output value.
pub trait ConsumeHunk {
    /// The item this instance produces after consuming all hunks.
    type Out;

    /// Consume a single `hunk` in unified diff format, which would be prefixed with `header`.
    /// Both `header` and `hunk` already contain all of their newlines.
    ///
    /// The [`UnifiedDiff`](super::UnifiedDiff) sink wraps its output in an [`std::io::Result`],
    /// and once this method has returned its first error, it will not be called anymore.
    ///
    /// The remaining parameters are hunk-related information, identical to what is encoded
    /// in `header`:
    ///
    /// * `before_hunk_start` is the 1-based first line of this hunk in the old file.
    /// * `before_hunk_len` is the amount of lines of this hunk in the old file.
    /// * `after_hunk_start` is the 1-based first line of this hunk in the new file.
    /// * `after_hunk_len` is the amount of lines of this hunk in the new file.
    fn consume_hunk(
        &mut self,
        before_hunk_start: u32,
        before_hunk_len: u32,
        after_hunk_start: u32,
        after_hunk_len: u32,
        header: &str,
        hunk: &[u8],
    ) -> std::io::Result<()>;

    /// Called after the last hunk was consumed to produce the final output.
    fn finish(self) -> Self::Out;
}
| 57 | + |
| 58 | +pub(super) mod _impl { |
| 59 | + use super::{ConsumeHunk, ContextSize}; |
| 60 | + use bstr::{ByteSlice, ByteVec}; |
| 61 | + use imara_diff::{intern, Sink}; |
| 62 | + use intern::{InternedInput, Interner, Token}; |
| 63 | + use std::hash::Hash; |
| 64 | + use std::io::ErrorKind; |
| 65 | + use std::ops::Range; |
| 66 | + |
| 67 | + /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used, |
| 68 | + /// and passes it in full to a consumer. |
| 69 | + pub struct UnifiedDiff<'a, T, D> |
| 70 | + where |
| 71 | + T: Hash + Eq + AsRef<[u8]>, |
| 72 | + D: ConsumeHunk, |
| 73 | + { |
| 74 | + before: &'a [Token], |
| 75 | + after: &'a [Token], |
| 76 | + interner: &'a Interner<T>, |
| 77 | + |
| 78 | + pos: u32, |
| 79 | + before_hunk_start: u32, |
| 80 | + after_hunk_start: u32, |
| 81 | + before_hunk_len: u32, |
| 82 | + after_hunk_len: u32, |
| 83 | + /// Symmetrical context before and after the changed hunk. |
| 84 | + ctx_size: u32, |
| 85 | + |
| 86 | + buffer: Vec<u8>, |
| 87 | + header_buf: String, |
| 88 | + delegate: D, |
| 89 | + newline: &'a str, |
| 90 | + |
| 91 | + err: Option<std::io::Error>, |
| 92 | + } |
| 93 | + |
| 94 | + impl<'a, T, D> UnifiedDiff<'a, T, D> |
| 95 | + where |
| 96 | + T: Hash + Eq + AsRef<[u8]>, |
| 97 | + D: ConsumeHunk, |
| 98 | + { |
| 99 | + /// Create a new instance to create unified diff using the lines in `input`, |
| 100 | + /// which also must be used when running the diff algorithm. |
| 101 | + /// `context_size` is the amount of lines around each hunk which will be passed |
| 102 | + ///to `consume_hunk`. |
| 103 | + /// |
| 104 | + /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`, |
| 105 | + pub fn new( |
| 106 | + input: &'a InternedInput<T>, |
| 107 | + consume_hunk: D, |
| 108 | + newline_separator: &'a str, |
| 109 | + context_size: ContextSize, |
| 110 | + ) -> Self { |
| 111 | + Self { |
| 112 | + before_hunk_start: 0, |
| 113 | + after_hunk_start: 0, |
| 114 | + before_hunk_len: 0, |
| 115 | + after_hunk_len: 0, |
| 116 | + buffer: Vec::with_capacity(8), |
| 117 | + header_buf: String::new(), |
| 118 | + delegate: consume_hunk, |
| 119 | + interner: &input.interner, |
| 120 | + before: &input.before, |
| 121 | + after: &input.after, |
| 122 | + pos: 0, |
| 123 | + ctx_size: context_size.symmetrical, |
| 124 | + newline: newline_separator, |
| 125 | + |
| 126 | + err: None, |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + fn print_tokens(&mut self, tokens: &[Token], prefix: char) { |
| 131 | + for &token in tokens { |
| 132 | + self.buffer.push_char(prefix); |
| 133 | + self.buffer.push_str(&self.interner[token]); |
| 134 | + self.buffer.push_str(self.newline.as_bytes()); |
| 135 | + } |
| 136 | + } |
| 137 | + |
| 138 | + fn flush(&mut self) -> std::io::Result<()> { |
| 139 | + if self.before_hunk_len == 0 && self.after_hunk_len == 0 { |
| 140 | + return Ok(()); |
| 141 | + } |
| 142 | + |
| 143 | + let end = (self.pos + self.ctx_size).min(self.before.len() as u32); |
| 144 | + self.update_pos(end, end); |
| 145 | + |
| 146 | + self.header_buf.clear(); |
| 147 | + |
| 148 | + std::fmt::Write::write_fmt( |
| 149 | + &mut self.header_buf, |
| 150 | + format_args!( |
| 151 | + "@@ -{},{} +{},{} @@{nl}", |
| 152 | + self.before_hunk_start + 1, |
| 153 | + self.before_hunk_len, |
| 154 | + self.after_hunk_start + 1, |
| 155 | + self.after_hunk_len, |
| 156 | + nl = self.newline |
| 157 | + ), |
| 158 | + ) |
| 159 | + .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?; |
| 160 | + self.delegate.consume_hunk( |
| 161 | + self.before_hunk_start + 1, |
| 162 | + self.before_hunk_len, |
| 163 | + self.after_hunk_start + 1, |
| 164 | + self.after_hunk_len, |
| 165 | + &self.header_buf, |
| 166 | + &self.buffer, |
| 167 | + )?; |
| 168 | + self.buffer.clear(); |
| 169 | + self.before_hunk_len = 0; |
| 170 | + self.after_hunk_len = 0; |
| 171 | + Ok(()) |
| 172 | + } |
| 173 | + |
| 174 | + fn update_pos(&mut self, print_to: u32, move_to: u32) { |
| 175 | + self.print_tokens(&self.before[self.pos as usize..print_to as usize], ' '); |
| 176 | + let len = print_to - self.pos; |
| 177 | + self.pos = move_to; |
| 178 | + self.before_hunk_len += len; |
| 179 | + self.after_hunk_len += len; |
| 180 | + } |
| 181 | + } |
| 182 | + |
| 183 | + impl<T, D> Sink for UnifiedDiff<'_, T, D> |
| 184 | + where |
| 185 | + T: Hash + Eq + AsRef<[u8]>, |
| 186 | + D: ConsumeHunk, |
| 187 | + { |
| 188 | + type Out = std::io::Result<D::Out>; |
| 189 | + |
| 190 | + fn process_change(&mut self, before: Range<u32>, after: Range<u32>) { |
| 191 | + if self.err.is_some() { |
| 192 | + return; |
| 193 | + } |
| 194 | + if ((self.pos == 0) && (before.start - self.pos > self.ctx_size)) |
| 195 | + || (before.start - self.pos > 2 * self.ctx_size) |
| 196 | + { |
| 197 | + if let Err(err) = self.flush() { |
| 198 | + self.err = Some(err); |
| 199 | + return; |
| 200 | + } |
| 201 | + self.pos = before.start - self.ctx_size; |
| 202 | + self.before_hunk_start = self.pos; |
| 203 | + self.after_hunk_start = after.start - self.ctx_size; |
| 204 | + } |
| 205 | + self.update_pos(before.start, before.end); |
| 206 | + self.before_hunk_len += before.end - before.start; |
| 207 | + self.after_hunk_len += after.end - after.start; |
| 208 | + self.print_tokens(&self.before[before.start as usize..before.end as usize], '-'); |
| 209 | + self.print_tokens(&self.after[after.start as usize..after.end as usize], '+'); |
| 210 | + } |
| 211 | + |
| 212 | + fn finish(mut self) -> Self::Out { |
| 213 | + if let Err(err) = self.flush() { |
| 214 | + self.err = Some(err); |
| 215 | + } |
| 216 | + if let Some(err) = self.err { |
| 217 | + return Err(err); |
| 218 | + } |
| 219 | + Ok(self.delegate.finish()) |
| 220 | + } |
| 221 | + } |
| 222 | + |
| 223 | + /// An implementation that fails if the input isn't UTF-8. |
| 224 | + impl ConsumeHunk for String { |
| 225 | + type Out = Self; |
| 226 | + |
| 227 | + fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> { |
| 228 | + self.push_str(header); |
| 229 | + self.push_str( |
| 230 | + hunk.to_str() |
| 231 | + .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?, |
| 232 | + ); |
| 233 | + Ok(()) |
| 234 | + } |
| 235 | + |
| 236 | + fn finish(self) -> Self::Out { |
| 237 | + self |
| 238 | + } |
| 239 | + } |
| 240 | + |
| 241 | + /// An implementation that writes hunks into a byte buffer. |
| 242 | + impl ConsumeHunk for Vec<u8> { |
| 243 | + type Out = Self; |
| 244 | + |
| 245 | + fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> { |
| 246 | + self.push_str(header); |
| 247 | + self.push_str(hunk); |
| 248 | + Ok(()) |
| 249 | + } |
| 250 | + |
| 251 | + fn finish(self) -> Self::Out { |
| 252 | + self |
| 253 | + } |
| 254 | + } |
| 255 | +} |
0 commit comments