From d3d402c518062f6d299beb57b62f53a72ded32cb Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 7 Apr 2025 01:00:37 +0000 Subject: [PATCH] splitstream: Drop an unsafe This one didn't have a `SAFETY` comment; looking through git log here I do see: > It no longer does internal buffer allocations when reading tar headers, saving > another ~10% off the total time to print a merged dumpfile. However, it looks like that was going from allocation-per-read to "read into a global buffer", which we're still doing here. I didn't measure performance here, but I think we have other performance worries and it's not worth carrying an `unsafe` for this. In very hot I/O paths one *can* measure the impact of zero-initializing, but I'm doubtful of that here. Just looking at the bigger picture here I think instead of a generic `R: Read` we should take a `BufReader` and then use `fill_buf` on it, which is intended for this use case. Anyways for now just drop the `unsafe`. Also add tests. Signed-off-by: Colin Walters --- src/splitstream.rs | 69 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/src/splitstream.rs b/src/splitstream.rs index aef1344..fc7756e 100644 --- a/src/splitstream.rs +++ b/src/splitstream.rs @@ -198,13 +198,13 @@ fn read_u64_le(reader: &mut R) -> Result> { } } +/// Using the provided [`vec`] as a buffer, read exactly [`size`] +/// bytes of content from [`reader`] into it. Any existing content +/// in [`vec`] will be discarded; however its capacity will be reused, +/// making this function suitable for use in loops. fn read_into_vec(reader: &mut impl Read, vec: &mut Vec, size: usize) -> Result<()> { - unsafe { - vec.truncate(0); - vec.reserve(size); - reader.read_exact(std::slice::from_raw_parts_mut(vec.as_mut_ptr(), size))?; - vec.set_len(size); - } + vec.resize(size, 0u8); + reader.read_exact(vec.as_mut_slice())?; Ok(()) } @@ -391,3 +391,60 @@ impl Read for SplitStreamReader { } } } + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + + #[test] + fn test_read_into_vec() -> Result<()> { + // Test with an empty reader + let mut reader = Cursor::new(vec![]); + let mut vec = Vec::new(); + let result = read_into_vec(&mut reader, &mut vec, 0); + assert!(result.is_ok()); + assert_eq!(vec.len(), 0); + + // Test with a reader that has some data + let mut reader = Cursor::new(vec![1, 2, 3, 4, 5]); + let mut vec = Vec::new(); + let result = read_into_vec(&mut reader, &mut vec, 3); + assert!(result.is_ok()); + assert_eq!(vec.len(), 3); + assert_eq!(vec, vec![1, 2, 3]); + + // Test reading more than the reader has + let mut reader = Cursor::new(vec![1, 2, 3]); + let mut vec = Vec::new(); + let result = read_into_vec(&mut reader, &mut vec, 5); + assert!(result.is_err()); + + // Test reading exactly what the reader has + let mut reader = Cursor::new(vec![1, 2, 3]); + let mut vec = Vec::new(); + let result = read_into_vec(&mut reader, &mut vec, 3); + assert!(result.is_ok()); + assert_eq!(vec.len(), 3); + assert_eq!(vec, vec![1, 2, 3]); + + // Test reading into a vector with existing capacity + let mut reader = Cursor::new(vec![1, 2, 3, 4, 5]); + let mut vec = Vec::with_capacity(10); + let result = read_into_vec(&mut reader, &mut vec, 4); + assert!(result.is_ok()); + assert_eq!(vec.len(), 4); + assert_eq!(vec, vec![1, 2, 3, 4]); + assert_eq!(vec.capacity(), 10); + + // Test reading into a vector with existing data + let mut reader = Cursor::new(vec![1, 2, 3]); + let mut vec = vec![9, 9, 9]; + let result = read_into_vec(&mut reader, &mut vec, 2); + assert!(result.is_ok()); + assert_eq!(vec.len(), 2); + assert_eq!(vec, vec![1, 2]); + + Ok(()) + } +}