diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs index 75f77242b9e1..926a874219b6 100644 --- a/arrow-buffer/src/buffer/mutable.rs +++ b/arrow-buffer/src/buffer/mutable.rs @@ -112,6 +112,10 @@ impl MutableBuffer { /// Allocate a new [MutableBuffer] with initial capacity to be at least `capacity`. /// /// See [`MutableBuffer::with_capacity`]. + /// + /// # Panics + /// + /// See [`MutableBuffer::with_capacity`]. #[inline] pub fn new(capacity: usize) -> Self { Self::with_capacity(capacity) @@ -147,6 +151,7 @@ impl MutableBuffer { /// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` where /// all bytes are guaranteed to be `0u8`. + /// /// # Example /// ``` /// # use arrow_buffer::buffer::{Buffer, MutableBuffer}; @@ -156,6 +161,10 @@ impl MutableBuffer { /// let data = buffer.as_slice_mut(); /// assert_eq!(data[126], 0u8); /// ``` + /// + /// # Panics + /// + /// Panics if `len` is too large to construct a valid allocation [`Layout`] pub fn from_len_zeroed(len: usize) -> Self { let layout = Layout::from_size_align(len, ALIGNMENT).unwrap(); let data = match layout.size() { @@ -199,6 +208,10 @@ impl MutableBuffer { /// creates a new [MutableBuffer] with capacity and length capable of holding `len` bits. /// This is useful to create a buffer for packed bitmaps. + /// + /// # Panics + /// + /// See [`MutableBuffer::from_len_zeroed`]. pub fn new_null(len: usize) -> Self { let num_bytes = bit_util::ceil(len, 8); MutableBuffer::from_len_zeroed(num_bytes) @@ -210,6 +223,10 @@ impl MutableBuffer { /// This is useful when one wants to clear (or set) the bits and then manipulate /// the buffer directly (e.g., modifying the buffer by holding a mutable reference /// from `data_mut()`). + /// + /// # Panics + /// + /// Panics if `end` exceeds the buffer capacity. pub fn with_bitset(mut self, end: usize, val: bool) -> Self { assert!(end <= self.layout.size()); let v = if val { 255 } else { 0 }; @@ -225,6 +242,10 @@ impl MutableBuffer { /// This is used to initialize the bits in a buffer, however, it has no impact on the /// `len` of the buffer and so can be used to initialize the memory region from /// `len` to `capacity`. + /// + /// # Panics + /// + /// Panics if the byte range `start..start + count` exceeds the buffer capacity. pub fn set_null_bits(&mut self, start: usize, count: usize) { assert!( start.saturating_add(count) <= self.layout.size(), @@ -250,11 +271,19 @@ impl MutableBuffer { /// let buffer: Buffer = buffer.into(); /// assert_eq!(buffer.len(), 253); /// ``` + /// + /// # Panics + /// + /// Panics if `self.len + additional` overflows `usize`, or if the required capacity is too + /// large to round up to the next 64-byte boundary and construct a valid allocation layout. // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just // exits. #[inline(always)] pub fn reserve(&mut self, additional: usize) { - let required_cap = self.len + additional; + let required_cap = self + .len + .checked_add(additional) + .expect("buffer length overflow"); if required_cap > self.layout.size() { let new_capacity = bit_util::round_upto_multiple_of_64(required_cap); let new_capacity = std::cmp::max(new_capacity, self.layout.size() * 2); @@ -274,6 +303,12 @@ impl MutableBuffer { /// buffer.repeat_slice_n_times(bytes_to_repeat, 3); /// assert_eq!(buffer.as_slice(), b"ababab"); /// ``` + /// + /// # Panics + /// + /// Panics if the repeated slice byte length overflows `usize`, if the resulting buffer + /// length overflows `usize`, or if reserving the required capacity fails for the same + /// reasons as [`MutableBuffer::reserve`]. pub fn repeat_slice_n_times( &mut self, slice_to_repeat: &[T], @@ -385,6 +420,11 @@ impl MutableBuffer { /// buffer.resize(253, 2); // allocates for the first time /// assert_eq!(buffer.as_slice()[252], 2u8); /// ``` + /// + /// # Panics + /// + /// Panics if growing the buffer requires reserving a capacity that fails for the same + /// reasons as [`MutableBuffer::reserve`]. // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just // exits. #[inline(always)] @@ -420,6 +460,11 @@ impl MutableBuffer { /// buffer.shrink_to_fit(); /// assert!(buffer.capacity() >= 64 && buffer.capacity() < 128); /// ``` + /// + /// # Panics + /// + /// Panics if the current length is too large to round up to the next 64-byte boundary and + /// construct a valid allocation layout. pub fn shrink_to_fit(&mut self) { let new_capacity = bit_util::round_upto_multiple_of_64(self.len); if new_capacity < self.layout.size() { @@ -493,8 +538,8 @@ impl MutableBuffer { /// /// # Panics /// - /// This function panics if the underlying buffer is not aligned - /// correctly for type `T`. + /// This function panics if the underlying buffer is not aligned correctly for type `T`, or + /// if its length is not a multiple of `size_of::()`. pub fn typed_data_mut(&mut self) -> &mut [T] { // SAFETY // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect @@ -508,8 +553,8 @@ impl MutableBuffer { /// /// # Panics /// - /// This function panics if the underlying buffer is not aligned - /// correctly for type `T`. + /// This function panics if the underlying buffer is not aligned correctly for type `T`, or + /// if its length is not a multiple of `size_of::()`. pub fn typed_data(&self) -> &[T] { // SAFETY // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect @@ -527,6 +572,11 @@ impl MutableBuffer { /// buffer.extend_from_slice(&[2u32, 0]); /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes /// ``` + /// + /// # Panics + /// + /// Panics if extending the buffer requires reserving a capacity that fails for the same + /// reasons as [`MutableBuffer::reserve`]. #[inline] pub fn extend_from_slice(&mut self, items: &[T]) { let additional = mem::size_of_val(items); @@ -550,6 +600,11 @@ impl MutableBuffer { /// buffer.push(256u32); /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes /// ``` + /// + /// # Panics + /// + /// Panics if extending the buffer requires reserving a capacity that fails for the same + /// reasons as [`MutableBuffer::reserve`]. #[inline] pub fn push(&mut self, item: T) { let additional = std::mem::size_of::(); @@ -575,13 +630,26 @@ impl MutableBuffer { } /// Extends the buffer by `additional` bytes equal to `0u8`, incrementing its capacity if needed. + /// + /// # Panics + /// + /// Panics if `self.len + additional` overflows `usize`, or if growing the buffer requires + /// reserving a capacity that fails for the same reasons as [`MutableBuffer::reserve`]. #[inline] pub fn extend_zeros(&mut self, additional: usize) { - self.resize(self.len + additional, 0); + let new_len = self + .len + .checked_add(additional) + .expect("buffer length overflow"); + self.resize(new_len, 0); } /// # Safety /// The caller must ensure that the buffer was properly initialized up to `len`. + /// + /// # Panics + /// + /// Panics if `len` exceeds the buffer capacity. #[inline] pub unsafe fn set_len(&mut self, len: usize) { assert!(len <= self.capacity()); @@ -726,6 +794,13 @@ impl MutableBuffer { /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) }; /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes /// ``` + /// + /// # Panics + /// + /// Panics if the iterator does not report an upper bound via `size_hint`, or if the + /// reported length does not match the number of items produced, or if allocating the + /// required buffer fails for the same reasons as [`MutableBuffer::new`]. + /// /// # Safety /// This method assumes that the iterator's size is correct and is undefined behavior /// to use it on an iterator that reports an incorrect length. @@ -770,6 +845,12 @@ impl MutableBuffer { /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) }; /// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte /// ``` + /// + /// # Panics + /// + /// Panics if the iterator does not report an upper bound via `size_hint`, or if it yields + /// fewer items than reported. + /// /// # Safety /// This method assumes that the iterator's size is correct and is undefined behavior /// to use it on an iterator that reports an incorrect length. @@ -788,6 +869,14 @@ impl MutableBuffer { /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors /// if any of the items of the iterator is an error. /// Prefer this to `collect` whenever possible, as it is faster ~60% faster. + /// + /// # Panics + /// + /// Panics if the iterator does not report an upper bound via `size_hint`, or if the + /// reported length does not match the number of items produced before an error-free finish, + /// or if allocating the required buffer fails for the same reasons as + /// [`MutableBuffer::new`]. + /// /// # Safety /// This method assumes that the iterator's size is correct and is undefined behavior /// to use it on an iterator that reports an incorrect length. diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs index e44397258fd3..0c58f2c5c792 100644 --- a/arrow-buffer/src/builder/mod.rs +++ b/arrow-buffer/src/builder/mod.rs @@ -426,4 +426,28 @@ mod tests { let slice = builder.as_slice_mut(); assert_eq!(slice.len(), 222); } + + #[test] + #[should_panic(expected = "buffer length overflow")] + fn reserve_length_overflow() { + let mut builder = BufferBuilder::::new(1); + builder.append(0); + builder.reserve(usize::MAX); + } + + #[test] + #[should_panic(expected = "buffer length overflow")] + fn append_n_zeroed_length_overflow() { + let mut builder = BufferBuilder::::new(1); + builder.append_n_zeroed(1); + builder.append_n_zeroed(usize::MAX / mem::size_of::()); + } + + #[test] + #[should_panic(expected = "buffer length overflow")] + fn advance_length_overflow() { + let mut builder = BufferBuilder::::new(1); + builder.advance(1); + builder.advance(usize::MAX / mem::size_of::()); + } }