diff --git a/vortex-compute/src/arrow/fixed_size_list.rs b/vortex-compute/src/arrow/fixed_size_list.rs
new file mode 100644
index 00000000000..89813ea8b9f
--- /dev/null
+++ b/vortex-compute/src/arrow/fixed_size_list.rs
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+use std::sync::Arc;
+
+use arrow_array::{ArrayRef, FixedSizeListArray};
+use arrow_schema::Field;
+use vortex_error::{VortexResult, vortex_err};
+use vortex_vector::FixedSizeListVector;
+
+use crate::arrow::IntoArrow;
+
+impl IntoArrow<ArrayRef> for FixedSizeListVector {
+    /// Converts the vector into an Arrow `FixedSizeListArray`, marking the item field nullable.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `list_size` does not fit in an `i32` (the width Arrow uses for the list
+    /// size), or if converting the child elements or building the Arrow array fails.
+    fn into_arrow(self) -> VortexResult<ArrayRef> {
+        let (elements, list_size, validity) = self.into_parts();
+
+        // Arrow stores the list size as an `i32`: fail loudly rather than wrap with an `as` cast.
+        let list_size = i32::try_from(list_size).map_err(|_| {
+            vortex_err!("FixedSizeListVector list_size {list_size} does not fit in an Arrow i32")
+        })?;
+
+        // Reuse the child vector when we are its only owner; clone only if it is still shared.
+        let converted_elements = Arc::unwrap_or_clone(elements).into_arrow()?;
+        let field = Arc::new(Field::new_list_field(
+            converted_elements.data_type().clone(),
+            true, // Vectors are always nullable.
+        ));
+
+        Ok(Arc::new(FixedSizeListArray::try_new(
+            field,
+            list_size,
+            converted_elements,
+            validity.into_arrow()?,
+        )?))
+    }
+}
diff --git a/vortex-compute/src/arrow/mod.rs b/vortex-compute/src/arrow/mod.rs
index 816ecbd4a5f..f3b67b96b1a 100644
--- a/vortex-compute/src/arrow/mod.rs
+++ b/vortex-compute/src/arrow/mod.rs
@@ -8,6 +8,7 @@ use vortex_error::VortexResult;
 mod binaryview;
 mod bool;
 mod decimal;
+mod fixed_size_list;
 mod mask;
 mod null;
 mod primitive;
diff --git a/vortex-compute/src/mask/mod.rs b/vortex-compute/src/mask/mod.rs
index f1c2f004ac8..a47c0c7442f 100644
--- a/vortex-compute/src/mask/mod.rs
+++ b/vortex-compute/src/mask/mod.rs
@@ -8,9 +8,9 @@ use std::ops::BitAnd;
 use vortex_dtype::{NativeDecimalType, NativePType};
 use vortex_mask::Mask;
 use vortex_vector::{
-    BinaryViewType, BinaryViewVector, BoolVector, DVector, DecimalVector, NullVector, PVector,
-    PrimitiveVector, StructVector, Vector, match_each_dvector, match_each_pvector,
-    match_each_vector,
+    BinaryViewType, BinaryViewVector, BoolVector, DVector, DecimalVector, FixedSizeListVector,
+    NullVector, PVector, PrimitiveVector, StructVector, Vector, match_each_dvector,
+    match_each_pvector, match_each_vector,
 };
 
 /// Trait for masking the validity of an array or vector.
@@ -79,6 +79,15 @@ impl MaskValidity for BinaryViewVector {
     }
 }
 
+impl MaskValidity for FixedSizeListVector {
+    fn mask_validity(self, mask: &Mask) -> Self {
+        let (elements, list_size, validity) = self.into_parts();
+        // SAFETY: we are preserving the original elements and `list_size`, only modifying the
+        // validity.
+        unsafe { Self::new_unchecked(elements, list_size, validity.bitand(mask)) }
+    }
+}
+
 impl MaskValidity for StructVector {
     fn mask_validity(self, mask: &Mask) -> Self {
         let (fields, validity) = self.into_parts();
diff --git a/vortex-vector/src/fixed_size_list/mod.rs b/vortex-vector/src/fixed_size_list/mod.rs
new file mode 100644
index 00000000000..561e69a2ac7
--- /dev/null
+++ b/vortex-vector/src/fixed_size_list/mod.rs
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Definition and implementation of [`FixedSizeListVector`] and [`FixedSizeListVectorMut`].
+
+mod vector;
+pub use vector::FixedSizeListVector;
+
+mod vector_mut;
+pub use vector_mut::FixedSizeListVectorMut;
+
+use crate::{Vector, VectorMut};
+
+impl From<FixedSizeListVector> for Vector {
+    fn from(v: FixedSizeListVector) -> Self {
+        Self::FixedSizeList(v)
+    }
+}
+
+impl From<FixedSizeListVectorMut> for VectorMut {
+    fn from(v: FixedSizeListVectorMut) -> Self {
+        Self::FixedSizeList(v)
+    }
+}
diff --git a/vortex-vector/src/fixed_size_list/vector.rs b/vortex-vector/src/fixed_size_list/vector.rs
new file mode 100644
index 00000000000..10e042f5ce2
--- /dev/null
+++ b/vortex-vector/src/fixed_size_list/vector.rs
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Definition and implementation of [`FixedSizeListVector`].
+
+use std::sync::Arc;
+
+use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
+use vortex_mask::Mask;
+
+use crate::{FixedSizeListVectorMut, Vector, VectorOps};
+
+/// An immutable vector of fixed-size lists.
+///
+/// `FixedSizeListVector` can be considered a borrowed / frozen version of
+/// [`FixedSizeListVectorMut`], which is created via the [`freeze`](crate::VectorMutOps::freeze)
+/// method.
+///
+/// See the documentation for [`FixedSizeListVectorMut`] for more information.
+#[derive(Debug, Clone)]
+pub struct FixedSizeListVector {
+    /// The child vector of elements.
+    pub(super) elements: Arc<Vector>,
+
+    /// The size of every list in the vector.
+    pub(super) list_size: u32,
+
+    /// The validity mask (where `true` represents a list is **not** null).
+    ///
+    /// Note that the `elements` vector will have its own internal validity, denoting if individual
+    /// list elements are null.
+    pub(super) validity: Mask,
+
+    /// The length of the vector (which is the same as the length of the validity mask).
+    ///
+    /// This is stored here as a convenience, as the validity also tracks this information.
+    pub(super) len: usize,
+}
+
+impl FixedSizeListVector {
+    /// Creates a new [`FixedSizeListVector`] from the given `elements` vector, size of each list,
+    /// and validity mask.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the length of the `validity` mask multiplied by the `list_size` is not
+    /// equal to the length of the `elements` vector.
+    ///
+    /// Put another way, the length of the `elements` vector divided by the `list_size` must be
+    /// equal to the length of the validity, or this function will panic.
+    pub fn new(elements: Arc<Vector>, list_size: u32, validity: Mask) -> Self {
+        Self::try_new(elements, list_size, validity)
+            .vortex_expect("Failed to create `FixedSizeListVector`")
+    }
+
+    /// Tries to create a new [`FixedSizeListVector`] from the given `elements` vector, size of each
+    /// list, and validity mask.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the length of the `validity` mask multiplied by the `list_size` is not
+    /// equal to the length of the `elements` vector.
+    ///
+    /// Put another way, the length of the `elements` vector divided by the `list_size` must be
+    /// equal to the length of the validity.
+    pub fn try_new(elements: Arc<Vector>, list_size: u32, validity: Mask) -> VortexResult<Self> {
+        let len = validity.len();
+        let elements_len = elements.len();
+
+        if list_size == 0 {
+            vortex_ensure!(
+                elements.is_empty(),
+                "A degenerate (`list_size == 0`) `FixedSizeListVector` should have no underlying elements",
+            );
+        } else {
+            vortex_ensure!(
+                (list_size as usize).checked_mul(len) == Some(elements_len),
+                "Tried to create a `FixedSizeListVector` of length {len} and list_size {list_size} \
+                 with an child vector of size {elements_len} ({list_size} * {len} != {elements_len})",
+            );
+        }
+
+        Ok(Self {
+            elements,
+            list_size,
+            validity,
+            len,
+        })
+    }
+
+    /// Creates a new [`FixedSizeListVector`] from the given `elements` vector, size of each list,
+    /// and validity mask without validation (debug builds still validate and panic on mismatch).
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that the length of the `validity` mask multiplied by the `list_size`
+    /// is exactly equal to the length of the `elements` vector.
+    pub unsafe fn new_unchecked(elements: Arc<Vector>, list_size: u32, validity: Mask) -> Self {
+        let len = validity.len();
+
+        if cfg!(debug_assertions) {
+            Self::new(elements, list_size, validity)
+        } else {
+            Self {
+                elements,
+                list_size,
+                validity,
+                len,
+            }
+        }
+    }
+
+    /// Decomposes the `FixedSizeListVector` into its constituent parts (child elements, list size,
+    /// and validity).
+    pub fn into_parts(self) -> (Arc<Vector>, u32, Mask) {
+        (self.elements, self.list_size, self.validity)
+    }
+
+    /// Returns the child vector of elements, which represents the contiguous fixed-size lists of
+    /// the `FixedSizeListVector`.
+    pub fn elements(&self) -> &Arc<Vector> {
+        &self.elements
+    }
+
+    /// Returns the size of every list in the vector.
+    pub fn list_size(&self) -> u32 {
+        self.list_size
+    }
+}
+
+impl VectorOps for FixedSizeListVector {
+    type Mutable = FixedSizeListVectorMut;
+
+    fn len(&self) -> usize {
+        self.len
+    }
+
+    fn validity(&self) -> &Mask {
+        &self.validity
+    }
+
+    fn try_into_mut(self) -> Result<Self::Mutable, Self>
+    where
+        Self: Sized,
+    {
+        let len = self.len;
+        let list_size = self.list_size;
+
+        let elements = match Arc::try_unwrap(self.elements) {
+            Ok(elements) => elements,
+            Err(elements) => return Err(FixedSizeListVector { elements, ..self }),
+        };
+
+        let validity = match self.validity.try_into_mut() {
+            Ok(validity) => validity,
+            Err(validity) => {
+                return Err(FixedSizeListVector {
+                    elements: Arc::new(elements),
+                    list_size,
+                    validity,
+                    len,
+                });
+            }
+        };
+
+        match elements.try_into_mut() {
+            Ok(mutable_elements) => Ok(FixedSizeListVectorMut {
+                elements: Box::new(mutable_elements),
+                list_size,
+                validity,
+                len,
+            }),
+            Err(elements) => Err(FixedSizeListVector {
+                elements: Arc::new(elements),
+                list_size,
+                validity: validity.freeze(),
+                len,
+            }),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use vortex_mask::Mask;
+
+    use super::*;
+    use crate::{PVectorMut, Vector, VectorMutOps};
+
+    #[test]
+    fn test_constructor_and_validation() {
+        // Valid construction with new().
+        let elements: Arc<Vector> = Arc::new(
+            PVectorMut::<i32>::from_iter([1, 2, 3, 4, 5, 6])
+                .freeze()
+                .into(),
+        );
+        let validity = Mask::new_true(2);
+        let vec = FixedSizeListVector::new(elements.clone(), 3, validity.clone());
+        assert_eq!(vec.len(), 2);
+        assert_eq!(vec.list_size(), 3);
+
+        // Valid construction with try_new().
+        let result = FixedSizeListVector::try_new(elements.clone(), 3, validity);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap().len(), 2);
+
+        // Length mismatch error - elements length != list_size * validity length.
+        let bad_validity = Mask::new_true(3); // Should be 2 for 6 elements with list_size=3.
+        let result = FixedSizeListVector::try_new(elements.clone(), 3, bad_validity);
+        assert!(result.is_err());
+
+        // Degenerate case (list_size = 0) with empty elements is valid.
+        let empty_elements: Arc<Vector> = Arc::new(
+            PVectorMut::<i32>::from_iter(Vec::<i32>::new())
+                .freeze()
+                .into(),
+        );
+        let validity = Mask::new_true(5);
+        let result = FixedSizeListVector::try_new(empty_elements, 0, validity);
+        assert!(result.is_ok());
+        let vec = result.unwrap();
+        assert_eq!(vec.len(), 5);
+        assert_eq!(vec.list_size(), 0);
+
+        // Degenerate case with non-empty elements should fail.
+        let result = FixedSizeListVector::try_new(elements, 0, Mask::new_true(1));
+        assert!(result.is_err());
+
+        // Test unsafe new_unchecked in debug mode (it should still validate).
+        let elements: Arc<Vector> =
+            Arc::new(PVectorMut::<i32>::from_iter([1, 2, 3, 4]).freeze().into());
+        let validity = Mask::new_true(2);
+        let vec = unsafe { FixedSizeListVector::new_unchecked(elements, 2, validity) };
+        assert_eq!(vec.len(), 2);
+        assert_eq!(vec.list_size(), 2);
+    }
+
+    #[test]
+    fn test_try_into_mut_conversion() {
+        // Create a vector that we solely own.
+        let elements: Arc<Vector> = Arc::new(
+            PVectorMut::<i32>::from_iter([1, 2, 3, 4, 5, 6])
+                .freeze()
+                .into(),
+        );
+        let validity = Mask::new_true(2);
+        let vec = FixedSizeListVector::new(elements, 3, validity);
+
+        // Successful conversion when solely owned.
+        let result = vec.try_into_mut();
+        assert!(result.is_ok());
+        let mut_vec = result.unwrap();
+        assert_eq!(mut_vec.len(), 2);
+        assert_eq!(mut_vec.list_size(), 3);
+
+        // Freeze and try again - roundtrip test.
+        let vec = mut_vec.freeze();
+        let result = vec.try_into_mut();
+        assert!(result.is_ok());
+
+        // Test failed conversion with shared ownership.
+        let elements: Arc<Vector> =
+            Arc::new(PVectorMut::<i32>::from_iter([1, 2, 3, 4]).freeze().into());
+        let validity = Mask::new_true(2);
+        let vec = FixedSizeListVector::new(elements, 2, validity);
+
+        // Keep a clone to maintain shared ownership.
+        let _shared = vec.clone();
+
+        let result = vec.try_into_mut();
+        assert!(result.is_err());
+
+        // The error case should return the original vector.
+        if let Err(returned_vec) = result {
+            assert_eq!(returned_vec.len(), 2);
+            assert_eq!(returned_vec.list_size(), 2);
+        }
+    }
+
+    #[test]
+    fn test_accessors_and_parts() {
+        let elements: Arc<Vector> = Arc::new(
+            PVectorMut::<i32>::from_iter([1, 2, 3, 4, 5, 6])
+                .freeze()
+                .into(),
+        );
+        let validity = Mask::new_true(3);
+        let vec = FixedSizeListVector::new(elements, 2, validity);
+
+        // Test accessors.
+        assert_eq!(vec.len(), 3);
+        assert_eq!(vec.list_size(), 2);
+        assert_eq!(vec.elements().len(), 6);
+        assert_eq!(vec.validity().true_count(), 3);
+
+        // Test into_parts.
+        let (parts_elements, list_size, parts_validity) = vec.into_parts();
+        assert_eq!(parts_elements.len(), 6);
+        assert_eq!(list_size, 2);
+        assert_eq!(parts_validity.true_count(), 3);
+    }
+}
diff --git a/vortex-vector/src/fixed_size_list/vector_mut.rs b/vortex-vector/src/fixed_size_list/vector_mut.rs
new file mode 100644
index 00000000000..70f33ec008f
--- /dev/null
+++ b/vortex-vector/src/fixed_size_list/vector_mut.rs
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Definition and implementation of [`FixedSizeListVectorMut`].
+
+use std::sync::Arc;
+
+use vortex_dtype::DType;
+use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
+use vortex_mask::MaskMut;
+
+use crate::{FixedSizeListVector, VectorMut, VectorMutOps, match_vector_pair};
+
+/// A mutable vector of fixed-size lists.
+///
+/// `FixedSizeList` vectors can mostly be thought of as a wrapper around other vectors that "groups"
+/// a fixed number of elements together for each list scalar.
+///
+/// More specifically, each list scalar in the vector has the same number of elements (fixed size),
+/// with all list elements stored contiguously in a child [`VectorMut`].
+///
+/// Note that the validity mask tracks which lists are null, not which individual elements are null.
+///
+/// # Structure
+///
+/// For a vector of `n` lists each with size `list_size`:
+/// - The `elements` vector has length `n * list_size`
+/// - The `validity` mask has length `n`
+/// - Each list `i` occupies `elements[i * list_size..(i+1) * list_size]`
+///
+/// # Examples
+///
+/// ## Working with nulls
+///
+/// Nulls can exist at two levels: entire lists can be null, or individual elements within lists can
+/// be null.
+///
+/// ```
+/// use vortex_vector::{FixedSizeListVectorMut, PVectorMut, VectorMut, VectorMutOps};
+/// use vortex_mask::{Mask, MaskMut};
+///
+/// // Create elements with some null values.
+/// // This will be 9 elements total: [1, null, 3, 4, 5, null, null, 8, 9]
+/// let elements = PVectorMut::<i32>::from_iter([
+///     Some(1), None, Some(3),  // First list
+///     Some(4), Some(5), None,  // Second list
+///     None, Some(8), Some(9),  // Third list
+/// ]);
+///
+/// // Create validity for the lists themselves.
+/// // All lists are valid in this example.
+/// let validity = MaskMut::new_true(3);
+///
+/// let mut fsl_vec = FixedSizeListVectorMut::new(
+///     Box::new(elements.into()),
+///     3, // Each list has 3 elements
+///     validity,
+/// );
+///
+/// assert_eq!(fsl_vec.len(), 3);
+/// assert_eq!(fsl_vec.list_size(), 3);
+///
+/// // Can also append null lists.
+/// fsl_vec.append_nulls(2);
+/// assert_eq!(fsl_vec.len(), 5);
+/// ```
+///
+/// ## Working with [`split_off()`] and [`unsplit()`]
+///
+/// [`split_off()`]: VectorMutOps::split_off
+/// [`unsplit()`]: VectorMutOps::unsplit
+///
+/// ```
+/// use vortex_vector::{FixedSizeListVectorMut, PVectorMut, VectorMut, VectorMutOps};
+/// use vortex_mask::MaskMut;
+///
+/// // Create a vector with 6 lists, each containing 2 integers.
+/// let elements = PVectorMut::<i32>::from_iter([
+///     1, 2,   // List 0
+///     3, 4,   // List 1
+///     5, 6,   // List 2
+///     7, 8,   // List 3
+///     9, 10,  // List 4
+///     11, 12, // List 5
+/// ]);
+///
+/// let mut fsl_vec = FixedSizeListVectorMut::new(
+///     Box::new(elements.into()),
+///     2, // Each list has 2 elements
+///     MaskMut::new_true(6),
+/// );
+///
+/// // Split at position 4 (keeping first 4 lists, splitting off last 2).
+/// let second_part = fsl_vec.split_off(4);
+///
+/// assert_eq!(fsl_vec.len(), 4);
+/// assert_eq!(second_part.len(), 2);
+///
+/// // The elements are also split accordingly.
+/// assert_eq!(fsl_vec.elements().len(), 8); // 4 lists * 2 elements
+/// assert_eq!(second_part.elements().len(), 4); // 2 lists * 2 elements
+///
+/// // Rejoin the parts.
+/// fsl_vec.unsplit(second_part);
+/// assert_eq!(fsl_vec.len(), 6);
+/// assert_eq!(fsl_vec.elements().len(), 12);
+/// ```
+#[derive(Debug, Clone)]
+pub struct FixedSizeListVectorMut {
+    /// The mutable child vector of elements.
+    pub(super) elements: Box<VectorMut>,
+
+    /// The size of every list in the vector.
+    pub(super) list_size: u32,
+
+    /// The validity mask (where `true` represents a list is **not** null).
+    ///
+    /// Note that the `elements` vector will have its own internal validity, denoting if individual
+    /// list elements are null.
+    pub(super) validity: MaskMut,
+
+    /// The length of the vector (which is the same as the length of the validity mask).
+    ///
+    /// This is stored here as a convenience, as the validity also tracks this information.
+    pub(super) len: usize,
+}
+
+impl FixedSizeListVectorMut {
+    /// Creates a new [`FixedSizeListVectorMut`] from the given `elements` vector, size of each
+    /// list, and validity mask.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the length of the `validity` mask multiplied by the `list_size` is not
+    /// equal to the length of the `elements` vector.
+    ///
+    /// Put another way, the length of the `elements` vector divided by the `list_size` must be
+    /// equal to the length of the validity, or this function will panic.
+    pub fn new(elements: Box<VectorMut>, list_size: u32, validity: MaskMut) -> Self {
+        Self::try_new(elements, list_size, validity)
+            .vortex_expect("Failed to create `FixedSizeListVectorMut`")
+    }
+
+    /// Tries to create a new [`FixedSizeListVectorMut`] from the given `elements` vector, size of
+    /// each list, and validity mask.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the length of the `validity` mask multiplied by the `list_size` is not
+    /// equal to the length of the `elements` vector.
+    ///
+    /// Put another way, the length of the `elements` vector divided by the `list_size` must be
+    /// equal to the length of the validity.
+    pub fn try_new(
+        elements: Box<VectorMut>,
+        list_size: u32,
+        validity: MaskMut,
+    ) -> VortexResult<Self> {
+        let len = validity.len();
+        let elements_len = elements.len();
+
+        if list_size == 0 {
+            vortex_ensure!(
+                elements.is_empty(),
+                "A degenerate (`list_size == 0`) `FixedSizeListVectorMut` should have no underlying elements",
+            );
+        } else {
+            vortex_ensure!(
+                (list_size as usize).checked_mul(len) == Some(elements_len),
+                "Tried to create a `FixedSizeListVectorMut` of length {len} and list_size {list_size} \
+                 with an child vector of size {elements_len} ({list_size} * {len} != {elements_len})",
+            );
+        }
+
+        Ok(Self {
+            elements,
+            list_size,
+            validity,
+            len,
+        })
+    }
+
+    /// Creates a new [`FixedSizeListVectorMut`] from the given `elements` vector, size of each
+    /// list, and validity mask without validation (debug builds still validate and panic).
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that the length of the `validity` mask multiplied by the `list_size`
+    /// is exactly equal to the length of the `elements` vector.
+    pub unsafe fn new_unchecked(
+        elements: Box<VectorMut>,
+        list_size: u32,
+        validity: MaskMut,
+    ) -> Self {
+        let len = validity.len();
+
+        if cfg!(debug_assertions) {
+            Self::new(elements, list_size, validity)
+        } else {
+            Self {
+                elements,
+                list_size,
+                validity,
+                len,
+            }
+        }
+    }
+
+    /// Creates a new [`FixedSizeListVectorMut`] with given element type, list size, and capacity.
+    pub fn with_capacity(elem_dtype: &DType, list_size: u32, capacity: usize) -> Self {
+        let elements = Box::new(VectorMut::with_capacity(
+            elem_dtype,
+            capacity * list_size as usize,
+        ));
+
+        let validity = MaskMut::with_capacity(capacity);
+        let len = validity.len();
+
+        Self {
+            elements,
+            list_size,
+            validity,
+            len,
+        }
+    }
+
+    /// Decomposes the `FixedSizeListVectorMut` into its constituent parts (child elements, list
+    /// size, and validity).
+    pub fn into_parts(self) -> (Box<VectorMut>, u32, MaskMut) {
+        (self.elements, self.list_size, self.validity)
+    }
+
+    /// Returns the child vector of elements, which represents the contiguous fixed-size lists of
+    /// the `FixedSizeListVectorMut`.
+    pub fn elements(&self) -> &VectorMut {
+        &self.elements
+    }
+
+    /// Returns the size of every list in the vector.
+    pub fn list_size(&self) -> u32 {
+        self.list_size
+    }
+}
+
+impl VectorMutOps for FixedSizeListVectorMut {
+    type Immutable = FixedSizeListVector;
+
+    fn len(&self) -> usize {
+        self.len
+    }
+
+    /// In the case that `list_size == 0`, the capacity of the vector is infinite because it will
+    /// never take up any space.
+    fn capacity(&self) -> usize {
+        self.elements
+            .capacity()
+            .checked_div(self.list_size as usize)
+            .unwrap_or(usize::MAX)
+    }
+
+    fn reserve(&mut self, additional: usize) {
+        self.elements.reserve(additional * self.list_size as usize);
+    }
+
+    fn extend_from_vector(&mut self, other: &Self::Immutable) {
+        assert_eq!(self.list_size, other.list_size);
+        match_vector_pair!(
+            self.elements.as_mut(),
+            other.elements.as_ref(),
+            |a: VectorMut, b: Vector| {
+                // This will panic if `other.elements` is not the correct type of vector.
+                a.extend_from_vector(b);
+            }
+        );
+
+        self.validity.append_mask(&other.validity);
+        self.len += other.len;
+        debug_assert_eq!(self.len, self.validity.len());
+    }
+
+    fn append_nulls(&mut self, n: usize) {
+        self.elements.append_nulls(n * self.list_size as usize);
+        self.validity.append_n(false, n);
+        self.len += n;
+        debug_assert_eq!(self.len, self.validity.len());
+    }
+
+    fn freeze(self) -> Self::Immutable {
+        FixedSizeListVector {
+            elements: Arc::new(self.elements.freeze()),
+            list_size: self.list_size,
+            validity: self.validity.freeze(),
+            len: self.len,
+        }
+    }
+
+    fn split_off(&mut self, at: usize) -> Self {
+        assert!(
+            at <= self.capacity(),
+            "split_off out of bounds: {at} > {}",
+            self.capacity()
+        );
+
+        let split_elements = self.elements.split_off(at * self.list_size as usize);
+
+        let split_validity = self.validity.split_off(at);
+        let split_len = self.len.saturating_sub(at);
+        self.len = self.len.min(at);
+
+        debug_assert_eq!(self.len, self.validity.len());
+
+        Self {
+            elements: Box::new(split_elements),
+            list_size: self.list_size,
+            validity: split_validity,
+            len: split_len,
+        }
+    }
+
+    fn unsplit(&mut self, other: Self) {
+        assert_eq!(self.list_size, other.list_size);
+
+        self.elements.unsplit(*other.elements);
+        self.validity.unsplit(other.validity);
+
+        self.len += other.len;
+        debug_assert_eq!(self.len, self.validity.len());
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use vortex_dtype::{DType, PType};
+    use vortex_mask::{Mask, MaskMut};
+
+    use super::*;
+    use crate::{PVectorMut, VectorOps};
+
+    #[test]
+    fn test_core_operations() {
+        // Test with_capacity constructor.
+        let dtype = DType::Primitive(PType::I32, vortex_dtype::Nullability::Nullable);
+        let mut vec = FixedSizeListVectorMut::with_capacity(&dtype, 3, 10);
+        assert_eq!(vec.len(), 0);
+        assert_eq!(vec.list_size(), 3);
+        assert!(vec.capacity() >= 10);
+
+        // Create a vector to extend from.
+        let elements = Arc::new(
+            PVectorMut::<i32>::from_iter([1, 2, 3, 4, 5, 6])
+                .freeze()
+                .into(),
+        );
+        let validity = Mask::new_true(2);
+        let immutable = FixedSizeListVector::new(elements, 3, validity);
+
+        // Test extend_from_vector.
+        vec.extend_from_vector(&immutable);
+        assert_eq!(vec.len(), 2);
+        assert_eq!(vec.elements().len(), 6);
+
+        // Test append_nulls.
+        vec.append_nulls(3);
+        assert_eq!(vec.len(), 5);
+        assert_eq!(vec.elements().len(), 15); // 5 lists * 3 elements each.
+
+        // Test freeze and accessors.
+        let frozen = vec.freeze();
+        assert_eq!(frozen.len(), 5);
+        assert_eq!(frozen.list_size(), 3);
+        assert_eq!(frozen.elements().len(), 15);
+    }
+
+    #[test]
+    fn test_split_unsplit_operations() {
+        // Create a vector with 6 lists, each containing 2 elements.
+        let elements = PVectorMut::<i32>::from_iter([
+            1, 2, // List 0
+            3, 4, // List 1
+            5, 6, // List 2
+            7, 8, // List 3
+            9, 10, // List 4
+            11, 12, // List 5
+        ]);
+        let mut vec =
+            FixedSizeListVectorMut::new(Box::new(elements.into()), 2, MaskMut::new_true(6));
+
+        // Test split at different positions.
+
+        // Split at position 0 (take nothing).
+        let split = vec.split_off(0);
+        assert_eq!(vec.len(), 0);
+        assert_eq!(split.len(), 6);
+        vec.unsplit(split);
+        assert_eq!(vec.len(), 6);
+
+        // Split at middle position.
+        let split = vec.split_off(3);
+        assert_eq!(vec.len(), 3);
+        assert_eq!(split.len(), 3);
+        assert_eq!(vec.elements().len(), 6); // 3 lists * 2 elements.
+        assert_eq!(split.elements().len(), 6); // 3 lists * 2 elements.
+
+        // Verify the correct elements are in each half.
+        // First half should have [1,2,3,4,5,6].
+        // Second half should have [7,8,9,10,11,12].
+
+        // Rejoin the parts.
+        vec.unsplit(split);
+        assert_eq!(vec.len(), 6);
+        assert_eq!(vec.elements().len(), 12);
+
+        // Split at the end (take everything).
+        let split = vec.split_off(6);
+        assert_eq!(vec.len(), 6);
+        assert_eq!(split.len(), 0);
+        vec.unsplit(split);
+        assert_eq!(vec.len(), 6);
+    }
+
+    #[test]
+    fn test_null_handling() {
+        // Test nullable lists with non-null elements.
+        let elements = PVectorMut::<i32>::from_iter([1, 2, 3, 4, 5, 6]);
+        let validity = MaskMut::new_true(3);
+        // We can't directly set individual validity, but we can create vectors with nulls.
+
+        let mut vec = FixedSizeListVectorMut::new(Box::new(elements.into()), 2, validity);
+
+        // Append null lists.
+        vec.append_nulls(2);
+        assert_eq!(vec.len(), 5);
+
+        // After freezing, check validity is preserved.
+        let frozen = vec.freeze();
+        assert_eq!(frozen.len(), 5);
+        assert_eq!(frozen.validity().true_count(), 3); // First 3 are valid.
+
+        // Test non-null lists with nullable elements.
+        let elements_with_nulls = PVectorMut::<i32>::from_iter([
+            Some(1),
+            None,
+            Some(3), // First list has a null element.
+            Some(4),
+            Some(5),
+            None, // Second list has a null element.
+        ]);
+        let validity = MaskMut::new_true(2); // Both lists are valid.
+
+        let mut vec =
+            FixedSizeListVectorMut::new(Box::new(elements_with_nulls.into()), 3, validity);
+
+        assert_eq!(vec.len(), 2);
+        assert_eq!(vec.elements().len(), 6);
+
+        // Operations should preserve element nullability.
+        let split = vec.split_off(1);
+        assert_eq!(vec.len(), 1);
+        assert_eq!(split.len(), 1);
+
+        vec.unsplit(split);
+        assert_eq!(vec.len(), 2);
+    }
+
+    #[test]
+    fn test_edge_cases() {
+        // Test empty vector (0 lists).
+        let elements = PVectorMut::<i32>::from_iter(Vec::<i32>::new());
+        let validity = MaskMut::new_true(0);
+        let mut vec = FixedSizeListVectorMut::new(Box::new(elements.into()), 3, validity);
+        assert_eq!(vec.len(), 0);
+        assert_eq!(vec.list_size(), 3);
+        assert_eq!(vec.elements().len(), 0);
+
+        // Operations on empty vector.
+        vec.append_nulls(1);
+        assert_eq!(vec.len(), 1);
+
+        // Test single element list.
+        let elements = PVectorMut::<i32>::from_iter([42]);
+        let validity = MaskMut::new_true(1);
+        let vec = FixedSizeListVectorMut::new(
+            Box::new(elements.into()),
+            1, // List size of 1.
+            validity,
+        );
+        assert_eq!(vec.len(), 1);
+        assert_eq!(vec.list_size(), 1);
+
+        // Test large list size.
+        let large_elements: Vec<i32> = (0..1000).collect();
+        let elements = PVectorMut::<i32>::from_iter(large_elements);
+        let validity = MaskMut::new_true(1); // Single list with 1000 elements.
+        let vec = FixedSizeListVectorMut::new(Box::new(elements.into()), 1000, validity);
+        assert_eq!(vec.len(), 1);
+        assert_eq!(vec.list_size(), 1000);
+        assert_eq!(vec.elements().len(), 1000);
+
+        // Verify operations work correctly.
+        let frozen = vec.freeze();
+        assert_eq!(frozen.len(), 1);
+        assert_eq!(frozen.list_size(), 1000);
+    }
+
+    #[test]
+    fn test_capacity_management() {
+        let dtype = DType::Primitive(PType::I32, vortex_dtype::Nullability::Nullable);
+
+        // Test initial capacity from with_capacity.
+        let mut vec = FixedSizeListVectorMut::with_capacity(&dtype, 3, 10);
+        assert!(vec.capacity() >= 10);
+        assert!(vec.elements().capacity() >= 30); // At least 10 lists * 3 elements.
+
+        // Test reserve works without panicking.
+        // The exact capacity increase depends on the underlying allocation strategy.
+        vec.reserve(100);
+        // After reserving, we should be able to hold at least the current length + reserved amount.
+        // Since current length is 0, capacity should be at least 100.
+        assert!(vec.capacity() >= 100);
+
+        // Test capacity calculation with different list sizes.
+        let vec2 = FixedSizeListVectorMut::with_capacity(&dtype, 5, 20);
+        assert!(vec2.capacity() >= 20);
+        assert!(vec2.elements().capacity() >= 100); // At least 20 lists * 5 elements.
+
+        // Edge case: capacity when list_size = 0.
+        // Based on the documentation, capacity is infinite (usize::MAX) for degenerate case.
+        let vec3 = FixedSizeListVectorMut::with_capacity(&dtype, 0, 10);
+        assert_eq!(vec3.capacity(), usize::MAX); // Infinite capacity for degenerate case.
+
+        // Test that capacity is preserved through operations.
+        let elements = PVectorMut::<i32>::from_iter([1, 2, 3, 4, 5, 6]);
+        vec.elements = Box::new(elements.into());
+        vec.validity = MaskMut::new_true(2);
+        vec.len = 2;
+        vec.list_size = 3;
+
+        vec.reserve(8); // Reserve space for 8 more lists.
+        assert!(vec.capacity() >= 10);
+
+        // Test that split_off and unsplit work without panicking.
+        let split = vec.split_off(1);
+        assert_eq!(vec.len(), 1);
+        assert_eq!(split.len(), 1);
+
+        vec.unsplit(split);
+        assert_eq!(vec.len(), 2);
+    }
+}
diff --git a/vortex-vector/src/lib.rs b/vortex-vector/src/lib.rs
index 8adf9016b89..23d431a6afb 100644
--- a/vortex-vector/src/lib.rs
+++ b/vortex-vector/src/lib.rs
@@ -13,6 +13,7 @@
 mod binaryview;
 mod bool;
 mod decimal;
+mod fixed_size_list;
 mod null;
 mod primitive;
 mod struct_;
@@ -20,6 +21,7 @@ mod struct_;
 pub use binaryview::*;
 pub use bool::*;
 pub use decimal::*;
+pub use fixed_size_list::*;
 pub use null::*;
 pub use primitive::*;
 pub use struct_::*;
diff --git a/vortex-vector/src/macros.rs b/vortex-vector/src/macros.rs
index 7aeb66c87df..cd417b1e30a 100644
--- a/vortex-vector/src/macros.rs
+++ b/vortex-vector/src/macros.rs
@@ -45,6 +45,7 @@ macro_rules! match_each_vector {
             $crate::Vector::Primitive($vec) => $body,
             $crate::Vector::String($vec) => $body,
             $crate::Vector::Binary($vec) => $body,
+            $crate::Vector::FixedSizeList($vec) => $body,
             $crate::Vector::Struct($vec) => $body,
         }
     }};
@@ -91,6 +92,7 @@ macro_rules!
match_each_vector_mut { $crate::VectorMut::Primitive($vec) => $body, $crate::VectorMut::String($vec) => $body, $crate::VectorMut::Binary($vec) => $body, + $crate::VectorMut::FixedSizeList($vec) => $body, $crate::VectorMut::Struct($vec) => $body, } }}; @@ -115,6 +117,9 @@ macro_rules! __match_vector_pair_arms { ($crate::$enum_left::Primitive($a), $crate::$enum_right::Primitive($b)) => $body, ($crate::$enum_left::String($a), $crate::$enum_right::String($b)) => $body, ($crate::$enum_left::Binary($a), $crate::$enum_right::Binary($b)) => $body, + ($crate::$enum_left::FixedSizeList($a), $crate::$enum_right::FixedSizeList($b)) => { + $body + } ($crate::$enum_left::Struct($a), $crate::$enum_right::Struct($b)) => $body, _ => ::vortex_error::vortex_panic!("Mismatched vector types"), } diff --git a/vortex-vector/src/private.rs b/vortex-vector/src/private.rs index 2ae860b6894..1add13cc0a0 100644 --- a/vortex-vector/src/private.rs +++ b/vortex-vector/src/private.rs @@ -37,5 +37,8 @@ impl Sealed for PVectorMut {} impl Sealed for BinaryViewVector {} impl Sealed for BinaryViewVectorMut {} +impl Sealed for FixedSizeListVector {} +impl Sealed for FixedSizeListVectorMut {} + impl Sealed for StructVector {} impl Sealed for StructVectorMut {} diff --git a/vortex-vector/src/struct_/vector_mut.rs b/vortex-vector/src/struct_/vector_mut.rs index ab02c755c27..c81ce5b34f2 100644 --- a/vortex-vector/src/struct_/vector_mut.rs +++ b/vortex-vector/src/struct_/vector_mut.rs @@ -308,11 +308,11 @@ impl VectorMutOps for StructVectorMut { .collect(); let split_validity = self.validity.split_off(at); - - // Update self's state. 
let split_len = self.len.saturating_sub(at); self.len = at; + debug_assert_eq!(self.len, self.validity.len()); + Self { fields: split_fields.into_boxed_slice(), len: split_len, diff --git a/vortex-vector/src/vector.rs b/vortex-vector/src/vector.rs index 9735285a7cf..430785696b5 100644 --- a/vortex-vector/src/vector.rs +++ b/vortex-vector/src/vector.rs @@ -8,6 +8,7 @@ use vortex_error::vortex_panic; +use crate::fixed_size_list::FixedSizeListVector; use crate::{ BinaryVector, BoolVector, DecimalVector, NullVector, PrimitiveVector, StringVector, StructVector, VectorMut, VectorOps, match_each_vector, @@ -41,8 +42,8 @@ pub enum Vector { Binary(BinaryVector), // List // List(ListVector), - // FixedList - // FixedList(FixedListVector), + /// Vectors of Lists with fixed sizes. + FixedSizeList(FixedSizeListVector), /// Vectors of Struct elements. Struct(StructVector), } @@ -109,6 +110,14 @@ impl Vector { vortex_panic!("Expected BinaryVector, got {self:?}"); } + /// Returns a reference to the inner [`FixedSizeListVector`] if `self` is of that variant. + pub fn as_fixed_size_list(&self) -> &FixedSizeListVector { + if let Vector::FixedSizeList(v) = self { + return v; + } + vortex_panic!("Expected FixedSizeListVector, got {self:?}"); + } + /// Returns a reference to the inner [`StructVector`] if `self` is of that variant. pub fn as_struct(&self) -> &StructVector { if let Vector::Struct(v) = self { @@ -159,6 +168,15 @@ impl Vector { vortex_panic!("Expected BinaryVector, got {self:?}"); } + /// Consumes `self` and returns the inner [`FixedSizeListVector`] if `self` is of that + /// variant. + pub fn into_fixed_size_list(self) -> FixedSizeListVector { + if let Vector::FixedSizeList(v) = self { + return v; + } + vortex_panic!("Expected FixedSizeListVector, got {self:?}"); + } + /// Consumes `self` and returns the inner [`StructVector`] if `self` is of that variant. 
pub fn into_struct(self) -> StructVector { if let Vector::Struct(v) = self { diff --git a/vortex-vector/src/vector_mut.rs b/vortex-vector/src/vector_mut.rs index a79663475a0..b53e55587ee 100644 --- a/vortex-vector/src/vector_mut.rs +++ b/vortex-vector/src/vector_mut.rs @@ -10,6 +10,7 @@ use vortex_dtype::DType; use vortex_error::vortex_panic; use crate::binaryview::{BinaryVectorMut, StringVectorMut}; +use crate::fixed_size_list::FixedSizeListVectorMut; use crate::{ BoolVectorMut, DecimalVectorMut, NullVectorMut, PrimitiveVectorMut, StructVectorMut, Vector, VectorMutOps, match_each_vector_mut, match_vector_pair, @@ -41,6 +42,8 @@ pub enum VectorMut { String(StringVectorMut), /// Mutable Binary vectors. Binary(BinaryVectorMut), + /// Mutable vectors of Lists with fixed sizes. + FixedSizeList(FixedSizeListVectorMut), /// Mutable vectors of Struct elements. Struct(StructVectorMut), } @@ -54,6 +57,9 @@ impl VectorMut { DType::Primitive(ptype, _) => { PrimitiveVectorMut::with_capacity(*ptype, capacity).into() } + DType::FixedSizeList(elem_dtype, list_size, _) => { + FixedSizeListVectorMut::with_capacity(elem_dtype, *list_size, capacity).into() + } DType::Struct(struct_fields, _) => { StructVectorMut::with_capacity(struct_fields, capacity).into() } @@ -61,7 +67,6 @@ impl VectorMut { | DType::Utf8(_) | DType::Binary(_) | DType::List(..) - | DType::FixedSizeList(..) | DType::Extension(_) => vortex_panic!("Unsupported dtype for VectorMut"), } } @@ -146,6 +151,14 @@ impl VectorMut { vortex_panic!("Expected BinaryVectorMut, got {self:?}"); } + /// Returns a reference to the inner [`FixedSizeListVectorMut`] if `self` is of that variant. + pub fn as_fixed_size_list(&self) -> &FixedSizeListVectorMut { + if let VectorMut::FixedSizeList(v) = self { + return v; + } + vortex_panic!("Expected FixedSizeListVectorMut, got {self:?}"); + } + /// Returns a reference to the inner [`StructVectorMut`] if `self` is of that variant. 
pub fn as_struct(&self) -> &StructVectorMut { if let VectorMut::Struct(v) = self { @@ -196,6 +209,15 @@ impl VectorMut { vortex_panic!("Expected BinaryVectorMut, got {self:?}"); } + /// Consumes `self` and returns the inner [`FixedSizeListVectorMut`] if `self` is of that + /// variant. + pub fn into_fixed_size_list(self) -> FixedSizeListVectorMut { + if let VectorMut::FixedSizeList(v) = self { + return v; + } + vortex_panic!("Expected FixedSizeListVectorMut, got {self:?}"); + } + /// Consumes `self` and returns the inner [`StructVectorMut`] if `self` is of that variant. pub fn into_struct(self) -> StructVectorMut { if let VectorMut::Struct(v) = self {