Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions vortex-compute/src/arrow/fixed_size_list.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::sync::Arc;

use arrow_array::{ArrayRef, FixedSizeListArray};
use arrow_schema::Field;
use vortex_error::VortexResult;
use vortex_vector::FixedSizeListVector;

use crate::arrow::IntoArrow;

impl IntoArrow<ArrayRef> for FixedSizeListVector {
fn into_arrow(self) -> VortexResult<ArrayRef> {
let (elements, list_size, validity) = self.into_parts();

let converted_elements = elements.as_ref().clone().into_arrow()?;
let field = Arc::new(Field::new_list_field(
converted_elements.data_type().clone(),
true, // Vectors are always nullable.
));

Ok(Arc::new(FixedSizeListArray::try_new(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe leave a todo with the Arrow bug linked?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't run into the same arrow bug since vectors are always nullable

field,
list_size as i32,
converted_elements,
validity.into_arrow()?,
)?))
}
}
1 change: 1 addition & 0 deletions vortex-compute/src/arrow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use vortex_error::VortexResult;
mod binaryview;
mod bool;
mod decimal;
mod fixed_size_list;
mod mask;
mod null;
mod primitive;
Expand Down
15 changes: 12 additions & 3 deletions vortex-compute/src/mask/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ use std::ops::BitAnd;
use vortex_dtype::{NativeDecimalType, NativePType};
use vortex_mask::Mask;
use vortex_vector::{
BinaryViewType, BinaryViewVector, BoolVector, DVector, DecimalVector, NullVector, PVector,
PrimitiveVector, StructVector, Vector, match_each_dvector, match_each_pvector,
match_each_vector,
BinaryViewType, BinaryViewVector, BoolVector, DVector, DecimalVector, FixedSizeListVector,
NullVector, PVector, PrimitiveVector, StructVector, Vector, match_each_dvector,
match_each_pvector, match_each_vector,
};

/// Trait for masking the validity of an array or vector.
Expand Down Expand Up @@ -79,6 +79,15 @@ impl<T: BinaryViewType> MaskValidity for BinaryViewVector<T> {
}
}

impl MaskValidity for FixedSizeListVector {
fn mask_validity(self, mask: &Mask) -> Self {
let (elements, list_size, validity) = self.into_parts();
// SAFETY: we are preserving the original elements and `list_size`, only modifying the
// validity.
unsafe { Self::new_unchecked(elements, list_size, validity.bitand(mask)) }
}
}

impl MaskValidity for StructVector {
fn mask_validity(self, mask: &Mask) -> Self {
let (fields, validity) = self.into_parts();
Expand Down
24 changes: 24 additions & 0 deletions vortex-vector/src/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Definition and implementation of [`FixedSizeListVector`] and [`FixedSizeListVectorMut`].

mod vector;
pub use vector::FixedSizeListVector;

mod vector_mut;
pub use vector_mut::FixedSizeListVectorMut;

use crate::{Vector, VectorMut};

impl From<FixedSizeListVector> for Vector {
fn from(v: FixedSizeListVector) -> Self {
Self::FixedSizeList(v)
}
}

impl From<FixedSizeListVectorMut> for VectorMut {
fn from(v: FixedSizeListVectorMut) -> Self {
Self::FixedSizeList(v)
}
}
305 changes: 305 additions & 0 deletions vortex-vector/src/fixed_size_list/vector.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,305 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Definition and implementation of [`FixedSizeListVector`].

use std::sync::Arc;

use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
use vortex_mask::Mask;

use crate::{FixedSizeListVectorMut, Vector, VectorOps};

/// An immutable vector of fixed-size lists.
///
/// `FixedSizeListVector` can be considered a borrowed / frozen version of
/// [`FixedSizeListVectorMut`], which is created via the [`freeze`](crate::VectorMutOps::freeze)
/// method.
///
/// See the documentation for [`FixedSizeListVectorMut`] for more information.
#[derive(Debug, Clone)]
pub struct FixedSizeListVector {
/// The child vector of elements.
pub(super) elements: Arc<Vector>,

/// The size of every list in the vector.
pub(super) list_size: u32,

/// The validity mask (where `true` represents a list is **not** null).
///
/// Note that the `elements` vector will have its own internal validity, denoting if individual
/// list elements are null.
pub(super) validity: Mask,

/// The length of the vector (which is the same as the length of the validity mask).
///
/// This is stored here as a convenience, as the validity also tracks this information.
pub(super) len: usize,
}

impl FixedSizeListVector {
/// Creates a new [`FixedSizeListVector`] from the given `elements` vector, size of each list,
/// and validity mask.
///
/// # Panics
///
/// Panics if the length of the `validity` mask multiplied by the `list_size` is not
/// equal to the length of the `elements` vector.
///
/// Put another way, the length of the `elements` vector divided by the `list_size` must be
/// equal to the length of the validity, or this function will panic.
pub fn new(elements: Arc<Vector>, list_size: u32, validity: Mask) -> Self {
Self::try_new(elements, list_size, validity)
.vortex_expect("Failed to create `FixedSizeListVector`")
}

/// Tries to create a new [`FixedSizeListVector`] from the given `elements` vector, size of each
/// list, and validity mask.
///
/// # Errors
///
/// Returns and error if the length of the `validity` mask multiplied by the `list_size` is not
/// equal to the length of the `elements` vector.
///
/// Put another way, the length of the `elements` vector divided by the `list_size` must be
/// equal to the length of the validity.
pub fn try_new(elements: Arc<Vector>, list_size: u32, validity: Mask) -> VortexResult<Self> {
let len = validity.len();
let elements_len = elements.len();

if list_size == 0 {
vortex_ensure!(
elements.is_empty(),
"A degenerate (`list_size == 0`) `FixedSizeListVector` should have no underlying elements",
);
} else {
vortex_ensure!(
list_size as usize * len == elements_len,
"Tried to create a `FixedSizeListVector` of length {len} and list_size {list_size} \
with an child vector of size {elements_len} ({list_size} * {len} != {elements_len})",
);
}

Ok(Self {
elements,
list_size,
validity,
len,
})
}

/// Tries to create a new [`FixedSizeListVector`] from the given `elements` vector, size of each
/// list, and validity mask without validation.
///
/// # Safety
///
/// The caller must ensure that the length of the `validity` mask multiplied by the `list_size`
/// is exactly equal to the length of the `elements` vector.
pub unsafe fn new_unchecked(elements: Arc<Vector>, list_size: u32, validity: Mask) -> Self {
let len = validity.len();

if cfg!(debug_assertions) {
Self::new(elements, list_size, validity)
} else {
Self {
elements,
list_size,
validity,
len,
}
}
}

/// Decomposes the `FixedSizeListVector` into its constituent parts (child elements, list size,
/// and validity).
pub fn into_parts(self) -> (Arc<Vector>, u32, Mask) {
(self.elements, self.list_size, self.validity)
}

/// Returns the child vector of elements, which represents the contiguous fixed-size lists of
/// the `FixedSizeListVector`.
pub fn elements(&self) -> &Arc<Vector> {
&self.elements
}

/// Returns the size of every list in the vector.
pub fn list_size(&self) -> u32 {
self.list_size
}
}

impl VectorOps for FixedSizeListVector {
type Mutable = FixedSizeListVectorMut;

fn len(&self) -> usize {
self.len
}

fn validity(&self) -> &Mask {
&self.validity
}

fn try_into_mut(self) -> Result<Self::Mutable, Self>
where
Self: Sized,
{
let len = self.len;
let list_size = self.list_size;

let elements = match Arc::try_unwrap(self.elements) {
Ok(elements) => elements,
Err(elements) => return Err(FixedSizeListVector { elements, ..self }),
};

let validity = match self.validity.try_into_mut() {
Ok(validity) => validity,
Err(validity) => {
return Err(FixedSizeListVector {
elements: Arc::new(elements),
list_size,
validity,
len,
});
}
};

match elements.try_into_mut() {
Ok(mutable_elements) => Ok(FixedSizeListVectorMut {
elements: Box::new(mutable_elements),
list_size,
validity,
len,
}),
Err(elements) => Err(FixedSizeListVector {
elements: Arc::new(elements),
list_size,
validity: validity.freeze(),
len,
}),
}
}
}

#[cfg(test)]
mod tests {
use std::sync::Arc;

use vortex_mask::Mask;

use super::*;
use crate::{PVectorMut, Vector, VectorMutOps};

#[test]
fn test_constructor_and_validation() {
// Valid construction with new().
let elements: Arc<Vector> = Arc::new(
PVectorMut::<i32>::from_iter([1, 2, 3, 4, 5, 6])
.freeze()
.into(),
);
let validity = Mask::new_true(2);
let vec = FixedSizeListVector::new(elements.clone(), 3, validity.clone());
assert_eq!(vec.len(), 2);
assert_eq!(vec.list_size(), 3);

// Valid construction with try_new().
let result = FixedSizeListVector::try_new(elements.clone(), 3, validity);
assert!(result.is_ok());
assert_eq!(result.unwrap().len(), 2);

// Length mismatch error - elements length != list_size * validity length.
let bad_validity = Mask::new_true(3); // Should be 2 for 6 elements with list_size=3.
let result = FixedSizeListVector::try_new(elements.clone(), 3, bad_validity);
assert!(result.is_err());

// Degenerate case (list_size = 0) with empty elements is valid.
let empty_elements: Arc<Vector> = Arc::new(
PVectorMut::<i32>::from_iter(Vec::<i32>::new())
.freeze()
.into(),
);
let validity = Mask::new_true(5);
let result = FixedSizeListVector::try_new(empty_elements, 0, validity);
assert!(result.is_ok());
let vec = result.unwrap();
assert_eq!(vec.len(), 5);
assert_eq!(vec.list_size(), 0);

// Degenerate case with non-empty elements should fail.
let result = FixedSizeListVector::try_new(elements, 0, Mask::new_true(1));
assert!(result.is_err());

// Test unsafe new_unchecked in debug mode (it should still validate).
let elements: Arc<Vector> =
Arc::new(PVectorMut::<i32>::from_iter([1, 2, 3, 4]).freeze().into());
let validity = Mask::new_true(2);
let vec = unsafe { FixedSizeListVector::new_unchecked(elements, 2, validity) };
assert_eq!(vec.len(), 2);
assert_eq!(vec.list_size(), 2);
}

#[test]
fn test_try_into_mut_conversion() {
// Create a vector that we solely own.
let elements: Arc<Vector> = Arc::new(
PVectorMut::<i32>::from_iter([1, 2, 3, 4, 5, 6])
.freeze()
.into(),
);
let validity = Mask::new_true(2);
let vec = FixedSizeListVector::new(elements, 3, validity);

// Successful conversion when solely owned.
let result = vec.try_into_mut();
assert!(result.is_ok());
let mut_vec = result.unwrap();
assert_eq!(mut_vec.len(), 2);
assert_eq!(mut_vec.list_size(), 3);

// Freeze and try again - roundtrip test.
let vec = mut_vec.freeze();
let result = vec.try_into_mut();
assert!(result.is_ok());

// Test failed conversion with shared ownership.
let elements: Arc<Vector> =
Arc::new(PVectorMut::<i32>::from_iter([1, 2, 3, 4]).freeze().into());
let validity = Mask::new_true(2);
let vec = FixedSizeListVector::new(elements, 2, validity);

// Keep a clone to maintain shared ownership.
let _shared = vec.clone();

let result = vec.try_into_mut();
assert!(result.is_err());

// The error case should return the original vector.
if let Err(returned_vec) = result {
assert_eq!(returned_vec.len(), 2);
assert_eq!(returned_vec.list_size(), 2);
}
}

#[test]
fn test_accessors_and_parts() {
let elements: Arc<Vector> = Arc::new(
PVectorMut::<i32>::from_iter([1, 2, 3, 4, 5, 6])
.freeze()
.into(),
);
let validity = Mask::new_true(3);
let vec = FixedSizeListVector::new(elements, 2, validity);

// Test accessors.
assert_eq!(vec.len(), 3);
assert_eq!(vec.list_size(), 2);
assert_eq!(vec.elements().len(), 6);
assert_eq!(vec.validity().true_count(), 3);

// Test into_parts.
let (parts_elements, list_size, parts_validity) = vec.into_parts();
assert_eq!(parts_elements.len(), 6);
assert_eq!(list_size, 2);
assert_eq!(parts_validity.true_count(), 3);
}
}
Loading
Loading